diff options
118 files changed, 38931 insertions, 2324 deletions
@@ -1611,8 +1611,10 @@ E: greg@kroah.com W: http://www.kroah.com/linux-usb/ D: USB Serial Converter driver framework, USB Handspring Visor driver D: ConnectTech WHITEHeat USB driver, Generic USB Serial driver -D: USB Bluetooth driver +D: USB I/O Edgeport driver, USB Serial IrDA driver +D: USB Bluetooth driver, USB Skeleton driver D: bits and pieces of USB core code. +D: PCI Hotplug core, PCI Hotplug Compaq driver modifications N: Russell Kroll E: rkroll@exploits.org diff --git a/MAINTAINERS b/MAINTAINERS index 741d225f624a..dcfcccc606a4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -764,6 +764,13 @@ P: Tigran Aivazian M: tigran@veritas.com S: Maintained +INTERMEZZO FILE SYSTEM +P: Peter J. Braam +M: braam@clusterfs.com +W: http://www.inter-mezzo.org/ +L: intermezzo-discuss@lists.sourceforge.net +S: Maintained + IP MASQUERADING: P: Juanjo Ciarlante M: jjciarla@raiz.uncu.edu.ar @@ -1161,6 +1168,17 @@ M: mj@ucw.cz L: linux-kernel@vger.kernel.org S: Odd Fixes +PCI HOTPLUG CORE +P: Greg Kroah-Hartman +M: greg@kroah.com +M: gregkh@us.ibm.com +S: Supported + +PCI HOTPLUG COMPAQ DRIVER +P: Greg Kroah-Hartman +M: greg@kroah.com +S: Maintained + PCMCIA SUBSYSTEM P: David Hinds M: dhinds@zen.stanford.edu @@ -1188,6 +1206,11 @@ M: paulus@samba.org L: linux-ppp@vger.kernel.org S: Maintained +PPP OVER ATM (RFC 2364) +P: Mitchell Blank Jr +M: mitch@sfgoth.com +S: Maintained + PPP OVER ETHERNET P: Michal Ostrowski M: mostrows@styx.uwaterloo.ca @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 15 -EXTRAVERSION =-pre2 +EXTRAVERSION =-pre3 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 9dfa12f38e5c..0a0026a790ee 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -315,6 +315,7 @@ CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_NCR53C406A is not set # CONFIG_SCSI_NCR53C7xx is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_NCR53C8XX is not set 
CONFIG_SCSI_SYM53C8XX=y CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=4 diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 3f95d10b00d8..227aa9187eba 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -12,7 +12,6 @@ unsigned long dmi_broken; int is_sony_vaio_laptop; -int enable_acpi_smp_table; struct dmi_header { diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index ad4adf1a8dba..17d091d639cc 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -112,6 +112,11 @@ EXPORT_SYMBOL(pcibios_penalize_isa_irq); EXPORT_SYMBOL(pci_mem_start); #endif +#ifdef CONFIG_PCI_BIOS +EXPORT_SYMBOL(pcibios_set_irq_routing); +EXPORT_SYMBOL(pcibios_get_irq_routing_table); +#endif + #ifdef CONFIG_X86_USE_3DNOW EXPORT_SYMBOL(_mmx_memcpy); EXPORT_SYMBOL(mmx_clear_page); diff --git a/arch/i386/kernel/pci-i386.h b/arch/i386/kernel/pci-i386.h index a78a051b1f3c..2c821af08fde 100644 --- a/arch/i386/kernel/pci-i386.h +++ b/arch/i386/kernel/pci-i386.h @@ -37,9 +37,6 @@ extern int pcibios_last_bus; extern struct pci_bus *pci_root_bus; extern struct pci_ops *pci_root_ops; -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - /* pci-irq.c */ struct irq_info { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index b2b93629b0c0..92502849a51f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -67,6 +67,10 @@ * * AMD Athlon/Duron/Thunderbird bluesmoke support. * Dave Jones <davej@suse.de>, April 2001. + * + * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix. + * Dave Jones <davej@suse.de>, September, October 2001. 
+ * */ /* @@ -156,6 +160,8 @@ extern char _text, _etext, _edata, _end; static int disable_x86_serial_nr __initdata = 1; static int disable_x86_fxsr __initdata = 0; +int enable_acpi_smp_table; + /* * This is set up by the setup-routine at boot-time */ @@ -753,6 +759,10 @@ static void __init parse_mem_cmdline (char ** cmdline_p) add_memory_region(start_at, mem_size, E820_RAM); } } + /* acpismp=force forces parsing and use of the ACPI SMP table */ + if (c == ' ' && !memcmp(from, "acpismp=force", 13)) + enable_acpi_smp_table = 1; + c = *(from++); if (!c) break; @@ -1035,6 +1045,15 @@ void __init setup_arch(char **cmdline_p) #endif } +static int cachesize_override __initdata = -1; +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + + #ifndef CONFIG_X86_TSC static int tsc_disable __initdata = 0; @@ -1105,12 +1124,25 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c) l2size = 256; } + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && + (c->x86_model == 11) && (l2size == 0)) + l2size = 256; + /* VIA C3 CPUs (670-68F) need further shifting. */ if (c->x86_vendor == X86_VENDOR_CENTAUR && (c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) { l2size = l2size >> 8; } + /* Allow user to override all this if necessary. 
*/ + if (cachesize_override != -1) + l2size = cachesize_override; + if ( l2size == 0 ) return; /* Again, no L2 cache is possible */ @@ -2301,14 +2333,14 @@ static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } -int __init x86_serial_nr_setup(char *s) +static int __init x86_serial_nr_setup(char *s) { disable_x86_serial_nr = 0; return 1; } __setup("serialnumber", x86_serial_nr_setup); -int __init x86_fxsr_setup(char * s) +static int __init x86_fxsr_setup(char * s) { disable_x86_fxsr = 1; return 1; @@ -2403,7 +2435,6 @@ static int __init id_and_try_enable_cpuid(struct cpuinfo_x86 *c) { unsigned char ccr3, ccr4; unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); @@ -2835,6 +2866,53 @@ void __init cpu_init (void) } /* + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work + */ + +int __init ppro_with_ram_bug(void) +{ + char vendor_id[16]; + int ident; + + /* Must have CPUID */ + if(!have_cpuid_p()) + return 0; + if(cpuid_eax(0)<1) + return 0; + + /* Must be Intel */ + cpuid(0, &ident, + (int *)&vendor_id[0], + (int *)&vendor_id[8], + (int *)&vendor_id[4]); + + if(memcmp(vendor_id, "IntelInside", 12)) + return 0; + + ident = cpuid_eax(1); + + /* Model 6 */ + + if(((ident>>8)&15)!=6) + return 0; + + /* Pentium Pro */ + + if(((ident>>4)&15)!=1) + return 0; + + if((ident&15) < 8) + { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. 
Taking evasive action.\n"); + return 1; + } + printk(KERN_INFO "Your Pentium Pro seems ok.\n"); + return 0; +} + +/* * Local Variables: * mode:c * c-file-style:"k&r" diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a66a15e47744..b4d807436ec5 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -519,6 +519,7 @@ unsigned long get_cmos_time(void) unsigned int year, mon, day, hour, min, sec; int i; + spin_lock(&rtc_lock); /* The Linux interpretation of the CMOS clock register contents: * When the Update-In-Progress (UIP) flag goes from 1 to 0, the * RTC registers show the second which has precisely just started. @@ -548,6 +549,7 @@ unsigned long get_cmos_time(void) BCD_TO_BIN(mon); BCD_TO_BIN(year); } + spin_unlock(&rtc_lock); if ((year += 1900) < 1970) year += 100; return mktime(year, mon, day, hour, min, sec); diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 95e063f0fd5d..f0c3b00c0a51 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -439,13 +439,24 @@ static inline int page_is_ram (unsigned long pagenr) return 0; } +static inline int page_kills_ppro(unsigned long pagenr) +{ + if(pagenr >= 0x70000 && pagenr <= 0x7003F) + return 1; + return 0; +} + void __init mem_init(void) { + extern int ppro_with_ram_bug(void); int codesize, reservedpages, datasize, initsize; int tmp; + int bad_ppro; if (!mem_map) BUG(); + + bad_ppro = ppro_with_ram_bug(); #ifdef CONFIG_HIGHMEM highmem_start_page = mem_map + highstart_pfn; @@ -476,6 +487,11 @@ void __init mem_init(void) SetPageReserved(page); continue; } + if (bad_ppro && page_kills_ppro(tmp)) + { + SetPageReserved(page); + continue; + } ClearPageReserved(page); set_bit(PG_highmem, &page->flags); atomic_set(&page->count, 1); diff --git a/arch/sparc64/config.in b/arch/sparc64/config.in index 7fcb4fc0db3b..9f33017e721b 100644 --- a/arch/sparc64/config.in +++ b/arch/sparc64/config.in @@ -211,7 +211,9 @@ endmenu source drivers/fc4/Config.in -source 
drivers/message/fusion/Config.in +if [ "$CONFIG_PCI" = "y" ]; then + source drivers/message/fusion/Config.in +fi source drivers/ieee1394/Config.in diff --git a/drivers/Makefile b/drivers/Makefile index c2679823fd2d..ca56bf34ba3c 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -10,7 +10,7 @@ mod-subdirs := dio mtd sbus video macintosh usb input telephony sgi ide \ message/i2o message/fusion scsi md ieee1394 pnp isdn atm \ fc4 net/hamradio i2c acpi bluetooth -subdir-y := parport char block net sound misc media cdrom +subdir-y := parport char block net sound misc media cdrom hotplug subdir-m := $(subdir-y) @@ -48,4 +48,3 @@ subdir-$(CONFIG_ACPI) += acpi subdir-$(CONFIG_BLUEZ) += bluetooth include $(TOPDIR)/Rules.make - diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c index bc954ca04ab8..8ffecbb3d6d4 100644 --- a/drivers/block/blkpg.c +++ b/drivers/block/blkpg.c @@ -195,8 +195,13 @@ int blkpg_ioctl(kdev_t dev, struct blkpg_ioctl_arg *arg) int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) { + struct gendisk *g; + u64 ullval = 0; int intval; + if (!dev) + return -EINVAL; + switch (cmd) { case BLKROSET: if (!capable(CAP_SYS_ADMIN)) @@ -212,7 +217,7 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) case BLKRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if(!dev || arg > 0xff) + if(arg > 0xff) return -EINVAL; read_ahead[MAJOR(dev)] = arg; return 0; @@ -224,8 +229,6 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) case BLKFLSBUF: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev) - return -EINVAL; fsync_dev(dev); invalidate_buffers(dev); return 0; @@ -235,18 +238,16 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) intval = get_hardsect_size(dev); return put_user(intval, (int *) arg); -#if 0 case BLKGETSIZE: - /* Today get_gendisk() requires a linear scan; - add this when dev has pointer type. 
*/ - /* add BLKGETSIZE64 too */ + case BLKGETSIZE64: g = get_gendisk(dev); - if (!g) - ulongval = 0; + if (g) + ullval = g->part[MINOR(dev)].nr_sects; + + if (cmd == BLKGETSIZE) + return put_user((unsigned long)ullval, (unsigned long *)arg); else - ulongval = g->part[MINOR(dev)].nr_sects; - return put_user(ulongval, (unsigned long *) arg); -#endif + return put_user(ullval, (u64 *)arg); #if 0 case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/char/Makefile b/drivers/char/Makefile index c0d7506a93f4..ddffec9a0282 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -126,6 +126,7 @@ endif obj-$(CONFIG_VT) += vt.o vc_screen.o consolemap.o consolemap_deftbl.o $(CONSOLE) selection.o obj-$(CONFIG_SERIAL) += $(SERIAL) +obj-$(CONFIG_SERIAL_ACPI) += acpi_serial.o obj-$(CONFIG_SERIAL_21285) += serial_21285.o obj-$(CONFIG_SERIAL_SA1100) += serial_sa1100.o obj-$(CONFIG_SERIAL_AMBA) += serial_amba.o @@ -231,6 +232,7 @@ obj-$(CONFIG_977_WATCHDOG) += wdt977.o obj-$(CONFIG_I810_TCO) += i810-tco.o obj-$(CONFIG_MACHZ_WDT) += machzwd.o obj-$(CONFIG_SH_WDT) += shwdt.o +obj-$(CONFIG_EUROTECH_WDT) += eurotechwdt.o obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o subdir-$(CONFIG_MWAVE) += mwave diff --git a/drivers/char/drm/ati_pcigart.h b/drivers/char/drm/ati_pcigart.h index 8b486c10abf1..5851b72f6529 100644 --- a/drivers/char/drm/ati_pcigart.h +++ b/drivers/char/drm/ati_pcigart.h @@ -36,7 +36,7 @@ #elif PAGE_SIZE == 4096 # define ATI_PCIGART_TABLE_ORDER 3 # define ATI_PCIGART_TABLE_PAGES (1 << 3) -#elif +#else # error - PAGE_SIZE not 8K or 4K #endif @@ -57,7 +57,7 @@ static unsigned long DRM(ati_alloc_pcigart_table)( void ) page = virt_to_page( address ); - for ( i = 0 ; i <= ATI_PCIGART_TABLE_PAGES ; i++, page++ ) { + for ( i = 0 ; i < ATI_PCIGART_TABLE_PAGES ; i++, page++ ) { atomic_inc( &page->count ); SetPageReserved( page ); } @@ -74,7 +74,7 @@ static void DRM(ati_free_pcigart_table)( unsigned long address ) page = virt_to_page( 
address ); - for ( i = 0 ; i <= ATI_PCIGART_TABLE_PAGES ; i++, page++ ) { + for ( i = 0 ; i < ATI_PCIGART_TABLE_PAGES ; i++, page++ ) { atomic_dec( &page->count ); ClearPageReserved( page ); } diff --git a/drivers/ide/pdcraid.c b/drivers/ide/pdcraid.c index f0b784128e3b..0c98b7b1d27b 100644 --- a/drivers/ide/pdcraid.c +++ b/drivers/ide/pdcraid.c @@ -12,9 +12,7 @@ Authors: Arjan van de Ven <arjanv@redhat.com> - - - + Based on work done by Søren Schmidt for FreeBSD */ @@ -54,6 +52,12 @@ static struct disk_dev devlist[]= { {IDE2_MAJOR, 64, -1 }, {IDE3_MAJOR, 0, -1 }, {IDE3_MAJOR, 64, -1 }, + {IDE4_MAJOR, 0, -1 }, + {IDE4_MAJOR, 64, -1 }, + {IDE5_MAJOR, 0, -1 }, + {IDE5_MAJOR, 64, -1 }, + {IDE6_MAJOR, 0, -1 }, + {IDE6_MAJOR, 64, -1 }, }; @@ -550,14 +554,8 @@ static __init int pdcraid_init_one(int device,int raidlevel) request_queue_t *q; int i,count; - probedisk(0, device, raidlevel); - probedisk(1, device, raidlevel); - probedisk(2, device, raidlevel); - probedisk(3, device, raidlevel); - probedisk(4, device, raidlevel); - probedisk(5, device, raidlevel); - probedisk(6, device, raidlevel); - probedisk(7, device, raidlevel); + for (i=0; i<14; i++) + probedisk(i, device, raidlevel); if (raidlevel==0) fill_cutoff(device); diff --git a/drivers/ieee1394/ieee1394_syms.c b/drivers/ieee1394/ieee1394_syms.c index 4b9324dd9cec..af0b2bc9704b 100644 --- a/drivers/ieee1394/ieee1394_syms.c +++ b/drivers/ieee1394/ieee1394_syms.c @@ -84,4 +84,5 @@ EXPORT_SYMBOL(hpsb_guid_fill_packet); EXPORT_SYMBOL(hpsb_register_protocol); EXPORT_SYMBOL(hpsb_unregister_protocol); EXPORT_SYMBOL(hpsb_release_unit_directory); + MODULE_LICENSE("GPL"); diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c index 815176c8fc1f..1038837ca567 100644 --- a/drivers/ieee1394/pcilynx.c +++ b/drivers/ieee1394/pcilynx.c @@ -1637,8 +1637,8 @@ MODULE_DEVICE_TABLE(pci, pci_table); static void __exit pcilynx_cleanup(void) { + pci_unregister_driver(&lynx_pcidriver); 
hpsb_unregister_lowlevel(&lynx_template); - pci_unregister_driver(&lynx_pcidriver); PRINT_G(KERN_INFO, "removed " PCILYNX_DRIVER_NAME " module"); } diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 732844b8902c..66541f522318 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -6,7 +6,7 @@ O_TARGET := mddev.o export-objs := md.o xor.o list-multi := lvm-mod.o -lvm-mod-objs := lvm.o lvm-snap.o +lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o # Note: link order is important. All raid personalities # and xor.o must come before md.o, as they each initialise diff --git a/drivers/md/lvm-fs.c b/drivers/md/lvm-fs.c new file mode 100644 index 000000000000..cf1f8d08a127 --- /dev/null +++ b/drivers/md/lvm-fs.c @@ -0,0 +1,623 @@ +/* + * kernel/lvm-fs.c + * + * Copyright (C) 2001 Sistina Software + * + * January,February 2001 + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ * + */ + +/* + * Changelog + * + * 11/01/2001 - First version (Joe Thornber) + * 21/03/2001 - added display of stripes and stripe size (HM) + * 04/10/2001 - corrected devfs_register() call in lvm_init_fs() + * 11/04/2001 - don't devfs_register("lvm") as user-space always does it + * 10/05/2001 - show more of PV name in /proc/lvm/global + * + */ + +#include <linux/config.h> +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/smp_lock.h> + +#include <linux/devfs_fs_kernel.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/lvm.h> + +#include "lvm-internal.h" + + +static int _proc_read_vg(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int _proc_read_lv(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int _proc_read_pv(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int _proc_read_global(char *page, char **start, off_t off, + int count, int *eof, void *data); + +static int _vg_info(vg_t *vg_ptr, char *buf); +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf); +static int _pv_info(pv_t *pv_ptr, char *buf); + +static void _show_uuid(const char *src, char *b, char *e); + +#if 0 +static devfs_handle_t lvm_devfs_handle; +#endif +static devfs_handle_t vg_devfs_handle[MAX_VG]; +static devfs_handle_t ch_devfs_handle[MAX_VG]; +static devfs_handle_t lv_devfs_handle[MAX_LV]; + +static struct proc_dir_entry *lvm_proc_dir = NULL; +static struct proc_dir_entry *lvm_proc_vg_subdir = NULL; + +/* inline functions */ + +/* public interface */ +void __init lvm_init_fs() { + struct proc_dir_entry *pde; + +/* User-space has already registered this */ +#if 0 + lvm_devfs_handle = devfs_register( + 0 , "lvm", 0, LVM_CHAR_MAJOR, 0, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_chr_fops, NULL); +#endif + + lvm_proc_dir = create_proc_entry(LVM_DIR, S_IFDIR, &proc_root); + if (lvm_proc_dir) 
{ + lvm_proc_vg_subdir = create_proc_entry(LVM_VG_SUBDIR, S_IFDIR, + lvm_proc_dir); + pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); + if ( pde != NULL) pde->read_proc = _proc_read_global; + } +} + +void lvm_fin_fs() { +#if 0 + devfs_unregister (lvm_devfs_handle); +#endif + + remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); + remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); + remove_proc_entry(LVM_DIR, &proc_root); +} + +void lvm_fs_create_vg(vg_t *vg_ptr) { + struct proc_dir_entry *pde; + + vg_devfs_handle[vg_ptr->vg_number] = + devfs_mk_dir(0, vg_ptr->vg_name, NULL); + + ch_devfs_handle[vg_ptr->vg_number] = devfs_register( + vg_devfs_handle[vg_ptr->vg_number] , "group", + DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_chr_fops, NULL); + + vg_ptr->vg_dir_pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR, + lvm_proc_vg_subdir); + + if((pde = create_proc_entry("group", S_IFREG, vg_ptr->vg_dir_pde))) { + pde->read_proc = _proc_read_vg; + pde->data = vg_ptr; + } + + vg_ptr->lv_subdir_pde = + create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde); + + vg_ptr->pv_subdir_pde = + create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde); +} + +void lvm_fs_remove_vg(vg_t *vg_ptr) { + int i; + + devfs_unregister(ch_devfs_handle[vg_ptr->vg_number]); + devfs_unregister(vg_devfs_handle[vg_ptr->vg_number]); + + /* remove lv's */ + for(i = 0; i < vg_ptr->lv_max; i++) + if(vg_ptr->lv[i]) lvm_fs_remove_lv(vg_ptr, vg_ptr->lv[i]); + + /* remove pv's */ + for(i = 0; i < vg_ptr->pv_max; i++) + if(vg_ptr->pv[i]) lvm_fs_remove_pv(vg_ptr, vg_ptr->pv[i]); + + if(vg_ptr->vg_dir_pde) { + remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde); + vg_ptr->lv_subdir_pde = NULL; + + remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde); + vg_ptr->pv_subdir_pde = NULL; + + remove_proc_entry("group", vg_ptr->vg_dir_pde); + vg_ptr->vg_dir_pde = NULL; + + remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir); + } +} + + +static 
inline const char *_basename(const char *str) { + const char *name = strrchr(str, '/'); + name = name ? name + 1 : str; + return name; +} + +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) { + struct proc_dir_entry *pde; + const char *name = _basename(lv->lv_name); + + lv_devfs_handle[MINOR(lv->lv_dev)] = devfs_register( + vg_devfs_handle[vg_ptr->vg_number], name, + DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, MINOR(lv->lv_dev), + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_blk_dops, NULL); + + if(vg_ptr->lv_subdir_pde && + (pde = create_proc_entry(name, S_IFREG, vg_ptr->lv_subdir_pde))) { + pde->read_proc = _proc_read_lv; + pde->data = lv; + } + return lv_devfs_handle[MINOR(lv->lv_dev)]; +} + +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) { + devfs_unregister(lv_devfs_handle[MINOR(lv->lv_dev)]); + + if(vg_ptr->lv_subdir_pde) { + const char *name = _basename(lv->lv_name); + remove_proc_entry(name, vg_ptr->lv_subdir_pde); + } +} + + +static inline void _make_pv_name(const char *src, char *b, char *e) { + int offset = strlen(LVM_DIR_PREFIX); + if(strncmp(src, LVM_DIR_PREFIX, offset)) + offset = 0; + + e--; + src += offset; + while(*src && (b != e)) { + *b++ = (*src == '/') ? 
'_' : *src; + src++; + } + *b = '\0'; +} + +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv) { + struct proc_dir_entry *pde; + char name[NAME_LEN]; + + if(!vg_ptr->pv_subdir_pde) + return; + + _make_pv_name(pv->pv_name, name, name + sizeof(name)); + if((pde = create_proc_entry(name, S_IFREG, vg_ptr->pv_subdir_pde))) { + pde->read_proc = _proc_read_pv; + pde->data = pv; + } +} + +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv) { + char name[NAME_LEN]; + + if(!vg_ptr->pv_subdir_pde) + return; + + _make_pv_name(pv->pv_name, name, name + sizeof(name)); + remove_proc_entry(name, vg_ptr->pv_subdir_pde); +} + + +static int _proc_read_vg(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + vg_t *vg_ptr = data; + char uuid[NAME_LEN]; + + sz += sprintf(page + sz, "name: %s\n", vg_ptr->vg_name); + sz += sprintf(page + sz, "size: %u\n", + vg_ptr->pe_total * vg_ptr->pe_size / 2); + sz += sprintf(page + sz, "access: %u\n", vg_ptr->vg_access); + sz += sprintf(page + sz, "status: %u\n", vg_ptr->vg_status); + sz += sprintf(page + sz, "number: %u\n", vg_ptr->vg_number); + sz += sprintf(page + sz, "LV max: %u\n", vg_ptr->lv_max); + sz += sprintf(page + sz, "LV current: %u\n", vg_ptr->lv_cur); + sz += sprintf(page + sz, "LV open: %u\n", vg_ptr->lv_open); + sz += sprintf(page + sz, "PV max: %u\n", vg_ptr->pv_max); + sz += sprintf(page + sz, "PV current: %u\n", vg_ptr->pv_cur); + sz += sprintf(page + sz, "PV active: %u\n", vg_ptr->pv_act); + sz += sprintf(page + sz, "PE size: %u\n", vg_ptr->pe_size / 2); + sz += sprintf(page + sz, "PE total: %u\n", vg_ptr->pe_total); + sz += sprintf(page + sz, "PE allocated: %u\n", vg_ptr->pe_allocated); + + _show_uuid(vg_ptr->vg_uuid, uuid, uuid + sizeof(uuid)); + sz += sprintf(page + sz, "uuid: %s\n", uuid); + + return sz; +} + +static int _proc_read_lv(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + lv_t *lv = data; + + sz += sprintf(page + sz, "name: %s\n", lv->lv_name); + sz 
+= sprintf(page + sz, "size: %u\n", lv->lv_size); + sz += sprintf(page + sz, "access: %u\n", lv->lv_access); + sz += sprintf(page + sz, "status: %u\n", lv->lv_status); + sz += sprintf(page + sz, "number: %u\n", lv->lv_number); + sz += sprintf(page + sz, "open: %u\n", lv->lv_open); + sz += sprintf(page + sz, "allocation: %u\n", lv->lv_allocation); + if(lv->lv_stripes > 1) { + sz += sprintf(page + sz, "stripes: %u\n", + lv->lv_stripes); + sz += sprintf(page + sz, "stripesize: %u\n", + lv->lv_stripesize); + } + sz += sprintf(page + sz, "device: %02u:%02u\n", + MAJOR(lv->lv_dev), MINOR(lv->lv_dev)); + + return sz; +} + +static int _proc_read_pv(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + pv_t *pv = data; + char uuid[NAME_LEN]; + + sz += sprintf(page + sz, "name: %s\n", pv->pv_name); + sz += sprintf(page + sz, "size: %u\n", pv->pv_size); + sz += sprintf(page + sz, "status: %u\n", pv->pv_status); + sz += sprintf(page + sz, "number: %u\n", pv->pv_number); + sz += sprintf(page + sz, "allocatable: %u\n", pv->pv_allocatable); + sz += sprintf(page + sz, "LV current: %u\n", pv->lv_cur); + sz += sprintf(page + sz, "PE size: %u\n", pv->pe_size / 2); + sz += sprintf(page + sz, "PE total: %u\n", pv->pe_total); + sz += sprintf(page + sz, "PE allocated: %u\n", pv->pe_allocated); + sz += sprintf(page + sz, "device: %02u:%02u\n", + MAJOR(pv->pv_dev), MINOR(pv->pv_dev)); + + _show_uuid(pv->pv_uuid, uuid, uuid + sizeof(uuid)); + sz += sprintf(page + sz, "uuid: %s\n", uuid); + + return sz; +} + +static int _proc_read_global(char *page, char **start, off_t pos, int count, + int *eof, void *data) { + +#define LVM_PROC_BUF ( i == 0 ? 
dummy_buf : &buf[sz]) + + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, + lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds; + static off_t sz; + off_t sz_last; + static char *buf = NULL; + static char dummy_buf[160]; /* sized for 2 lines */ + vg_t *vg_ptr; + lv_t *lv_ptr; + pv_t *pv_ptr; + + +#ifdef DEBUG_LVM_PROC_GET_INFO + printk(KERN_DEBUG + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d\n", + lvm_name, pos, count); +#endif + + if(pos != 0 && buf != NULL) + goto out; + + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ + lv_open_total = pe_t_bytes = hash_table_bytes = \ + lv_block_exception_t_bytes = 0; + + /* get some statistics */ + for (v = 0; v < ABS_MAX_VG; v++) { + if ((vg_ptr = vg[v]) != NULL) { + vg_counter++; + pv_counter += vg_ptr->pv_cur; + lv_counter += vg_ptr->lv_cur; + if (vg_ptr->lv_cur > 0) { + for (l = 0; l < vg[v]->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL) { + pe_t_bytes += lv_ptr->lv_allocated_le; + hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size; + if (lv_ptr->lv_block_exception != NULL) + lv_block_exception_t_bytes += lv_ptr->lv_remap_end; + if (lv_ptr->lv_open > 0) { + lv_open_counter++; + lv_open_total += lv_ptr->lv_open; + } + } + } + } + } + } + + pe_t_bytes *= sizeof(pe_t); + lv_block_exception_t_bytes *= sizeof(lv_block_exception_t); + + if (buf != NULL) { + P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__); + lock_kernel(); + vfree(buf); + unlock_kernel(); + buf = NULL; + } + /* 2 times: first to get size to allocate buffer, + 2nd to fill the malloced buffer */ + for (i = 0; i < 2; i++) { + sz = 0; + sz += sprintf(LVM_PROC_BUF, + "LVM " +#ifdef MODULE + "module" +#else + "driver" +#endif + " %s\n\n" + "Total: %d VG%s %d PV%s %d LV%s ", + lvm_version, + vg_counter, vg_counter == 1 ? "" : "s", + pv_counter, pv_counter == 1 ? "" : "s", + lv_counter, lv_counter == 1 ? 
"" : "s"); + sz += sprintf(LVM_PROC_BUF, + "(%d LV%s open", + lv_open_counter, + lv_open_counter == 1 ? "" : "s"); + if (lv_open_total > 0) + sz += sprintf(LVM_PROC_BUF, + " %d times)\n", + lv_open_total); + else + sz += sprintf(LVM_PROC_BUF, ")"); + sz += sprintf(LVM_PROC_BUF, + "\nGlobal: %lu bytes malloced IOP version: %d ", + vg_counter * sizeof(vg_t) + + pv_counter * sizeof(pv_t) + + lv_counter * sizeof(lv_t) + + pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last, + lvm_iop_version); + + seconds = CURRENT_TIME - loadtime; + if (seconds < 0) + loadtime = CURRENT_TIME + seconds; + if (seconds / 86400 > 0) { + sz += sprintf(LVM_PROC_BUF, "%d day%s ", + seconds / 86400, + seconds / 86400 == 0 || + seconds / 86400 > 1 ? "s" : ""); + } + sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n", + (seconds % 86400) / 3600, + (seconds % 3600) / 60, + seconds % 60); + + if (vg_counter > 0) { + for (v = 0; v < ABS_MAX_VG; v++) { + /* volume group */ + if ((vg_ptr = vg[v]) != NULL) { + sz += _vg_info(vg_ptr, LVM_PROC_BUF); + + /* physical volumes */ + sz += sprintf(LVM_PROC_BUF, + "\n PV%s ", + vg_ptr->pv_cur == 1 ? ": " : "s:"); + c = 0; + for (p = 0; p < vg_ptr->pv_max; p++) { + if ((pv_ptr = vg_ptr->pv[p]) != NULL) { + sz += _pv_info(pv_ptr, LVM_PROC_BUF); + + c++; + if (c < vg_ptr->pv_cur) + sz += sprintf(LVM_PROC_BUF, + "\n "); + } + } + + /* logical volumes */ + sz += sprintf(LVM_PROC_BUF, + "\n LV%s ", + vg_ptr->lv_cur == 1 ? 
": " : "s:"); + c = 0; + for (l = 0; l < vg_ptr->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL) { + sz += _lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF); + c++; + if (c < vg_ptr->lv_cur) + sz += sprintf(LVM_PROC_BUF, + "\n "); + } + } + if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none"); + sz += sprintf(LVM_PROC_BUF, "\n"); + } + } + } + if (buf == NULL) { + lock_kernel(); + buf = vmalloc(sz); + unlock_kernel(); + if (buf == NULL) { + sz = 0; + return sprintf(page, "%s - vmalloc error at line %d\n", + lvm_name, __LINE__); + } + } + sz_last = sz; + } + + out: + if (pos > sz - 1) { + lock_kernel(); + vfree(buf); + unlock_kernel(); + buf = NULL; + return 0; + } + *start = &buf[pos]; + if (sz - pos < count) + return sz - pos; + else + return count; + +#undef LVM_PROC_BUF +} + +/* + * provide VG info for proc filesystem use (global) + */ +static int _vg_info(vg_t *vg_ptr, char *buf) { + int sz = 0; + char inactive_flag = ' '; + + if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; + sz = sprintf(buf, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg_ptr->vg_name, + vg_ptr->pv_cur, + vg_ptr->lv_cur, + vg_ptr->lv_open, + vg_ptr->pe_size >> 1, + vg_ptr->pe_size * vg_ptr->pe_total >> 1, + vg_ptr->pe_total, + vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, + vg_ptr->pe_allocated, + (vg_ptr->pe_total - vg_ptr->pe_allocated) * + vg_ptr->pe_size >> 1, + vg_ptr->pe_total - vg_ptr->pe_allocated); + return sz; +} + + +/* + * provide LV info for proc filesystem use (global) + */ +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' ', + stripes_flag = ' ', rw_flag = ' ', *basename; + + if (!(lv_ptr->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if (lv_ptr->lv_access & LV_WRITE) + rw_flag = 'W'; + allocation_flag = 'D'; + if (lv_ptr->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 
'C'; + stripes_flag = 'L'; + if (lv_ptr->lv_stripes > 1) + stripes_flag = 'S'; + sz += sprintf(buf+sz, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if (lv_ptr->lv_stripes > 1) + sz += sprintf(buf+sz, "%-2d", + lv_ptr->lv_stripes); + else + sz += sprintf(buf+sz, " "); + + /* FIXME: use _basename */ + basename = strrchr(lv_ptr->lv_name, '/'); + if ( basename == 0) basename = lv_ptr->lv_name; + else basename++; + sz += sprintf(buf+sz, "] %-25s", basename); + if (strlen(basename) > 25) + sz += sprintf(buf+sz, + "\n "); + sz += sprintf(buf+sz, "%9d /%-6d ", + lv_ptr->lv_size >> 1, + lv_ptr->lv_size / vg_ptr->pe_size); + + if (lv_ptr->lv_open == 0) + sz += sprintf(buf+sz, "close"); + else + sz += sprintf(buf+sz, "%dx open", + lv_ptr->lv_open); + + return sz; +} + + +/* + * provide PV info for proc filesystem use (global) + */ +static int _pv_info(pv_t *pv, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' '; + char *pv_name = NULL; + + if (!(pv->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if (!(pv->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + pv_name = strchr(pv->pv_name+1,'/'); + if ( pv_name == 0) pv_name = pv->pv_name; + else pv_name++; + sz = sprintf(buf, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + pv_name, + pv->pe_total * pv->pe_size >> 1, + pv->pe_total, + pv->pe_allocated * pv->pe_size >> 1, + pv->pe_allocated, + (pv->pe_total - pv->pe_allocated) * + pv->pe_size >> 1, + pv->pe_total - pv->pe_allocated); + return sz; +} + +static void _show_uuid(const char *src, char *b, char *e) { + int i; + + e--; + for(i = 0; *src && (b != e); i++) { + if(i && !(i & 0x3)) + *b++ = '-'; + *b++ = *src++; + } + *b = '\0'; +} +MODULE_LICENSE("GPL"); diff --git a/drivers/md/lvm-internal.h b/drivers/md/lvm-internal.h new file mode 100644 index 000000000000..c13facf2328f --- /dev/null +++ b/drivers/md/lvm-internal.h @@ -0,0 +1,101 @@ 
+/* + * kernel/lvm-internal.h + * + * Copyright (C) 2001 Sistina Software + * + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 05/01/2001:Joe Thornber - Factored this file out of lvm.c + * + */ + +#ifndef LVM_INTERNAL_H +#define LVM_INTERNAL_H + +#include <linux/lvm.h> + +#define _LVM_INTERNAL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")" + +/* global variables, defined in lvm.c */ +extern char *lvm_version; +extern ushort lvm_iop_version; +extern int loadtime; +extern const char *const lvm_name; + + +extern vg_t *vg[]; +extern struct file_operations lvm_chr_fops; + +extern struct block_device_operations lvm_blk_dops; + + +/* debug macros */ +#ifdef DEBUG_IOCTL +#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args) +#else +#define P_IOCTL(fmt, args...) +#endif + +#ifdef DEBUG_MAP +#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args) +#else +#define P_MAP(fmt, args...) +#endif + +#ifdef DEBUG_KFREE +#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args) +#else +#define P_KFREE(fmt, args...) +#endif + +#ifdef DEBUG_DEVICE +#define P_DEV(fmt, args...) printk(KERN_DEBUG "lvm device: " fmt, ## args) +#else +#define P_DEV(fmt, args...) 
+#endif + + +/* lvm-snap.c */ +int lvm_get_blksize(kdev_t); +int lvm_snapshot_alloc(lv_t *); +int lvm_snapshot_fill_COW_page(vg_t *, lv_t *); +int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, vg_t *vg, lv_t *); +int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); +void lvm_snapshot_release(lv_t *); +int lvm_write_COW_table_block(vg_t *, lv_t *); +void lvm_hash_link(lv_block_exception_t *, kdev_t, ulong, lv_t *); +int lvm_snapshot_alloc_hash_table(lv_t *); +void lvm_drop_snapshot(vg_t *vg, lv_t *, const char *); + + +/* lvm_fs.c */ +void lvm_init_fs(void); +void lvm_fin_fs(void); + +void lvm_fs_create_vg(vg_t *vg_ptr); +void lvm_fs_remove_vg(vg_t *vg_ptr); +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv); +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv); +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv); +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv); + +#endif diff --git a/drivers/md/lvm-snap.c b/drivers/md/lvm-snap.c index 20e40c022e90..e3d2a8b99d27 100644 --- a/drivers/md/lvm-snap.c +++ b/drivers/md/lvm-snap.c @@ -26,10 +26,19 @@ * * 05/07/2000 - implemented persistent snapshot support * 23/11/2000 - used cpu_to_le64 rather than my own macro + * 25/01/2001 - Put LockPage back in + * 01/02/2001 - A dropped snapshot is now set as inactive + * 12/03/2001 - lvm_pv_get_number changes: + * o made it static + * o renamed it to _pv_get_number + * o pv number is returned in new uint * arg + * o -1 returned on error + * lvm_snapshot_fill_COW_table has a return value too. 
* */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/vmalloc.h> #include <linux/blkdev.h> #include <linux/smp_lock.h> @@ -38,28 +47,43 @@ #include <linux/lvm.h> -#include "lvm-snap.h" +#include "lvm-internal.h" + +static char *lvm_snap_version __attribute__ ((unused)) = + "LVM "LVM_RELEASE_NAME" snapshot code ("LVM_RELEASE_DATE")\n"; -static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9.1_beta2 snapshot code (18/01/2001)\n"; extern const char *const lvm_name; extern int lvm_blocksizes[]; void lvm_snapshot_release(lv_t *); +static int _write_COW_table_block(vg_t *vg, lv_t *lv, int idx, + const char **reason); +static void _disable_snapshot(vg_t *vg, lv_t *lv); -uint lvm_pv_get_number(vg_t * vg, kdev_t rdev) -{ + +static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn) { uint p; + for(p = 0; p < vg->pv_max; p++) { + if(vg->pv[p] == NULL) + continue; + + if(vg->pv[p]->pv_dev == rdev) + break; - for ( p = 0; p < vg->pv_max; p++) - { - if ( vg->pv[p] == NULL) continue; - if ( vg->pv[p]->pv_dev == rdev) break; } - return vg->pv[p]->pv_number; -} + if(p >= vg->pv_max) { + /* bad news, the snapshot COW table is probably corrupt */ + printk(KERN_ERR + "%s -- _pv_get_number failed for rdev = %u\n", + lvm_name, rdev); + return -1; + } + *pvn = vg->pv[p]->pv_number; + return 0; +} #define hashfn(dev,block,mask,chunk_size) \ ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) @@ -133,7 +157,7 @@ int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, return ret; } -void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +void lvm_drop_snapshot(vg_t *vg, lv_t *lv_snap, const char *reason) { kdev_t last_dev; int i; @@ -142,6 +166,9 @@ void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) or error on this snapshot --> release it */ invalidate_buffers(lv_snap->lv_dev); + /* wipe the snapshot since it's inconsistent now */ + _disable_snapshot(vg, lv_snap); + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { if ( 
lv_snap->lv_block_exception[i].rdev_new != last_dev) { last_dev = lv_snap->lv_block_exception[i].rdev_new; @@ -150,26 +177,33 @@ void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) } lvm_snapshot_release(lv_snap); + lv_snap->lv_status &= ~LV_ACTIVE; printk(KERN_INFO - "%s -- giving up to snapshot %s on %s due %s\n", + "%s -- giving up to snapshot %s on %s: %s\n", lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name, reason); } -static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, - unsigned long start, - int nr_sectors, - int blocksize) +static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks, + unsigned long start, + int nr_sectors, + int blocksize) { int i, sectors_per_block, nr_blocks; - sectors_per_block = blocksize >> 9; + sectors_per_block = blocksize / SECTOR_SIZE; + + if(start & (sectors_per_block - 1)) + return 0; + nr_blocks = nr_sectors / sectors_per_block; start /= sectors_per_block; for (i = 0; i < nr_blocks; i++) blocks[i] = start++; + + return 1; } inline int lvm_get_blksize(kdev_t dev) @@ -209,128 +243,59 @@ static inline void invalidate_snap_cache(unsigned long start, unsigned long nr, #endif -void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap) +int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap) { - int id = 0, is = lv_snap->lv_remap_ptr; - ulong blksize_snap; - lv_COW_table_disk_t * lv_COW_table = - ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); + uint pvn; + int id = 0, is = lv_snap->lv_remap_ptr; + ulong blksize_snap; + lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *) + page_address(lv_snap->lv_COW_table_iobuf->maplist[0]); + + if (is == 0) + return 0; - if (is == 0) return; is--; - blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new); + blksize_snap = + lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new); is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t)); memset(lv_COW_table, 0, blksize_snap); for ( ; is < 
lv_snap->lv_remap_ptr; is++, id++) { /* store new COW_table entry */ - lv_COW_table[id].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org)); - lv_COW_table[id].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_org); - lv_COW_table[id].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new)); - lv_COW_table[id].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_new); + lv_block_exception_t *be = lv_snap->lv_block_exception + is; + if(_pv_get_number(vg, be->rdev_org, &pvn)) + goto bad; + + lv_COW_table[id].pv_org_number = cpu_to_le64(pvn); + lv_COW_table[id].pv_org_rsector = cpu_to_le64(be->rsector_org); + if(_pv_get_number(vg, be->rdev_new, &pvn)) + goto bad; + + lv_COW_table[id].pv_snap_number = cpu_to_le64(pvn); + lv_COW_table[id].pv_snap_rsector = + cpu_to_le64(be->rsector_new); } + + return 0; + + bad: + printk(KERN_ERR "%s -- lvm_snapshot_fill_COW_page failed", lvm_name); + return -1; } /* * writes a COW exception table sector to disk (HM) - * */ -int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap) +int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap) { - int blksize_snap; - int end_of_table; - int idx = lv_snap->lv_remap_ptr, idx_COW_table; - int nr_pages_tmp; - int length_tmp; - ulong snap_pe_start, COW_table_sector_offset, - COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block; - const char * reason; - kdev_t snap_phys_dev; - struct kiobuf * iobuf = lv_snap->lv_iobuf; - struct page * page_tmp; - lv_COW_table_disk_t * lv_COW_table = - ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); - - idx--; - - COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap); - COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap); - - /* get physical addresse of destination chunk */ - snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; - snap_pe_start = lv_snap->lv_block_exception[idx - (idx % 
COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; - - blksize_snap = lvm_get_blksize(snap_phys_dev); - - COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); - idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; - - if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap); - - /* sector offset into the on disk COW table */ - COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); - - /* COW table block to write next */ - iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); - - /* store new COW_table entry */ - lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org)); - lv_COW_table[idx_COW_table].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_org); - lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, snap_phys_dev)); - lv_COW_table[idx_COW_table].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_new); - - length_tmp = iobuf->length; - iobuf->length = blksize_snap; - page_tmp = iobuf->maplist[0]; - iobuf->maplist[0] = lv_snap->lv_COW_table_page; - nr_pages_tmp = iobuf->nr_pages; - iobuf->nr_pages = 1; - - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != blksize_snap) - goto fail_raw_write; - - - /* initialization of next COW exception table block with zeroes */ - end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1; - if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table) - { - /* don't go beyond the end */ - if (idx + 1 >= lv_snap->lv_remap_end) goto good_out; - - memset(lv_COW_table, 0, blksize_snap); - - if (end_of_table) - { - idx++; - snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; - snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; - blksize_snap = 
lvm_get_blksize(snap_phys_dev); - iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10); - } else iobuf->blocks[0]++; - - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != blksize_snap) - goto fail_raw_write; - } - - - good_out: - iobuf->length = length_tmp; - iobuf->maplist[0] = page_tmp; - iobuf->nr_pages = nr_pages_tmp; - return 0; - - /* slow path */ - out: - lvm_drop_snapshot(lv_snap, reason); - return 1; - - fail_raw_write: - reason = "write error"; - goto out; + int r; + const char *err; + if((r = _write_COW_table_block(vg, lv_snap, + lv_snap->lv_remap_ptr - 1, &err))) + lvm_drop_snapshot(vg, lv_snap, err); + return r; } /* @@ -345,7 +310,7 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, unsigned long org_phys_sector, unsigned long org_pe_start, unsigned long org_virt_sector, - lv_t * lv_snap) + vg_t *vg, lv_t* lv_snap) { const char * reason; unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; @@ -370,13 +335,11 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, #ifdef DEBUG_SNAPSHOT printk(KERN_INFO "%s -- COW: " - "org %02d:%02d faulting %lu start %lu, " - "snap %02d:%02d start %lu, " + "org %s faulting %lu start %lu, snap %s start %lu, " "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n", lvm_name, - MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector, - org_start, - MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start, + kdevname(org_phys_dev), org_phys_sector, org_start, + kdevname(snap_phys_dev), snap_start, chunk_size, org_pe_start, pe_off, org_virt_sector); @@ -400,14 +363,18 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, iobuf->length = nr_sectors << 9; - lvm_snapshot_prepare_blocks(iobuf->blocks, org_start, - nr_sectors, blksize_org); + if(!lvm_snapshot_prepare_blocks(iobuf->blocks, org_start, + nr_sectors, blksize_org)) + goto fail_prepare; + if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, iobuf->blocks, blksize_org) != (nr_sectors<<9)) goto fail_raw_read; - 
lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start, - nr_sectors, blksize_snap); + if(!lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start, + nr_sectors, blksize_snap)) + goto fail_prepare; + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, iobuf->blocks, blksize_snap) != (nr_sectors<<9)) goto fail_raw_write; @@ -435,7 +402,7 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, /* slow path */ out: - lvm_drop_snapshot(lv_snap, reason); + lvm_drop_snapshot(vg, lv_snap, reason); return 1; fail_out_of_space: @@ -450,20 +417,24 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, fail_blksize: reason = "blocksize error"; goto out; + + fail_prepare: + reason = "couldn't prepare kiovec blocks " + "(start probably isn't block aligned)"; + goto out; } int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) { int bytes, nr_pages, err, i; - bytes = sectors << 9; + bytes = sectors * SECTOR_SIZE; nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT; err = expand_kiobuf(iobuf, nr_pages); - if (err) - goto out; + if (err) goto out; err = -ENOMEM; - iobuf->locked = 0; + iobuf->locked = 1; iobuf->nr_pages = 0; for (i = 0; i < nr_pages; i++) { @@ -474,6 +445,7 @@ int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) goto out; iobuf->maplist[i] = page; + LockPage(page); iobuf->nr_pages++; } iobuf->offset = 0; @@ -521,47 +493,58 @@ int lvm_snapshot_alloc_hash_table(lv_t * lv) while (buckets--) INIT_LIST_HEAD(hash+buckets); err = 0; - out: +out: return err; } int lvm_snapshot_alloc(lv_t * lv_snap) { - int err, blocksize, max_sectors; + int ret, max_sectors; + int nbhs = KIO_MAX_SECTORS; - err = alloc_kiovec(1, &lv_snap->lv_iobuf); - if (err) - goto out; + /* allocate kiovec to do chunk io */ + ret = alloc_kiovec_sz(1, &lv_snap->lv_iobuf, &nbhs); + if (ret) goto out; - blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); - err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); - if (err) - goto out_free_kiovec; 
+ ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); + if (ret) goto out_free_kiovec; - err = lvm_snapshot_alloc_hash_table(lv_snap); - if (err) - goto out_free_kiovec; + /* allocate kiovec to do exception table io */ + ret = alloc_kiovec_sz(1, &lv_snap->lv_COW_table_iobuf, &nbhs); + if (ret) goto out_free_kiovec; + ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_COW_table_iobuf, + PAGE_SIZE/SECTOR_SIZE); + if (ret) goto out_free_both_kiovecs; - lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL); - if (!lv_snap->lv_COW_table_page) - goto out_free_kiovec; + ret = lvm_snapshot_alloc_hash_table(lv_snap); + if (ret) goto out_free_both_kiovecs; - out: - return err; - out_free_kiovec: +out: + return ret; + +out_free_both_kiovecs: + unmap_kiobuf(lv_snap->lv_COW_table_iobuf); + free_kiovec_sz(1, &lv_snap->lv_COW_table_iobuf, &nbhs); + lv_snap->lv_COW_table_iobuf = NULL; + +out_free_kiovec: unmap_kiobuf(lv_snap->lv_iobuf); - free_kiovec(1, &lv_snap->lv_iobuf); - vfree(lv_snap->lv_snapshot_hash_table); + free_kiovec_sz(1, &lv_snap->lv_iobuf, &nbhs); + lv_snap->lv_iobuf = NULL; + if (lv_snap->lv_snapshot_hash_table != NULL) + vfree(lv_snap->lv_snapshot_hash_table); lv_snap->lv_snapshot_hash_table = NULL; goto out; } void lvm_snapshot_release(lv_t * lv) { + int nbhs = KIO_MAX_SECTORS; + if (lv->lv_block_exception) { vfree(lv->lv_block_exception); @@ -577,12 +560,129 @@ void lvm_snapshot_release(lv_t * lv) { kiobuf_wait_for_io(lv->lv_iobuf); unmap_kiobuf(lv->lv_iobuf); - free_kiovec(1, &lv->lv_iobuf); + free_kiovec_sz(1, &lv->lv_iobuf, &nbhs); lv->lv_iobuf = NULL; } - if (lv->lv_COW_table_page) + if (lv->lv_COW_table_iobuf) + { + kiobuf_wait_for_io(lv->lv_COW_table_iobuf); + unmap_kiobuf(lv->lv_COW_table_iobuf); + free_kiovec_sz(1, &lv->lv_COW_table_iobuf, &nbhs); + lv->lv_COW_table_iobuf = NULL; + } +} + + +static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap, + int idx, const char **reason) { + int blksize_snap; + int end_of_table; + int 
idx_COW_table; + uint pvn; + ulong snap_pe_start, COW_table_sector_offset, + COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block; + ulong blocks[1]; + kdev_t snap_phys_dev; + lv_block_exception_t *be; + struct kiobuf * COW_table_iobuf = lv_snap->lv_COW_table_iobuf; + lv_COW_table_disk_t * lv_COW_table = + ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_iobuf->maplist[0]); + + COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap); + COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap); + + /* get physical addresse of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + + blksize_snap = lvm_get_blksize(snap_phys_dev); + + COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); + idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; + + if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap); + + /* sector offset into the on disk COW table */ + COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); + + /* COW table block to write next */ + blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); + + /* store new COW_table entry */ + be = lv_snap->lv_block_exception + idx; + if(_pv_get_number(vg, be->rdev_org, &pvn)) + goto fail_pv_get_number; + + lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(pvn); + lv_COW_table[idx_COW_table].pv_org_rsector = + cpu_to_le64(be->rsector_org); + if(_pv_get_number(vg, snap_phys_dev, &pvn)) + goto fail_pv_get_number; + + lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(pvn); + lv_COW_table[idx_COW_table].pv_snap_rsector = + cpu_to_le64(be->rsector_new); + + COW_table_iobuf->length = blksize_snap; + + if (brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev, + blocks, blksize_snap) != blksize_snap) + goto fail_raw_write; + + /* 
initialization of next COW exception table block with zeroes */ + end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1; + if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table) { - free_page((ulong)lv->lv_COW_table_page); - lv->lv_COW_table_page = NULL; + /* don't go beyond the end */ + if (idx + 1 >= lv_snap->lv_remap_end) goto out; + + memset(lv_COW_table, 0, blksize_snap); + + if (end_of_table) + { + idx++; + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + blksize_snap = lvm_get_blksize(snap_phys_dev); + blocks[0] = snap_pe_start >> (blksize_snap >> 10); + } else blocks[0]++; + + if (brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev, + blocks, blksize_snap) != + blksize_snap) + goto fail_raw_write; } + + out: + return 0; + + fail_raw_write: + *reason = "write error"; + return 1; + + fail_pv_get_number: + *reason = "_pv_get_number failed"; + return 1; } + +/* + * FIXME_1.2 + * This function is a bit of a hack; we need to ensure that the + * snapshot is never made active again, because it will surely be + * corrupt. At the moment we do not have access to the LVM metadata + * from within the kernel. So we set the first exception to point to + * sector 1 (which will always be within the metadata, and as such + * invalid). User land tools will check for this when they are asked + * to activate the snapshot and prevent this from happening. 
+ */ + +static void _disable_snapshot(vg_t *vg, lv_t *lv) { + const char *err; + lv->lv_block_exception[0].rsector_org = LVM_SNAPSHOT_DROPPED_SECTOR; + if(_write_COW_table_block(vg, lv, 0, &err) < 0) { + printk(KERN_ERR "%s -- couldn't disable snapshot: %s\n", + lvm_name, err); + } +} + +MODULE_LICENSE("GPL"); diff --git a/drivers/md/lvm-snap.h b/drivers/md/lvm-snap.h deleted file mode 100644 index 23538a1b7467..000000000000 --- a/drivers/md/lvm-snap.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * kernel/lvm-snap.h - * - * Copyright (C) 2001 Sistina Software - * - * - * LVM driver is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * LVM driver is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. 
- * - */ - -/* - * Changelog - * - * 05/01/2001:Joe Thornber - Factored this file out of lvm.c - * - */ - -#ifndef LVM_SNAP_H -#define LVM_SNAP_H - -/* external snapshot calls */ -extern inline int lvm_get_blksize(kdev_t); -extern int lvm_snapshot_alloc(lv_t *); -extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *); -extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *); -extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); -extern void lvm_snapshot_release(lv_t *); -extern int lvm_write_COW_table_block(vg_t *, lv_t *); -extern inline void lvm_hash_link(lv_block_exception_t *, - kdev_t, ulong, lv_t *); -extern int lvm_snapshot_alloc_hash_table(lv_t *); -extern void lvm_drop_snapshot(lv_t *, const char *); - -#endif diff --git a/drivers/md/lvm.c b/drivers/md/lvm.c index 05a0d2fd54e9..abae4cbe98ba 100644 --- a/drivers/md/lvm.c +++ b/drivers/md/lvm.c @@ -147,25 +147,51 @@ * 08/01/2001 - Removed conditional compiles related to PROC_FS, * procfs is always supported now. (JT) * 12/01/2001 - avoided flushing logical volume in case of shrinking - * because of unecessary overhead in case of heavy updates + * because of unnecessary overhead in case of heavy updates + * 25/01/2001 - Allow RO open of an inactive LV so it can be reactivated. 
+ * 31/01/2001 - If you try and BMAP a snapshot you now get an -EPERM + * 01/02/2001 - factored __remap_snapshot out of lvm_map + * 12/02/2001 - move devfs code to create VG before LVs + * 14/02/2001 - tidied device defines for blk.h + * - tidied debug statements + * - more lvm_map tidying + * 14/02/2001 - bug: vg[] member not set back to NULL if activation fails + * 28/02/2001 - introduced the P_DEV macro and changed some internel + * functions to be static [AD] + * 28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD] + * - fixed user address accessing bug in lvm_do_lv_create() + * where the check for an existing LV takes place right at + * the beginning + * 01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility + * 02/03/2001 - Don't destroy usermode pointers in lv_t structures duing LV_ + * STATUS_BYxxx and remove redundant lv_t variables from same. + * 05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For + * lvdisplay -v (PC) + * - restore copying pe_t array in lvm_do_lv_status_byindex (HM) + * - added copying pe_t array in lvm_do_lv_status_bydev (HM) + * - enhanced lvm_do_lv_status_by{name,index,dev} to be capable + * to copy the lv_block_exception_t array to userspace (HM) + * 08/03/2001 - factored lvm_do_pv_flush out of lvm_chr_ioctl [HM] + * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock + * when the locking process closes. * 05/04/2001 - lvm_map bugs: don't use b_blocknr/b_dev in lvm_map, it * destroys stacking devices. call b_end_io on failed maps. 
* (Jens Axboe) + * - Defer writes to an extent that is being moved [JT + AD] + * 28/05/2001 - implemented missing BLKSSZGET ioctl [AD] * */ -static char *lvm_version = "LVM version 0.9.1_beta2 by Heinz Mauelshagen (18/01/2001)\n"; -static char *lvm_short_version = "version 0.9.1_beta2 (18/01/2001)"; - -#define MAJOR_NR LVM_BLK_MAJOR -#define DEVICE_OFF(device) +#define MAJOR_NR LVM_BLK_MAJOR +#define DEVICE_OFF(device) +#define LOCAL_END_REQUEST /* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ /* #define LVM_VFS_ENHANCEMENT */ #include <linux/config.h> -#include <linux/version.h> + #include <linux/module.h> #include <linux/kernel.h> @@ -180,6 +206,7 @@ static char *lvm_short_version = "version 0.9.1_beta2 (18/01/2001)"; #include <linux/blkdev.h> #include <linux/genhd.h> #include <linux/locks.h> +#include <linux/devfs_fs_kernel.h> #include <linux/smp_lock.h> #include <asm/ioctl.h> #include <asm/segment.h> @@ -195,38 +222,16 @@ static char *lvm_short_version = "version 0.9.1_beta2 (18/01/2001)"; #include <linux/errno.h> #include <linux/lvm.h> -#include "lvm-snap.h" +#include "lvm-internal.h" -#define LVM_CORRECT_READ_AHEAD(a) \ -do { \ - if ((a) < LVM_MIN_READ_AHEAD || \ - (a) > LVM_MAX_READ_AHEAD) \ - (a) = LVM_DEFAULT_READ_AHEAD; \ - read_ahead[MAJOR_NR] = (a); \ -} while(0) +#define LVM_CORRECT_READ_AHEAD( a) \ + if ( a < LVM_MIN_READ_AHEAD || \ + a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD; #ifndef WRITEA # define WRITEA WRITE #endif -/* debug macros */ -#ifdef DEBUG_IOCTL -#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args) -#else -#define P_IOCTL(fmt, args...) -#endif - -#ifdef DEBUG_MAP -#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args) -#else -#define P_MAP(fmt, args...) -#endif - -#ifdef DEBUG_KFREE -#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args) -#else -#define P_KFREE(fmt, args...) 
-#endif /* * External function prototypes @@ -236,27 +241,14 @@ static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); -static int lvm_chr_open(struct inode *, struct file *); - -static int lvm_chr_close(struct inode *, struct file *); static int lvm_blk_close(struct inode *, struct file *); +static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg); static int lvm_user_bmap(struct inode *, struct lv_bmap *); +static int lvm_chr_open(struct inode *, struct file *); +static int lvm_chr_close(struct inode *, struct file *); static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong); -int lvm_proc_read_vg_info(char *, char **, off_t, int, int *, void *); -int lvm_proc_read_lv_info(char *, char **, off_t, int, int *, void *); -int lvm_proc_read_pv_info(char *, char **, off_t, int, int *, void *); -static int lvm_proc_get_global_info(char *, char **, off_t, int, int *, void *); - -void lvm_do_create_devfs_entry_of_vg ( vg_t *); - -void lvm_do_create_proc_entry_of_vg ( vg_t *); -void lvm_do_remove_proc_entry_of_vg ( vg_t *); -void lvm_do_create_proc_entry_of_lv ( vg_t *, lv_t *); -void lvm_do_remove_proc_entry_of_lv ( vg_t *, lv_t *); -void lvm_do_create_proc_entry_of_pv ( vg_t *, pv_t *); -void lvm_do_remove_proc_entry_of_pv ( vg_t *, pv_t *); /* End external function prototypes */ @@ -288,34 +280,41 @@ static int lvm_do_pe_lock_unlock(vg_t *r, void *); static int lvm_do_pv_change(vg_t*, void*); static int lvm_do_pv_status(vg_t *, void *); +static int lvm_do_pv_flush(void *); -static int lvm_do_vg_create(int, void *); +static int lvm_do_vg_create(void *, int minor); static int lvm_do_vg_extend(vg_t *, void *); static int lvm_do_vg_reduce(vg_t *, void *); static int lvm_do_vg_rename(vg_t *, void *); static int lvm_do_vg_remove(int); static void lvm_geninit(struct gendisk *); -static char *lvm_show_uuid ( char *); 
+static void __update_hardsectsize(lv_t *lv); + + +static void _queue_io(struct buffer_head *bh, int rw); +static struct buffer_head *_dequeue_io(void); +static void _flush_io(struct buffer_head *bh); + +static int _open_pv(pv_t *pv); +static void _close_pv(pv_t *pv); + +static unsigned long _sectors_to_k(unsigned long sect); + #ifdef LVM_HD_NAME void lvm_hd_name(char *, int); #endif /* END Internal function prototypes */ -/* volume group descriptor area pointers */ -static vg_t *vg[ABS_MAX_VG]; - -static devfs_handle_t lvm_devfs_handle; -static devfs_handle_t vg_devfs_handle[MAX_VG]; -static devfs_handle_t ch_devfs_handle[MAX_VG]; -static devfs_handle_t lv_devfs_handle[MAX_LV]; +/* variables */ +char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")"; +ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; +int loadtime = 0; +const char *const lvm_name = LVM_NAME; -static pv_t *pvp = NULL; -static lv_t *lvp = NULL; -static pe_t *pep = NULL; -static pe_t *pep1 = NULL; -static char *basename = NULL; +/* volume group descriptor area pointers */ +vg_t *vg[ABS_MAX_VG]; /* map from block minor number to VG and LV numbers */ typedef struct { @@ -327,9 +326,8 @@ static vg_lv_map_t vg_lv_map[ABS_MAX_LV]; /* Request structures (lvm_chr_ioctl()) */ static pv_change_req_t pv_change_req; -static pv_flush_req_t pv_flush_req; static pv_status_req_t pv_status_req; -static pe_lock_req_t pe_lock_req; +volatile static pe_lock_req_t pe_lock_req; static le_remap_req_t le_remap_req; static lv_req_t lv_req; @@ -339,35 +337,28 @@ static int lvm_reset_spindown = 0; static char pv_name[NAME_LEN]; /* static char rootvg[NAME_LEN] = { 0, }; */ -const char *const lvm_name = LVM_NAME; static int lock = 0; -static int loadtime = 0; +static int _lock_open_count = 0; static uint vg_count = 0; static long lvm_chr_open_count = 0; -static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; static DECLARE_WAIT_QUEUE_HEAD(lvm_wait); -static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait); static 
spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; -static struct proc_dir_entry *lvm_proc_dir = NULL; -static struct proc_dir_entry *lvm_proc_vg_subdir = NULL; -struct proc_dir_entry *pde = NULL; +static struct buffer_head *_pe_requests; +static DECLARE_RWSEM(_pe_lock); -static struct file_operations lvm_chr_fops = -{ - owner: THIS_MODULE, + +struct file_operations lvm_chr_fops = { open: lvm_chr_open, release: lvm_chr_close, ioctl: lvm_chr_ioctl, }; - /* block device operations structure needed for 2.3.38? and above */ -static struct block_device_operations lvm_blk_dops = +struct block_device_operations lvm_blk_dops = { - owner: THIS_MODULE, open: lvm_blk_open, release: lvm_blk_close, ioctl: lvm_blk_ioctl, @@ -376,10 +367,10 @@ static struct block_device_operations lvm_blk_dops = /* gendisk structures */ static struct hd_struct lvm_hd_struct[MAX_LV]; -static int lvm_blocksizes[MAX_LV] = -{0,}; -static int lvm_size[MAX_LV] = -{0,}; +static int lvm_blocksizes[MAX_LV]; +static int lvm_hardsectsizes[MAX_LV]; +static int lvm_size[MAX_LV]; + static struct gendisk lvm_gendisk = { major: MAJOR_NR, @@ -396,30 +387,24 @@ static struct gendisk lvm_gendisk = */ int lvm_init(void) { - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { - printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); + if (devfs_register_chrdev(LVM_CHAR_MAJOR, + lvm_name, &lvm_chr_fops) < 0) { + printk(KERN_ERR "%s -- devfs_register_chrdev failed\n", + lvm_name); return -EIO; } - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) + + if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) { - printk("%s -- register_blkdev failed\n", lvm_name); - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + printk("%s -- devfs_register_blkdev failed\n", lvm_name); + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) + printk(KERN_ERR + "%s -- 
devfs_unregister_chrdev failed\n", + lvm_name); return -EIO; } - lvm_devfs_handle = devfs_register( - 0 , "lvm", 0, 0, LVM_CHAR_MAJOR, - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_chr_fops, NULL); - - lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); - if (lvm_proc_dir != NULL) { - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); - pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; - } - + lvm_init_fs(); lvm_init_vars(); lvm_geninit(&lvm_gendisk); @@ -433,20 +418,19 @@ int lvm_init(void) blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn); + /* initialise the pe lock */ + pe_lock_req.lock = UNLOCK_PE; + /* optional read root VGDA */ /* if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); */ - printk(KERN_INFO - "%s%s -- " #ifdef MODULE - "Module" + printk(KERN_INFO "%s module loaded\n", lvm_version); #else - "Driver" + printk(KERN_INFO "%s\n", lvm_version); #endif - " successfully initialized\n", - lvm_version, lvm_name); return 0; } /* lvm_init() */ @@ -457,15 +441,12 @@ int lvm_init(void) */ static void lvm_cleanup(void) { - devfs_unregister (lvm_devfs_handle); - - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); - } - if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) { - printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); - } - + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) + printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n", + lvm_name); + if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0) + printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n", + lvm_name); del_gendisk(&lvm_gendisk); @@ -473,25 +454,25 @@ static void lvm_cleanup(void) blksize_size[MAJOR_NR] = NULL; hardsect_size[MAJOR_NR] = NULL; - remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); - remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); - 
remove_proc_entry(LVM_DIR, &proc_root); - #ifdef LVM_HD_NAME /* reference from linux/drivers/block/genhd.c */ lvm_hd_name_ptr = NULL; #endif + /* unregister with procfs and devfs */ + lvm_fin_fs(); + +#ifdef MODULE printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); +#endif return; } /* lvm_cleanup() */ - /* * support function to initialize lvm variables */ -void __init lvm_init_vars(void) +static void __init lvm_init_vars(void) { int v; @@ -500,8 +481,8 @@ void __init lvm_init_vars(void) lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = \ - pe_lock_req.data.pv_dev = \ + pe_lock_req.data.lv_dev = 0; + pe_lock_req.data.pv_dev = 0; pe_lock_req.data.pv_offset = 0; /* Initialize VG pointers */ @@ -524,19 +505,18 @@ void __init lvm_init_vars(void) * ********************************************************************/ +#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \ + (mode) & FMODE_WRITE ? "WRITE" : "" + /* * character device open routine */ -static int lvm_chr_open(struct inode *inode, - struct file *file) +static int lvm_chr_open(struct inode *inode, struct file *file) { - int minor = MINOR(inode->i_rdev); + unsigned int minor = MINOR(inode->i_rdev); -#ifdef DEBUG - printk(KERN_DEBUG - "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n", - lvm_name, minor, VG_CHR(minor), file->f_mode, lock); -#endif + P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n", + minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock); /* super user validation */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -544,8 +524,15 @@ static int lvm_chr_open(struct inode *inode, /* Group special file open */ if (VG_CHR(minor) > MAX_VG) return -ENXIO; + spin_lock(&lvm_lock); + if(lock == current->pid) + _lock_open_count++; + spin_unlock(&lvm_lock); + lvm_chr_open_count++; + MOD_INC_USE_COUNT; + return 0; } /* lvm_chr_open() */ @@ -558,7 +545,7 @@ static int lvm_chr_open(struct inode *inode, * */ 
static int lvm_chr_ioctl(struct inode *inode, struct file *file, - uint command, ulong a) + uint command, ulong a) { int minor = MINOR(inode->i_rdev); uint extendable, l, v; @@ -569,9 +556,8 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, /* otherwise cc will complain about unused variables */ (void) lvm_lock; - P_IOCTL("%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d " - "VG#: %d mode: 0x%X\n", - lvm_name, command, minor, VG_CHR(minor), file->f_mode); + P_IOCTL("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n", + minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode)); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) return -EACCES; @@ -619,9 +605,13 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, physical volume (move's done in user space's pvmove) */ return lvm_do_pe_lock_unlock(vg_ptr,arg); - case VG_CREATE: + case VG_CREATE_OLD: /* create a VGDA */ - return lvm_do_vg_create(minor, arg); + return lvm_do_vg_create(arg, minor); + + case VG_CREATE: + /* create a VGDA, assume VG number is filled in */ + return lvm_do_vg_create(arg, -1); case VG_EXTEND: /* extend a volume group */ @@ -672,7 +662,7 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, case VG_STATUS_GET_NAMELIST: - /* get volume group count */ + /* get volume group names */ for (l = v = 0; v < ABS_MAX_VG; v++) { if (vg[v] != NULL) { if (copy_to_user(arg + l * NAME_LEN, @@ -727,6 +717,7 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, case LV_STATUS_BYDEV: + /* get status of a logical volume by device */ return lvm_do_lv_status_bydev(vg_ptr, arg); @@ -742,18 +733,12 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, case PV_FLUSH: /* physical volume buffer flush/invalidate */ - if (copy_from_user(&pv_flush_req, arg, - sizeof(pv_flush_req)) != 0) - return -EFAULT; - - fsync_dev(pv_flush_req.pv_dev); - invalidate_buffers(pv_flush_req.pv_dev); - return 0; + return lvm_do_pv_flush(arg); default: 
printk(KERN_WARNING - "%s -- lvm_chr_ioctl: unknown command %x\n", + "%s -- lvm_chr_ioctl: unknown command 0x%x\n", lvm_name, command); return -EINVAL; } @@ -767,11 +752,8 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, */ static int lvm_chr_close(struct inode *inode, struct file *file) { -#ifdef DEBUG - int minor = MINOR(inode->i_rdev); - printk(KERN_DEBUG - "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor)); -#endif + P_DEV("chr_close MINOR: %d VG#: %d\n", + MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev))); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) { @@ -781,10 +763,19 @@ static int lvm_chr_close(struct inode *inode, struct file *file) #endif if (lvm_chr_open_count > 0) lvm_chr_open_count--; - if (lock == current->pid) { - lock = 0; /* release lock */ - wake_up_interruptible(&lvm_wait); + + spin_lock(&lvm_lock); + if(lock == current->pid) { + if(!_lock_open_count) { + P_DEV("chr_close: unlocking LVM for pid %d\n", lock); + lock = 0; + wake_up_interruptible(&lvm_wait); + } else + _lock_open_count--; } + spin_unlock(&lvm_lock); + + MOD_DEC_USE_COUNT; return 0; } /* lvm_chr_close() */ @@ -806,11 +797,8 @@ static int lvm_blk_open(struct inode *inode, struct file *file) lv_t *lv_ptr; vg_t *vg_ptr = vg[VG_BLK(minor)]; -#ifdef DEBUG_LVM_BLK_OPEN - printk(KERN_DEBUG - "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n", - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode); -#endif + P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n", + minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode)); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) @@ -827,8 +815,12 @@ static int lvm_blk_open(struct inode *inode, struct file *file) if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM; /* Check inactive LV and open for read/write */ - if (!(lv_ptr->lv_status & LV_ACTIVE)) - return -EPERM; + /* We need to be able to "read" an inactive LV + to re-activate it again */ + if ((file->f_mode & FMODE_WRITE) && + 
(!(lv_ptr->lv_status & LV_ACTIVE))) + return -EPERM; + if (!(lv_ptr->lv_access & LV_WRITE) && (file->f_mode & FMODE_WRITE)) return -EACCES; @@ -838,12 +830,9 @@ static int lvm_blk_open(struct inode *inode, struct file *file) if (lv_ptr->lv_open == 0) vg_ptr->lv_open++; lv_ptr->lv_open++; -#ifdef DEBUG_LVM_BLK_OPEN - printk(KERN_DEBUG - "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n", - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), - lv_ptr->lv_size); -#endif + MOD_INC_USE_COUNT; + + P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size); return 0; } @@ -863,16 +852,18 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, void *arg = (void *) a; struct hd_geometry *hd = (struct hd_geometry *) a; - P_IOCTL("%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X " - "VG#: %dl LV#: %d\n", - lvm_name, minor, command, (ulong) arg, - VG_BLK(minor), LV_BLK(minor)); + P_IOCTL("blk MINOR: %d command: 0x%X arg: %p VG#: %d LV#: %d " + "mode: %s%s\n", minor, command, arg, VG_BLK(minor), + LV_BLK(minor), MODE_TO_STR(file->f_mode)); switch (command) { + case BLKSSZGET: + /* get block device sector size as needed e.g. 
by fdisk */ + return put_user(get_hardsect_size(inode->i_rdev), (int *) arg); + case BLKGETSIZE: /* return device size */ - P_IOCTL("%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", - lvm_name, lv_ptr->lv_size); + P_IOCTL("BLKGETSIZE: %u\n", lv_ptr->lv_size); if (put_user(lv_ptr->lv_size, (unsigned long *)arg)) return -EFAULT; break; @@ -887,7 +878,7 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, /* flush buffer cache */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - P_IOCTL("%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name); + P_IOCTL("BLKFLSBUF\n"); fsync_dev(inode->i_rdev); invalidate_buffers(inode->i_rdev); @@ -898,20 +889,19 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, /* set read ahead for block device */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n", - lvm_name, (long) arg, MAJOR(inode->i_rdev), minor); + P_IOCTL("BLKRASET: %ld sectors for %s\n", + (long) arg, kdevname(inode->i_rdev)); if ((long) arg < LVM_MIN_READ_AHEAD || (long) arg > LVM_MAX_READ_AHEAD) return -EINVAL; lv_ptr->lv_read_ahead = (long) arg; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead; break; case BLKRAGET: /* get current read ahead setting */ - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); + P_IOCTL("BLKRAGET %d\n", lv_ptr->lv_read_ahead); if (put_user(lv_ptr->lv_read_ahead, (long *)arg)) return -EFAULT; break; @@ -937,10 +927,10 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, copy_to_user((long *) &hd->start, &start, sizeof(start)) != 0) return -EFAULT; - } - P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n", - lvm_name, lv_ptr->lv_size / heads / sectors); + P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n", + lvm_name, cylinders); + } break; @@ -964,13 +954,12 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, break; case LV_BMAP: - /* turn logical block into (dev_t, block). non privileged. 
*/ - /* don't bmap a snapshot, since the mapping can change */ - if (lv_ptr->lv_access & LV_SNAPSHOT) + /* turn logical block into (dev_t, block). non privileged. */ + /* don't bmap a snapshot, since the mapping can change */ + if(lv_ptr->lv_access & LV_SNAPSHOT) return -EPERM; return lvm_user_bmap(inode, (struct lv_bmap *) arg); - break; case LV_SET_ALLOCATION: /* set allocation flags of a logical volume */ @@ -979,40 +968,11 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, break; case LV_SNAPSHOT_USE_RATE: - if (!(lv_ptr->lv_access & LV_SNAPSHOT)) return -EPERM; - { - lv_snapshot_use_rate_req_t lv_snapshot_use_rate_req; - - if (copy_from_user(&lv_snapshot_use_rate_req, arg, - sizeof(lv_snapshot_use_rate_req_t))) - return -EFAULT; - if (lv_snapshot_use_rate_req.rate < 0 || - lv_snapshot_use_rate_req.rate > 100) return -EFAULT; - - switch (lv_snapshot_use_rate_req.block) - { - case 0: - lv_ptr->lv_snapshot_use_rate = lv_snapshot_use_rate_req.rate; - if (lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end < lv_ptr->lv_snapshot_use_rate) - interruptible_sleep_on (&lv_ptr->lv_snapshot_wait); - break; - - case O_NONBLOCK: - break; - - default: - return -EFAULT; - } - lv_snapshot_use_rate_req.rate = lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end; - if (copy_to_user(arg, &lv_snapshot_use_rate_req, - sizeof(lv_snapshot_use_rate_req_t))) - return -EFAULT; - } - break; + return lvm_get_snapshot_use_rate(lv_ptr, arg); default: printk(KERN_WARNING - "%s -- lvm_blk_ioctl: unknown command %d\n", + "%s -- lvm_blk_ioctl: unknown command 0x%x\n", lvm_name, command); return -EINVAL; } @@ -1030,18 +990,49 @@ static int lvm_blk_close(struct inode *inode, struct file *file) vg_t *vg_ptr = vg[VG_BLK(minor)]; lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)]; -#ifdef DEBUG - printk(KERN_DEBUG - "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n", - lvm_name, minor, VG_BLK(minor), LV_BLK(minor)); -#endif + P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n", + minor, VG_BLK(minor), 
LV_BLK(minor)); if (lv_ptr->lv_open == 1) vg_ptr->lv_open--; lv_ptr->lv_open--; + MOD_DEC_USE_COUNT; + return 0; } /* lvm_blk_close() */ +static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg) +{ + lv_snapshot_use_rate_req_t lv_rate_req; + + if (!(lv->lv_access & LV_SNAPSHOT)) + return -EPERM; + + if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req))) + return -EFAULT; + + if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100) + return -EINVAL; + + switch (lv_rate_req.block) { + case 0: + lv->lv_snapshot_use_rate = lv_rate_req.rate; + if (lv->lv_remap_ptr * 100 / lv->lv_remap_end < + lv->lv_snapshot_use_rate) + interruptible_sleep_on(&lv->lv_snapshot_wait); + break; + + case O_NONBLOCK: + break; + + default: + return -EINVAL; + } + lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end; + + return copy_to_user(arg, &lv_rate_req, + sizeof(lv_rate_req)) ? -EFAULT : 0; +} static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) { @@ -1056,6 +1047,7 @@ static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) bh.b_blocknr = block; bh.b_dev = bh.b_rdev = inode->i_rdev; bh.b_size = lvm_get_blksize(bh.b_dev); + bh.b_rsector = block * (bh.b_size >> 9); if ((err=lvm_map(&bh, READ)) < 0) { printk("lvm map failed: %d\n", err); return -EINVAL; @@ -1068,557 +1060,202 @@ static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) /* - * provide VG info for proc filesystem use (global) + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c + * (see init_module/lvm_init) */ -int lvm_vg_info(vg_t *vg_ptr, char *buf) { - int sz = 0; - char inactive_flag = ' '; - - if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; - sz = sprintf(buf, - "\nVG: %c%s [%d PV, %d LV/%d open] " - " PE Size: %d KB\n" - " Usage [KB/PE]: %d /%d total " - "%d /%d used %d /%d free", - inactive_flag, - vg_ptr->vg_name, - vg_ptr->pv_cur, - vg_ptr->lv_cur, - vg_ptr->lv_open, - vg_ptr->pe_size >> 1, - vg_ptr->pe_size * 
vg_ptr->pe_total >> 1, - vg_ptr->pe_total, - vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, - vg_ptr->pe_allocated, - (vg_ptr->pe_total - vg_ptr->pe_allocated) * - vg_ptr->pe_size >> 1, - vg_ptr->pe_total - vg_ptr->pe_allocated); - return sz; -} +static void __remap_snapshot(kdev_t rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { + /* copy a chunk from the origin to a snapshot device */ + down_write(&lv->lv_lock); -/* - * provide LV info for proc filesystem use (global) - */ -int lvm_lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) { - int sz = 0; - char inactive_flag = 'A', allocation_flag = ' ', - stripes_flag = ' ', rw_flag = ' '; - - if (!(lv_ptr->lv_status & LV_ACTIVE)) - inactive_flag = 'I'; - rw_flag = 'R'; - if (lv_ptr->lv_access & LV_WRITE) - rw_flag = 'W'; - allocation_flag = 'D'; - if (lv_ptr->lv_allocation & LV_CONTIGUOUS) - allocation_flag = 'C'; - stripes_flag = 'L'; - if (lv_ptr->lv_stripes > 1) - stripes_flag = 'S'; - sz += sprintf(buf+sz, - "[%c%c%c%c", - inactive_flag, - rw_flag, - allocation_flag, - stripes_flag); - if (lv_ptr->lv_stripes > 1) - sz += sprintf(buf+sz, "%-2d", - lv_ptr->lv_stripes); - else - sz += sprintf(buf+sz, " "); - basename = strrchr(lv_ptr->lv_name, '/'); - if ( basename == 0) basename = lv_ptr->lv_name; - else basename++; - sz += sprintf(buf+sz, "] %-25s", basename); - if (strlen(basename) > 25) - sz += sprintf(buf+sz, - "\n "); - sz += sprintf(buf+sz, "%9d /%-6d ", - lv_ptr->lv_size >> 1, - lv_ptr->lv_size / vg_ptr->pe_size); - - if (lv_ptr->lv_open == 0) - sz += sprintf(buf+sz, "close"); - else - sz += sprintf(buf+sz, "%dx open", - lv_ptr->lv_open); + /* we must redo lvm_snapshot_remap_block in order to avoid a + race condition in the gap where no lock was held */ + if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) && + !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv)) + lvm_write_COW_table_block(vg, lv); - return sz; + up_write(&lv->lv_lock); } +static inline void _remap_snapshot(kdev_t 
rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { + int r; -/* - * provide PV info for proc filesystem use (global) - */ -int lvm_pv_info(pv_t *pv_ptr, char *buf) { - int sz = 0; - char inactive_flag = 'A', allocation_flag = ' '; - char *pv_name = NULL; - - if (!(pv_ptr->pv_status & PV_ACTIVE)) - inactive_flag = 'I'; - allocation_flag = 'A'; - if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) - allocation_flag = 'N'; - pv_name = strrchr(pv_ptr->pv_name+1,'/'); - if ( pv_name == 0) pv_name = pv_ptr->pv_name; - else pv_name++; - sz = sprintf(buf, - "[%c%c] %-21s %8d /%-6d " - "%8d /%-6d %8d /%-6d", - inactive_flag, - allocation_flag, - pv_name, - pv_ptr->pe_total * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total, - pv_ptr->pe_allocated * - pv_ptr->pe_size >> 1, - pv_ptr->pe_allocated, - (pv_ptr->pe_total - - pv_ptr->pe_allocated) * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total - - pv_ptr->pe_allocated); - return sz; + /* check to see if this chunk is already in the snapshot */ + down_read(&lv->lv_lock); + r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv); + up_read(&lv->lv_lock); + + if (!r) + /* we haven't yet copied this block to the snapshot */ + __remap_snapshot(rdev, rsector, pe_start, lv, vg); } /* - * Support functions /proc-Filesystem + * extents destined for a pe that is on the move should be deferred */ +static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) { + return ((pe_lock_req.lock == LOCK_PE) && + (pv == pe_lock_req.data.pv_dev) && + (sector >= pe_lock_req.data.pv_offset) && + (sector < (pe_lock_req.data.pv_offset + pe_size))); +} -#define LVM_PROC_BUF ( i == 0 ? 
dummy_buf : &buf[sz]) - -/* - * provide global LVM information - */ -static int lvm_proc_get_global_info(char *page, char **start, off_t pos, int count, int *eof, void *data) +static inline int _defer_extent(struct buffer_head *bh, int rw, + kdev_t pv, ulong sector, uint32_t pe_size) { - int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, - lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds; - static off_t sz; - off_t sz_last; - static char *buf = NULL; - static char dummy_buf[160]; /* sized for 2 lines */ - vg_t *vg_ptr; - lv_t *lv_ptr; - pv_t *pv_ptr; - - -#ifdef DEBUG_LVM_PROC_GET_INFO - printk(KERN_DEBUG - "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n", - lvm_name, pos, count, whence); -#endif - - MOD_INC_USE_COUNT; - - if (pos == 0 || buf == NULL) { - sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ - lv_open_total = pe_t_bytes = hash_table_bytes = \ - lv_block_exception_t_bytes = 0; - - /* search for activity */ - for (v = 0; v < ABS_MAX_VG; v++) { - if ((vg_ptr = vg[v]) != NULL) { - vg_counter++; - pv_counter += vg_ptr->pv_cur; - lv_counter += vg_ptr->lv_cur; - if (vg_ptr->lv_cur > 0) { - for (l = 0; l < vg[v]->lv_max; l++) { - if ((lv_ptr = vg_ptr->lv[l]) != NULL) { - pe_t_bytes += lv_ptr->lv_allocated_le; - hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size; - if (lv_ptr->lv_block_exception != NULL) - lv_block_exception_t_bytes += lv_ptr->lv_remap_end; - if (lv_ptr->lv_open > 0) { - lv_open_counter++; - lv_open_total += lv_ptr->lv_open; - } - } - } - } - } - } - pe_t_bytes *= sizeof(pe_t); - lv_block_exception_t_bytes *= sizeof(lv_block_exception_t); - - if (buf != NULL) { - P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__); - lock_kernel(); - vfree(buf); - unlock_kernel(); - buf = NULL; - } - /* 2 times: first to get size to allocate buffer, - 2nd to fill the malloced buffer */ - for (i = 0; i < 2; i++) { - sz = 0; - sz += sprintf(LVM_PROC_BUF, - "LVM " 
-#ifdef MODULE - "module" -#else - "driver" -#endif - " %s\n\n" - "Total: %d VG%s %d PV%s %d LV%s ", - lvm_short_version, - vg_counter, vg_counter == 1 ? "" : "s", - pv_counter, pv_counter == 1 ? "" : "s", - lv_counter, lv_counter == 1 ? "" : "s"); - sz += sprintf(LVM_PROC_BUF, - "(%d LV%s open", - lv_open_counter, - lv_open_counter == 1 ? "" : "s"); - if (lv_open_total > 0) - sz += sprintf(LVM_PROC_BUF, - " %d times)\n", - lv_open_total); - else - sz += sprintf(LVM_PROC_BUF, ")"); - sz += sprintf(LVM_PROC_BUF, - "\nGlobal: %lu bytes malloced IOP version: %d ", - vg_counter * sizeof(vg_t) + - pv_counter * sizeof(pv_t) + - lv_counter * sizeof(lv_t) + - pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last, - lvm_iop_version); - - seconds = CURRENT_TIME - loadtime; - if (seconds < 0) - loadtime = CURRENT_TIME + seconds; - if (seconds / 86400 > 0) { - sz += sprintf(LVM_PROC_BUF, "%d day%s ", - seconds / 86400, - seconds / 86400 == 0 || - seconds / 86400 > 1 ? "s" : ""); - } - sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n", - (seconds % 86400) / 3600, - (seconds % 3600) / 60, - seconds % 60); - - if (vg_counter > 0) { - for (v = 0; v < ABS_MAX_VG; v++) { - /* volume group */ - if ((vg_ptr = vg[v]) != NULL) { - sz += lvm_vg_info(vg_ptr, LVM_PROC_BUF); - - /* physical volumes */ - sz += sprintf(LVM_PROC_BUF, - "\n PV%s ", - vg_ptr->pv_cur == 1 ? ": " : "s:"); - c = 0; - for (p = 0; p < vg_ptr->pv_max; p++) { - if ((pv_ptr = vg_ptr->pv[p]) != NULL) { - sz += lvm_pv_info(pv_ptr, LVM_PROC_BUF); - - c++; - if (c < vg_ptr->pv_cur) - sz += sprintf(LVM_PROC_BUF, - "\n "); - } - } - - /* logical volumes */ - sz += sprintf(LVM_PROC_BUF, - "\n LV%s ", - vg_ptr->lv_cur == 1 ? 
": " : "s:"); - c = 0; - for (l = 0; l < vg_ptr->lv_max; l++) { - if ((lv_ptr = vg_ptr->lv[l]) != NULL) { - sz += lvm_lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF); - c++; - if (c < vg_ptr->lv_cur) - sz += sprintf(LVM_PROC_BUF, - "\n "); - } - } - if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none"); - sz += sprintf(LVM_PROC_BUF, "\n"); - } - } - } - if (buf == NULL) { - lock_kernel(); - buf = vmalloc(sz); - unlock_kernel(); - if (buf == NULL) { - sz = 0; - MOD_DEC_USE_COUNT; - return sprintf(page, "%s - vmalloc error at line %d\n", - lvm_name, __LINE__); - } - } - sz_last = sz; + if (pe_lock_req.lock == LOCK_PE) { + down_read(&_pe_lock); + if (_should_defer(pv, sector, pe_size)) { + up_read(&_pe_lock); + down_write(&_pe_lock); + if (_should_defer(pv, sector, pe_size)) + _queue_io(bh, rw); + up_write(&_pe_lock); + return 1; } + up_read(&_pe_lock); } - MOD_DEC_USE_COUNT; - if (pos > sz - 1) { - lock_kernel(); - vfree(buf); - unlock_kernel(); - buf = NULL; - return 0; - } - *start = &buf[pos]; - if (sz - pos < count) - return sz - pos; - else - return count; -} /* lvm_proc_get_global_info() */ - - -/* - * provide VG information - */ -int lvm_proc_read_vg_info(char *page, char **start, off_t off, - int count, int *eof, void *data) { - int sz = 0; - vg_t *vg = data; - - sz += sprintf ( page+sz, "name: %s\n", vg->vg_name); - sz += sprintf ( page+sz, "size: %u\n", - vg->pe_total * vg->pe_size / 2); - sz += sprintf ( page+sz, "access: %u\n", vg->vg_access); - sz += sprintf ( page+sz, "status: %u\n", vg->vg_status); - sz += sprintf ( page+sz, "number: %u\n", vg->vg_number); - sz += sprintf ( page+sz, "LV max: %u\n", vg->lv_max); - sz += sprintf ( page+sz, "LV current: %u\n", vg->lv_cur); - sz += sprintf ( page+sz, "LV open: %u\n", vg->lv_open); - sz += sprintf ( page+sz, "PV max: %u\n", vg->pv_max); - sz += sprintf ( page+sz, "PV current: %u\n", vg->pv_cur); - sz += sprintf ( page+sz, "PV active: %u\n", vg->pv_act); - sz += sprintf ( page+sz, "PE size: %u\n", vg->pe_size / 
2); - sz += sprintf ( page+sz, "PE total: %u\n", vg->pe_total); - sz += sprintf ( page+sz, "PE allocated: %u\n", vg->pe_allocated); - sz += sprintf ( page+sz, "uuid: %s\n", lvm_show_uuid(vg->vg_uuid)); - - return sz; -} - - -/* - * provide LV information - */ -int lvm_proc_read_lv_info(char *page, char **start, off_t off, - int count, int *eof, void *data) { - int sz = 0; - lv_t *lv = data; - - sz += sprintf ( page+sz, "name: %s\n", lv->lv_name); - sz += sprintf ( page+sz, "size: %u\n", lv->lv_size); - sz += sprintf ( page+sz, "access: %u\n", lv->lv_access); - sz += sprintf ( page+sz, "status: %u\n", lv->lv_status); - sz += sprintf ( page+sz, "number: %u\n", lv->lv_number); - sz += sprintf ( page+sz, "open: %u\n", lv->lv_open); - sz += sprintf ( page+sz, "allocation: %u\n", lv->lv_allocation); - sz += sprintf ( page+sz, "device: %02u:%02u\n", - MAJOR(lv->lv_dev), MINOR(lv->lv_dev)); - - return sz; -} - - -/* - * provide PV information - */ -int lvm_proc_read_pv_info(char *page, char **start, off_t off, - int count, int *eof, void *data) { - int sz = 0; - pv_t *pv = data; - - sz += sprintf ( page+sz, "name: %s\n", pv->pv_name); - sz += sprintf ( page+sz, "size: %u\n", pv->pv_size); - sz += sprintf ( page+sz, "status: %u\n", pv->pv_status); - sz += sprintf ( page+sz, "number: %u\n", pv->pv_number); - sz += sprintf ( page+sz, "allocatable: %u\n", pv->pv_allocatable); - sz += sprintf ( page+sz, "LV current: %u\n", pv->lv_cur); - sz += sprintf ( page+sz, "PE size: %u\n", pv->pe_size / 2); - sz += sprintf ( page+sz, "PE total: %u\n", pv->pe_total); - sz += sprintf ( page+sz, "PE allocated: %u\n", pv->pe_allocated); - sz += sprintf ( page+sz, "device: %02u:%02u\n", - MAJOR(pv->pv_dev), MINOR(pv->pv_dev)); - sz += sprintf ( page+sz, "uuid: %s\n", lvm_show_uuid(pv->pv_uuid)); - - - return sz; + return 0; } - -/* - * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c - * (see init_module/lvm_init) - */ static int lvm_map(struct buffer_head *bh, int 
rw) { int minor = MINOR(bh->b_rdev); - int ret = 0; ulong index; ulong pe_start; ulong size = bh->b_size >> 9; - ulong rsector_tmp = bh->b_rsector; - ulong rsector_sav; - kdev_t rdev_tmp = bh->b_rdev; - kdev_t rdev_sav; + ulong rsector_org = bh->b_rsector; + ulong rsector_map; + kdev_t rdev_map; vg_t *vg_this = vg[VG_BLK(minor)]; lv_t *lv = vg_this->lv[LV_BLK(minor)]; + down_read(&lv->lv_lock); if (!(lv->lv_status & LV_ACTIVE)) { printk(KERN_ALERT "%s - lvm_map: ll_rw_blk for inactive LV %s\n", lvm_name, lv->lv_name); - return -1; + goto bad; } if ((rw == WRITE || rw == WRITEA) && !(lv->lv_access & LV_WRITE)) { printk(KERN_CRIT - "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", lvm_name, lv->lv_name); - return -1; + goto bad; } - P_MAP("%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu " - "size:%lu\n", + P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n", lvm_name, minor, - MAJOR(rdev_tmp), - MINOR(rdev_tmp), - rsector_tmp, size); + kdevname(bh->b_rdev), + rsector_org, size); - if (rsector_tmp + size > lv->lv_size) { + if (rsector_org + size > lv->lv_size) { printk(KERN_ALERT "%s - lvm_map access beyond end of device; *rsector: " "%lu or size: %lu wrong for minor: %2d\n", - lvm_name, rsector_tmp, size, minor); - return -1; + lvm_name, rsector_org, size, minor); + goto bad; } - rsector_sav = rsector_tmp; - rdev_sav = rdev_tmp; -lvm_second_remap: - /* linear mapping */ - if (lv->lv_stripes < 2) { + + if (lv->lv_stripes < 2) { /* linear mapping */ /* get the index */ - index = rsector_tmp / vg_this->pe_size; + index = rsector_org / vg_this->pe_size; pe_start = lv->lv_current_pe[index].pe; - rsector_tmp = lv->lv_current_pe[index].pe + - (rsector_tmp % vg_this->pe_size); - rdev_tmp = lv->lv_current_pe[index].dev; - - P_MAP("lv_current_pe[%ld].pe: %ld rdev: %02d:%02d " - "rsector:%ld\n", - index, - lv->lv_current_pe[index].pe, - MAJOR(rdev_tmp), - MINOR(rdev_tmp), - rsector_tmp); - - /* 
striped mapping */ - } else { + rsector_map = lv->lv_current_pe[index].pe + + (rsector_org % vg_this->pe_size); + rdev_map = lv->lv_current_pe[index].dev; + + P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n", + index, lv->lv_current_pe[index].pe, + kdevname(rdev_map), rsector_map); + + } else { /* striped mapping */ ulong stripe_index; ulong stripe_length; stripe_length = vg_this->pe_size * lv->lv_stripes; - stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize; - index = rsector_tmp / stripe_length + - (stripe_index % lv->lv_stripes) * - (lv->lv_allocated_le / lv->lv_stripes); + stripe_index = (rsector_org % stripe_length) / + lv->lv_stripesize; + index = rsector_org / stripe_length + + (stripe_index % lv->lv_stripes) * + (lv->lv_allocated_le / lv->lv_stripes); pe_start = lv->lv_current_pe[index].pe; - rsector_tmp = lv->lv_current_pe[index].pe + - (rsector_tmp % stripe_length) - - (stripe_index % lv->lv_stripes) * lv->lv_stripesize - - stripe_index / lv->lv_stripes * - (lv->lv_stripes - 1) * lv->lv_stripesize; - rdev_tmp = lv->lv_current_pe[index].dev; - } - - P_MAP("lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n" - "stripe_length: %ld stripe_index: %ld\n", - index, - lv->lv_current_pe[index].pe, - MAJOR(rdev_tmp), - MINOR(rdev_tmp), - rsector_tmp, - stripe_length, - stripe_index); - - /* handle physical extents on the move */ - if (pe_lock_req.lock == LOCK_PE) { - if (rdev_tmp == pe_lock_req.data.pv_dev && - rsector_tmp >= pe_lock_req.data.pv_offset && - rsector_tmp < (pe_lock_req.data.pv_offset + - vg_this->pe_size)) { - sleep_on(&lvm_map_wait); - rsector_tmp = rsector_sav; - rdev_tmp = rdev_sav; - goto lvm_second_remap; + rsector_map = lv->lv_current_pe[index].pe + + (rsector_org % stripe_length) - + (stripe_index % lv->lv_stripes) * lv->lv_stripesize - + stripe_index / lv->lv_stripes * + (lv->lv_stripes - 1) * lv->lv_stripesize; + rdev_map = lv->lv_current_pe[index].dev; + + P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n" 
+ "stripe_length: %ld stripe_index: %ld\n", + index, lv->lv_current_pe[index].pe, kdevname(rdev_map), + rsector_map, stripe_length, stripe_index); + } + + /* + * Queue writes to physical extents on the move until move completes. + * Don't get _pe_lock until there is a reasonable expectation that + * we need to queue this request, because this is in the fast path. + */ + if (rw == WRITE || rw == WRITEA) { + if(_defer_extent(bh, rw, rdev_map, + rsector_map, vg_this->pe_size)) { + + up_read(&lv->lv_lock); + return 0; } - } - /* statistic */ - if (rw == WRITE || rw == WRITEA) - lv->lv_current_pe[index].writes++; - else - lv->lv_current_pe[index].reads++; + + lv->lv_current_pe[index].writes++; /* statistic */ + } else + lv->lv_current_pe[index].reads++; /* statistic */ /* snapshot volume exception handling on physical device address base */ - if (lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)) { - /* original logical volume */ - if (lv->lv_access & LV_SNAPSHOT_ORG) { - /* Serializes the access to the lv_snapshot_next list */ - down(&lv->lv_snapshot_sem); - if (rw == WRITE || rw == WRITEA) - { - lv_t *lv_ptr; - - /* start with first snapshot and loop thrugh all of them */ - for (lv_ptr = lv->lv_snapshot_next; - lv_ptr != NULL; - lv_ptr = lv_ptr->lv_snapshot_next) { - /* Check for inactive snapshot */ - if (!(lv_ptr->lv_status & LV_ACTIVE)) continue; - /* Serializes the COW with the accesses to the snapshot device */ - down(&lv_ptr->lv_snapshot_sem); - /* do we still have exception storage for this snapshot free? 
*/ - if (lv_ptr->lv_block_exception != NULL) { - rdev_sav = rdev_tmp; - rsector_sav = rsector_tmp; - if (!lvm_snapshot_remap_block(&rdev_tmp, - &rsector_tmp, - pe_start, - lv_ptr)) { - /* create a new mapping */ - if (!(ret = lvm_snapshot_COW(rdev_tmp, - rsector_tmp, - pe_start, - rsector_sav, - lv_ptr))) - ret = lvm_write_COW_table_block(vg_this, - lv_ptr); - } - rdev_tmp = rdev_sav; - rsector_tmp = rsector_sav; - } - up(&lv_ptr->lv_snapshot_sem); - } - } - up(&lv->lv_snapshot_sem); - } else { - /* remap snapshot logical volume */ - down(&lv->lv_snapshot_sem); - if (lv->lv_block_exception != NULL) - lvm_snapshot_remap_block(&rdev_tmp, &rsector_tmp, pe_start, lv); - up(&lv->lv_snapshot_sem); + if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG))) + goto out; + + if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */ + if (lv->lv_block_exception) + lvm_snapshot_remap_block(&rdev_map, &rsector_map, + pe_start, lv); + else + goto bad; + + } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */ + lv_t *snap; + + /* start with first snapshot and loop through all of + them */ + for (snap = lv->lv_snapshot_next; snap; + snap = snap->lv_snapshot_next) { + /* Check for inactive snapshot */ + if (!(snap->lv_status & LV_ACTIVE)) + continue; + + /* Serializes the COW with the accesses to the + snapshot device */ + _remap_snapshot(rdev_map, rsector_map, + pe_start, snap, vg_this); } - } - bh->b_rdev = rdev_tmp; - bh->b_rsector = rsector_tmp; + } - return ret; + out: + bh->b_rdev = rdev_map; + bh->b_rsector = rsector_map; + up_read(&lv->lv_lock); + return 1; + + bad: + buffer_IO_error(bh); + up_read(&lv->lv_lock); + return -1; } /* lvm_map() */ @@ -1651,13 +1288,8 @@ void lvm_hd_name(char *buf, int minor) */ static int lvm_make_request_fn(request_queue_t *q, int rw, - struct buffer_head *bh) -{ - if (lvm_map(bh, rw) >= 0) - return 1; - - buffer_IO_error(bh); - return 0; + struct buffer_head *bh) { + return (lvm_map(bh, rw) <= 0) ? 
0 : 1; } @@ -1674,8 +1306,7 @@ static int lvm_do_lock_lvm(void) lock_try_again: spin_lock(&lvm_lock); if (lock != 0 && lock != current->pid) { - P_IOCTL("lvm_do_lock_lvm: %s is locked by pid %d ...\n", - lvm_name, lock); + P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock); spin_unlock(&lvm_lock); interruptible_sleep_on(&lvm_wait); if (current->sigpending != 0) @@ -1687,6 +1318,7 @@ lock_try_again: goto lock_try_again; } lock = current->pid; + P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock); spin_unlock(&lvm_lock); return 0; } /* lvm_do_lock_lvm */ @@ -1697,33 +1329,60 @@ lock_try_again: */ static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg) { + pe_lock_req_t new_lock; + struct buffer_head *bh; uint p; if (vg_ptr == NULL) return -ENXIO; - if (copy_from_user(&pe_lock_req, arg, - sizeof(pe_lock_req_t)) != 0) return -EFAULT; + if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0) + return -EFAULT; - switch (pe_lock_req.lock) { + switch (new_lock.lock) { case LOCK_PE: for (p = 0; p < vg_ptr->pv_max; p++) { if (vg_ptr->pv[p] != NULL && - pe_lock_req.data.pv_dev == - vg_ptr->pv[p]->pv_dev) + new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev) break; } if (p == vg_ptr->pv_max) return -ENXIO; - pe_lock_req.lock = UNLOCK_PE; + /* + * this sync releaves memory pressure to lessen the + * likelyhood of pvmove being paged out - resulting in + * deadlock. + * + * This method of doing a pvmove is broken + */ fsync_dev(pe_lock_req.data.lv_dev); + + down_write(&_pe_lock); + if (pe_lock_req.lock == LOCK_PE) { + up_write(&_pe_lock); + return -EBUSY; + } + + /* Should we do to_kdev_t() on the pv_dev and lv_dev??? 
*/ pe_lock_req.lock = LOCK_PE; + pe_lock_req.data.lv_dev = new_lock.data.lv_dev; + pe_lock_req.data.pv_dev = new_lock.data.pv_dev; + pe_lock_req.data.pv_offset = new_lock.data.pv_offset; + up_write(&_pe_lock); + + /* some requests may have got through since the fsync */ + fsync_dev(pe_lock_req.data.pv_dev); break; case UNLOCK_PE: + down_write(&_pe_lock); pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = \ - pe_lock_req.data.pv_dev = \ + pe_lock_req.data.lv_dev = 0; + pe_lock_req.data.pv_dev = 0; pe_lock_req.data.pv_offset = 0; - wake_up(&lvm_map_wait); + bh = _dequeue_io(); + up_write(&_pe_lock); + + /* handle all deferred io for this PE */ + _flush_io(bh); break; default: @@ -1760,6 +1419,8 @@ static int lvm_do_le_remap(vg_t *vg_ptr, void *arg) le_remap_req.new_dev; lv_ptr->lv_current_pe[le].pe = le_remap_req.new_pe; + + __update_hardsectsize(lv_ptr); return 0; } } @@ -1773,7 +1434,7 @@ static int lvm_do_le_remap(vg_t *vg_ptr, void *arg) /* * character device support function VGDA create */ -int lvm_do_vg_create(int minor, void *arg) +static int lvm_do_vg_create(void *arg, int minor) { int ret = 0; ulong l, ls = 0, p, size; @@ -1781,8 +1442,6 @@ int lvm_do_vg_create(int minor, void *arg) vg_t *vg_ptr; lv_t **snap_lv_ptr; - if (vg[VG_CHR(minor)] != NULL) return -EPERM; - if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) { printk(KERN_CRIT "%s -- VG_CREATE: kmalloc error VG at line %d\n", @@ -1791,35 +1450,51 @@ int lvm_do_vg_create(int minor, void *arg) } /* get the volume group structure */ if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) { + P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n", + arg, sizeof(vg_t)); kfree(vg_ptr); return -EFAULT; } + /* VG_CREATE now uses minor number in VG structure */ + if (minor == -1) minor = vg_ptr->vg_number; + + /* Validate it */ + if (vg[VG_CHR(minor)] != NULL) { + P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor); + kfree(vg_ptr); + return -EPERM; + } + /* we are not that active so 
far... */ vg_ptr->vg_status &= ~VG_ACTIVE; - vg[VG_CHR(minor)] = vg_ptr; - vg[VG_CHR(minor)]->pe_allocated = 0; + vg_ptr->pe_allocated = 0; if (vg_ptr->pv_max > ABS_MAX_PV) { printk(KERN_WARNING "%s -- Can't activate VG: ABS_MAX_PV too small\n", lvm_name); kfree(vg_ptr); - vg[VG_CHR(minor)] = NULL; return -EPERM; } + if (vg_ptr->lv_max > ABS_MAX_LV) { printk(KERN_WARNING "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", lvm_name, vg_ptr->lv_max); kfree(vg_ptr); - vg_ptr = NULL; return -EPERM; } + /* create devfs and procfs entries */ + lvm_fs_create_vg(vg_ptr); + + vg[VG_CHR(minor)] = vg_ptr; + /* get the physical volume structures */ vg_ptr->pv_act = vg_ptr->pv_cur = 0; for (p = 0; p < vg_ptr->pv_max; p++) { + pv_t *pvp; /* user space address */ if ((pvp = vg_ptr->pv[p]) != NULL) { ret = lvm_do_pv_create(pvp, vg_ptr, p); @@ -1843,9 +1518,12 @@ int lvm_do_vg_create(int minor, void *arg) /* get the logical volume structures */ vg_ptr->lv_cur = 0; for (l = 0; l < vg_ptr->lv_max; l++) { + lv_t *lvp; /* user space address */ if ((lvp = vg_ptr->lv[l]) != NULL) { if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { + P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n", + lvp, sizeof(lv_t)); lvm_do_vg_remove(minor); return -EFAULT; } @@ -1864,12 +1542,10 @@ int lvm_do_vg_create(int minor, void *arg) } } - lvm_do_create_devfs_entry_of_vg ( vg_ptr); - /* Second path to correct snapshot logical volumes which are not in place during first path above */ for (l = 0; l < ls; l++) { - lvp = snap_lv_ptr[l]; + lv_t *lvp = snap_lv_ptr[l]; if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { lvm_do_vg_remove(minor); return -EFAULT; @@ -1880,8 +1556,6 @@ int lvm_do_vg_create(int minor, void *arg) } } - lvm_do_create_proc_entry_of_vg ( vg_ptr); - vfree(snap_lv_ptr); vg_count++; @@ -1913,7 +1587,6 @@ static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg) if ( ret != 0) return ret; pv_ptr = vg_ptr->pv[p]; vg_ptr->pe_total += pv_ptr->pe_total; - lvm_do_create_proc_entry_of_pv(vg_ptr, 
pv_ptr); return 0; } } @@ -1963,10 +1636,12 @@ static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg) lv_t *lv_ptr = NULL; pv_t *pv_ptr = NULL; + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0) return -EFAULT; - lvm_do_remove_proc_entry_of_vg ( vg_ptr); + lvm_fs_remove_vg(vg_ptr); strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1); for ( l = 0; l < vg_ptr->lv_max; l++) @@ -1988,7 +1663,7 @@ static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg) strncpy(pv_ptr->vg_name, vg_name, NAME_LEN); } - lvm_do_create_proc_entry_of_vg ( vg_ptr); + lvm_fs_create_vg(vg_ptr); return 0; } /* lvm_do_vg_rename */ @@ -2015,6 +1690,9 @@ static int lvm_do_vg_remove(int minor) /* let's go inactive */ vg_ptr->vg_status &= ~VG_ACTIVE; + /* remove from procfs and devfs */ + lvm_fs_remove_vg(vg_ptr); + /* free LVs */ /* first free snapshot logical volumes */ for (i = 0; i < vg_ptr->lv_max; i++) { @@ -2042,11 +1720,6 @@ static int lvm_do_vg_remove(int minor) } } - devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]); - devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]); - - lvm_do_remove_proc_entry_of_vg ( vg_ptr); - P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__); kfree(vg_ptr); vg[VG_CHR(minor)] = NULL; @@ -2063,66 +1736,112 @@ static int lvm_do_vg_remove(int minor) * character device support function physical volume create */ static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) { - pv_t *pv_ptr = NULL; + pv_t *pv; + int err; - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); - if (pv_ptr == NULL) { + pv = kmalloc(sizeof(pv_t),GFP_KERNEL); + if (pv == NULL) { printk(KERN_CRIT - "%s -- VG_CREATE: kmalloc error PV at line %d\n", + "%s -- PV_CREATE: kmalloc error PV at line %d\n", lvm_name, __LINE__); return -ENOMEM; } - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { + + memset(pv, 0, sizeof(*pv)); + + if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) { + P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n", + 
pvp, sizeof(pv_t)); + kfree(pv); return -EFAULT; } + + if ((err = _open_pv(pv))) { + kfree(pv); + return err; + } + /* We don't need the PE list in kernel space as with LVs pe_t list (see below) */ - pv_ptr->pe = NULL; - pv_ptr->pe_allocated = 0; - pv_ptr->pv_status = PV_ACTIVE; + pv->pe = NULL; + pv->pe_allocated = 0; + pv->pv_status = PV_ACTIVE; vg_ptr->pv_act++; vg_ptr->pv_cur++; + lvm_fs_create_pv(vg_ptr, pv); + vg_ptr->pv[p] = pv; return 0; } /* lvm_do_pv_create() */ /* - * character device support function physical volume create + * character device support function physical volume remove */ static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) { - pv_t *pv_ptr = vg_ptr->pv[p]; + pv_t *pv = vg_ptr->pv[p]; + + lvm_fs_remove_pv(vg_ptr, pv); - lvm_do_remove_proc_entry_of_pv ( vg_ptr, pv_ptr); - vg_ptr->pe_total -= pv_ptr->pe_total; + vg_ptr->pe_total -= pv->pe_total; vg_ptr->pv_cur--; vg_ptr->pv_act--; -#ifdef LVM_GET_INODE - lvm_clear_inode(pv_ptr->inode); -#endif - kfree(pv_ptr); + + _close_pv(pv); + kfree(pv); + vg_ptr->pv[p] = NULL; return 0; } +static void __update_hardsectsize(lv_t *lv) { + int le, e; + int max_hardsectsize = 0, hardsectsize; + + for (le = 0; le < lv->lv_allocated_le; le++) { + hardsectsize = get_hardsect_size(lv->lv_current_pe[le].dev); + if (hardsectsize == 0) + hardsectsize = 512; + if (hardsectsize > max_hardsectsize) + max_hardsectsize = hardsectsize; + } + + /* only perform this operation on active snapshots */ + if ((lv->lv_access & LV_SNAPSHOT) && + (lv->lv_status & LV_ACTIVE)) { + for (e = 0; e < lv->lv_remap_end; e++) { + hardsectsize = get_hardsect_size( lv->lv_block_exception[e].rdev_new); + if (hardsectsize == 0) + hardsectsize = 512; + if (hardsectsize > max_hardsectsize) + max_hardsectsize = hardsectsize; + } + } + + lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize; +} + /* * character device support function logical volume create */ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) { - int e, ret, l, le, 
l_new, p, size; + int e, ret, l, le, l_new, p, size, activate = 1; ulong lv_status_save; lv_block_exception_t *lvbe = lv->lv_block_exception; vg_t *vg_ptr = vg[VG_CHR(minor)]; lv_t *lv_ptr = NULL; + pe_t *pep; - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL; - if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK) + if (!(pep = lv->lv_current_pe)) return -EINVAL; - for (l = 0; l < vg_ptr->lv_max; l++) { + if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK) + return -EINVAL; + + for (l = 0; l < vg_ptr->lv_cur; l++) { if (vg_ptr->lv[l] != NULL && strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) return -EEXIST; @@ -2151,23 +1870,26 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) lv_status_save = lv_ptr->lv_status; lv_ptr->lv_status &= ~LV_ACTIVE; - lv_ptr->lv_snapshot_org = \ - lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_org = NULL; + lv_ptr->lv_snapshot_prev = NULL; lv_ptr->lv_snapshot_next = NULL; lv_ptr->lv_block_exception = NULL; lv_ptr->lv_iobuf = NULL; + lv_ptr->lv_COW_table_iobuf = NULL; lv_ptr->lv_snapshot_hash_table = NULL; lv_ptr->lv_snapshot_hash_table_size = 0; lv_ptr->lv_snapshot_hash_mask = 0; - lv_ptr->lv_COW_table_page = NULL; - init_MUTEX(&lv_ptr->lv_snapshot_sem); + init_rwsem(&lv_ptr->lv_lock); + lv_ptr->lv_snapshot_use_rate = 0; + vg_ptr->lv[l] = lv_ptr; /* get the PE structures from user space if this - is no snapshot logical volume */ + is not a snapshot logical volume */ if (!(lv_ptr->lv_access & LV_SNAPSHOT)) { size = lv_ptr->lv_allocated_le * sizeof(pe_t); + if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) { printk(KERN_CRIT "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte " @@ -2179,6 +1901,8 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) return -ENOMEM; } if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) { + P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n", + pep, sizeof(size)); vfree(lv_ptr->lv_current_pe); kfree(lv_ptr); vg_ptr->lv[l] = NULL; @@ -2200,6 +1924,15 @@ 
static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)]; if (lv_ptr->lv_snapshot_org != NULL) { size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t); + + if(!size) { + printk(KERN_WARNING + "%s -- zero length exception table requested\n", + lvm_name); + kfree(lv_ptr); + return -EINVAL; + } + if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) { printk(KERN_CRIT "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION " @@ -2217,6 +1950,17 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) vg_ptr->lv[l] = NULL; return -EFAULT; } + + if(lv_ptr->lv_block_exception[0].rsector_org == + LVM_SNAPSHOT_DROPPED_SECTOR) + { + printk(KERN_WARNING + "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n", + lvm_name); + activate = 0; + } + + /* point to the original logical volume */ lv_ptr = lv_ptr->lv_snapshot_org; @@ -2250,10 +1994,13 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); /* need to fill the COW exception table data into the page for disk i/o */ - lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr); + if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) { + kfree(lv_ptr); + vg_ptr->lv[l] = NULL; + return -EINVAL; + } init_waitqueue_head(&lv_ptr->lv_snapshot_wait); } else { - vfree(lv_ptr->lv_block_exception); kfree(lv_ptr); vg_ptr->lv[l] = NULL; return -EFAULT; @@ -2275,21 +2022,7 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) vg_ptr->lv_cur++; lv_ptr->lv_status = lv_status_save; - { - char *lv_tmp, *lv_buf = lv->lv_name; - - strtok(lv->lv_name, "/"); /* /dev */ - while((lv_tmp = strtok(NULL, "/")) != NULL) - lv_buf = lv_tmp; - - lv_devfs_handle[lv->lv_number] = devfs_register( - vg_devfs_handle[vg_ptr->vg_number], lv_buf, - DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, lv->lv_number, - S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_blk_dops, NULL); - } - - lvm_do_create_proc_entry_of_lv ( vg_ptr, 
lv_ptr); + __update_hardsectsize(lv_ptr); /* optionally add our new snapshot LV */ if (lv_ptr->lv_access & LV_SNAPSHOT) { @@ -2302,7 +2035,7 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) fsync_dev_lockfs(org->lv_dev); #endif - down(&org->lv_snapshot_sem); + down_write(&org->lv_lock); org->lv_access |= LV_SNAPSHOT_ORG; lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */ @@ -2310,11 +2043,15 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next); lv_ptr->lv_snapshot_prev = last; last->lv_snapshot_next = lv_ptr; - up(&org->lv_snapshot_sem); + up_write(&org->lv_lock); } /* activate the logical volume */ - lv_ptr->lv_status |= LV_ACTIVE; + if(activate) + lv_ptr->lv_status |= LV_ACTIVE; + else + lv_ptr->lv_status &= ~LV_ACTIVE; + if ( lv_ptr->lv_access & LV_WRITE) set_device_ro(lv_ptr->lv_dev, 0); else @@ -2322,13 +2059,15 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) #ifdef LVM_VFS_ENHANCEMENT /* VFS function call to unlock the filesystem */ - if (lv_ptr->lv_access & LV_SNAPSHOT) { + if (lv_ptr->lv_access & LV_SNAPSHOT) unlockfs(lv_ptr->lv_snapshot_org->lv_dev); - } #endif lv_ptr->vg = vg_ptr; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = + lvm_fs_create_lv(vg_ptr, lv_ptr); + return 0; } /* lvm_do_lv_create() */ @@ -2366,13 +2105,15 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) lv_ptr->lv_snapshot_next != NULL) return -EPERM; + lvm_fs_remove_lv(vg_ptr, lv_ptr); + if (lv_ptr->lv_access & LV_SNAPSHOT) { /* * Atomically make the the snapshot invisible * to the original lv before playing with it. 
*/ lv_t * org = lv_ptr->lv_snapshot_org; - down(&org->lv_snapshot_sem); + down_write(&org->lv_lock); /* remove this snapshot logical volume from the chain */ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; @@ -2380,11 +2121,13 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) lv_ptr->lv_snapshot_next->lv_snapshot_prev = lv_ptr->lv_snapshot_prev; } - up(&org->lv_snapshot_sem); /* no more snapshots? */ - if (!org->lv_snapshot_next) + if (!org->lv_snapshot_next) { org->lv_access &= ~LV_SNAPSHOT_ORG; + } + up_write(&org->lv_lock); + lvm_snapshot_release(lv_ptr); /* Update the VG PE(s) used by snapshot reserve space. */ @@ -2404,6 +2147,7 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) /* reset generic hd */ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1; lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0; lvm_size[MINOR(lv_ptr->lv_dev)] = 0; /* reset VG/LV mapping */ @@ -2427,10 +2171,6 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) vfree(lv_ptr->lv_current_pe); } - devfs_unregister(lv_devfs_handle[lv_ptr->lv_number]); - - lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); - P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__); kfree(lv_ptr); vg_ptr->lv[l] = NULL; @@ -2440,205 +2180,217 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) /* - * character device support function logical volume extend / reduce + * logical volume extend / reduce */ -static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) -{ - ulong end, l, le, p, size, old_allocated_le; - vg_t *vg_ptr = vg[VG_CHR(minor)]; - lv_t *lv_ptr; - pe_t *pe; - - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL; - - for (l = 0; l < vg_ptr->lv_max; l++) { - if (vg_ptr->lv[l] != NULL && - strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) - break; - } - if (l == vg_ptr->lv_max) return -ENXIO; - lv_ptr = vg_ptr->lv[l]; - - /* check for active snapshot */ - if 
(lv->lv_access & LV_SNAPSHOT) - { - ulong e; - lv_block_exception_t *lvbe, *lvbe_old; - struct list_head * lvs_hash_table_old; - - if (lv->lv_block_exception == NULL) return -ENXIO; - size = lv->lv_remap_end * sizeof ( lv_block_exception_t); - if ((lvbe = vmalloc(size)) == NULL) - { - printk(KERN_CRIT - "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_BLOCK_EXCEPTION " - "of %lu Byte at line %d\n", - lvm_name, size, __LINE__); - return -ENOMEM; - } - if (lv->lv_remap_end > lv_ptr->lv_remap_end) - { - if (copy_from_user(lvbe, lv->lv_block_exception, size)) - { - vfree(lvbe); - return -EFAULT; - } - } - - lvbe_old = lv_ptr->lv_block_exception; - lvs_hash_table_old = lv_ptr->lv_snapshot_hash_table; - - /* we need to play on the safe side here... */ - down(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - if (lv_ptr->lv_block_exception == NULL || - lv_ptr->lv_remap_ptr > lv_ptr->lv_remap_end) - { - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - vfree(lvbe); - return -EPERM; - } - memcpy(lvbe, - lv_ptr->lv_block_exception, - (lv->lv_remap_end > lv_ptr->lv_remap_end ? 
- lv_ptr->lv_remap_ptr : lv->lv_remap_end) * sizeof(lv_block_exception_t)); - - lv_ptr->lv_block_exception = lvbe; - lv_ptr->lv_remap_end = lv->lv_remap_end; - if (lvm_snapshot_alloc_hash_table(lv_ptr) != 0) - { - lvm_drop_snapshot(lv_ptr, "no memory for hash table"); - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - vfree(lvbe_old); - vfree(lvs_hash_table_old); - return -ENOMEM; - } - - for (e = 0; e < lv_ptr->lv_remap_ptr; e++) - lvm_hash_link (lv_ptr->lv_block_exception + e, - lv_ptr->lv_block_exception[e].rdev_org, - lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); - - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - - vfree(lvbe_old); - vfree(lvs_hash_table_old); +static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) { + ulong size; + lv_block_exception_t *lvbe; + + if (!new_lv->lv_block_exception) + return -ENXIO; + + size = new_lv->lv_remap_end * sizeof(lv_block_exception_t); + if ((lvbe = vmalloc(size)) == NULL) { + printk(KERN_CRIT + "%s -- lvm_do_lv_extend_reduce: vmalloc " + "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } - return 0; - } + if ((new_lv->lv_remap_end > old_lv->lv_remap_end) && + (copy_from_user(lvbe, new_lv->lv_block_exception, size))) { + vfree(lvbe); + return -EFAULT; + } + new_lv->lv_block_exception = lvbe; + if (lvm_snapshot_alloc_hash_table(new_lv)) { + vfree(new_lv->lv_block_exception); + return -ENOMEM; + } - /* we drop in here in case it is an original logical volume */ - if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) { - printk(KERN_CRIT - "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " - "of %lu Byte at line %d\n", - lvm_name, size, __LINE__); - return -ENOMEM; - } - /* get the PE structures from user space */ - if (copy_from_user(pe, pep, size)) { - vfree(pe); - return -EFAULT; - } + return 0; +} - /* reduce allocation counters on PV(s) */ - for (le = 0; le < lv_ptr->lv_allocated_le; le++) { - 
vg_ptr->pe_allocated--; - for (p = 0; p < vg_ptr->pv_cur; p++) { - if (vg_ptr->pv[p]->pv_dev == - lv_ptr->lv_current_pe[le].dev) { - vg_ptr->pv[p]->pe_allocated--; - break; - } - } - } +static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) { + ulong size, l, p, end; + pe_t *pe; + + /* allocate space for new pe structures */ + size = new_lv->lv_current_le * sizeof(pe_t); + if ((pe = vmalloc(size)) == NULL) { + printk(KERN_CRIT + "%s -- lvm_do_lv_extend_reduce: " + "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + /* get the PE structures from user space */ + if (copy_from_user(pe, new_lv->lv_current_pe, size)) { + if(old_lv->lv_access & LV_SNAPSHOT) + vfree(new_lv->lv_snapshot_hash_table); + vfree(pe); + return -EFAULT; + } - /* save pointer to "old" lv/pe pointer array */ - pep1 = lv_ptr->lv_current_pe; - end = lv_ptr->lv_current_le; + new_lv->lv_current_pe = pe; - /* save open counter... */ - lv->lv_open = lv_ptr->lv_open; - lv->lv_snapshot_prev = lv_ptr->lv_snapshot_prev; - lv->lv_snapshot_next = lv_ptr->lv_snapshot_next; - lv->lv_snapshot_org = lv_ptr->lv_snapshot_org; + /* reduce allocation counters on PV(s) */ + for (l = 0; l < old_lv->lv_allocated_le; l++) { + vg_ptr->pe_allocated--; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + old_lv->lv_current_pe[l].dev) { + vg_ptr->pv[p]->pe_allocated--; + break; + } + } + } - lv->lv_current_pe = pe; + /* extend the PE count in PVs */ + for (l = 0; l < new_lv->lv_allocated_le; l++) { + vg_ptr->pe_allocated++; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + new_lv->lv_current_pe[l].dev) { + vg_ptr->pv[p]->pe_allocated++; + break; + } + } + } - /* save # of old allocated logical extents */ - old_allocated_le = lv_ptr->lv_allocated_le; + /* save availiable i/o statistic data */ + if (old_lv->lv_stripes < 2) { /* linear logical volume */ + end = min(old_lv->lv_current_le, new_lv->lv_current_le); + 
for (l = 0; l < end; l++) { + new_lv->lv_current_pe[l].reads += + old_lv->lv_current_pe[l].reads; - /* copy preloaded LV */ - memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t)); + new_lv->lv_current_pe[l].writes += + old_lv->lv_current_pe[l].writes; + } - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; - /* vg_lv_map array doesn't have to be changed here */ + } else { /* striped logical volume */ + uint i, j, source, dest, end, old_stripe_size, new_stripe_size; - LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); + old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes; + new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes; + end = min(old_stripe_size, new_stripe_size); - /* save availiable i/o statistic data */ - /* linear logical volume */ - if (lv_ptr->lv_stripes < 2) { - /* Check what last LE shall be used */ - if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le; - for (le = 0; le < end; le++) { - lv_ptr->lv_current_pe[le].reads += pep1[le].reads; - lv_ptr->lv_current_pe[le].writes += pep1[le].writes; - } - /* striped logical volume */ - } else { - uint i, j, source, dest, end, old_stripe_size, new_stripe_size; - - old_stripe_size = old_allocated_le / lv_ptr->lv_stripes; - new_stripe_size = lv_ptr->lv_allocated_le / lv_ptr->lv_stripes; - end = old_stripe_size; - if (end > new_stripe_size) end = new_stripe_size; - for (i = source = dest = 0; - i < lv_ptr->lv_stripes; i++) { - for (j = 0; j < end; j++) { - lv_ptr->lv_current_pe[dest + j].reads += - pep1[source + j].reads; - lv_ptr->lv_current_pe[dest + j].writes += - pep1[source + j].writes; - } - source += old_stripe_size; - dest += new_stripe_size; - } - } + for (i = source = dest = 0; + i < new_lv->lv_stripes; i++) { + for (j = 0; j < end; j++) { + new_lv->lv_current_pe[dest + j].reads += + old_lv->lv_current_pe[source + j].reads; + 
new_lv->lv_current_pe[dest + j].writes += + old_lv->lv_current_pe[source + j].writes; + } + source += old_stripe_size; + dest += new_stripe_size; + } + } - /* extend the PE count in PVs */ - for (le = 0; le < lv_ptr->lv_allocated_le; le++) { - vg_ptr->pe_allocated++; - for (p = 0; p < vg_ptr->pv_cur; p++) { - if (vg_ptr->pv[p]->pv_dev == - lv_ptr->lv_current_pe[le].dev) { - vg_ptr->pv[p]->pe_allocated++; - break; - } - } - } + return 0; +} - vfree ( pep1); - pep1 = NULL; +static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv) +{ + int r; + ulong l, e, size; + vg_t *vg_ptr = vg[VG_CHR(minor)]; + lv_t *old_lv; + pe_t *pe; + + if ((pe = new_lv->lv_current_pe) == NULL) + return -EINVAL; + + for (l = 0; l < vg_ptr->lv_max; l++) + if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name)) + break; + + if (l == vg_ptr->lv_max) + return -ENXIO; + + old_lv = vg_ptr->lv[l]; + + if (old_lv->lv_access & LV_SNAPSHOT) { + /* only perform this operation on active snapshots */ + if (old_lv->lv_status & LV_ACTIVE) + r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv); + else + r = -EPERM; + + } else + r = __extend_reduce(vg_ptr, old_lv, new_lv); + + if(r) + return r; + + /* copy relevent fields */ + down_write(&old_lv->lv_lock); + + if(new_lv->lv_access & LV_SNAPSHOT) { + size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ? 
+ old_lv->lv_remap_ptr : new_lv->lv_remap_end; + size *= sizeof(lv_block_exception_t); + memcpy(new_lv->lv_block_exception, + old_lv->lv_block_exception, size); + + old_lv->lv_remap_end = new_lv->lv_remap_end; + old_lv->lv_block_exception = new_lv->lv_block_exception; + old_lv->lv_snapshot_hash_table = + new_lv->lv_snapshot_hash_table; + old_lv->lv_snapshot_hash_table_size = + new_lv->lv_snapshot_hash_table_size; + old_lv->lv_snapshot_hash_mask = + new_lv->lv_snapshot_hash_mask; + + for (e = 0; e < new_lv->lv_remap_ptr; e++) + lvm_hash_link(new_lv->lv_block_exception + e, + new_lv->lv_block_exception[e].rdev_org, + new_lv->lv_block_exception[e].rsector_org, + new_lv); + + } else { + + vfree(old_lv->lv_current_pe); + vfree(old_lv->lv_snapshot_hash_table); + + old_lv->lv_size = new_lv->lv_size; + old_lv->lv_allocated_le = new_lv->lv_allocated_le; + old_lv->lv_current_le = new_lv->lv_current_le; + old_lv->lv_current_pe = new_lv->lv_current_pe; + lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects = + old_lv->lv_size; + lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1; + + if (old_lv->lv_access & LV_SNAPSHOT_ORG) { + lv_t *snap; + for(snap = old_lv->lv_snapshot_next; snap; + snap = snap->lv_snapshot_next) { + down_write(&snap->lv_lock); + snap->lv_current_pe = old_lv->lv_current_pe; + snap->lv_allocated_le = + old_lv->lv_allocated_le; + snap->lv_current_le = old_lv->lv_current_le; + snap->lv_size = old_lv->lv_size; + + lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects + = old_lv->lv_size; + lvm_size[MINOR(snap->lv_dev)] = + old_lv->lv_size >> 1; + __update_hardsectsize(snap); + up_write(&snap->lv_lock); + } + } + } - if (lv->lv_access & LV_SNAPSHOT_ORG) - { - /* Correct the snapshot size information */ - while ((lv_ptr = lv_ptr->lv_snapshot_next) != NULL) - { - lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; - lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; - lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; - 
lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; - } - } + __update_hardsectsize(old_lv); + up_write(&old_lv->lv_lock); - return 0; + return 0; } /* lvm_do_lv_extend_reduce() */ @@ -2648,10 +2400,10 @@ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg) { uint l; - ulong size; - lv_t lv; - lv_t *lv_ptr; lv_status_byname_req_t lv_status_byname_req; + void *saved_ptr1; + void *saved_ptr2; + lv_t *lv_ptr; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_status_byname_req, arg, @@ -2659,28 +2411,31 @@ static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg) return -EFAULT; if (lv_status_byname_req.lv == NULL) return -EINVAL; - if (copy_from_user(&lv, lv_status_byname_req.lv, - sizeof(lv_t)) != 0) - return -EFAULT; for (l = 0; l < vg_ptr->lv_max; l++) { - lv_ptr = vg_ptr->lv[l]; - if (lv_ptr != NULL && + if ((lv_ptr = vg_ptr->lv[l]) != NULL && strcmp(lv_ptr->lv_name, - lv_status_byname_req.lv_name) == 0) { - if (copy_to_user(lv_status_byname_req.lv, + lv_status_byname_req.lv_name) == 0) { + /* Save usermode pointers */ + if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0) + return -EFAULT; + if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0) + return -EFAULT; + if (copy_to_user(lv_status_byname_req.lv, lv_ptr, sizeof(lv_t)) != 0) return -EFAULT; - if (lv.lv_current_pe != NULL) { - size = lv_ptr->lv_allocated_le * - sizeof(pe_t); - if (copy_to_user(lv.lv_current_pe, + if (saved_ptr1 != NULL) { + if (copy_to_user(saved_ptr1, lv_ptr->lv_current_pe, - size) != 0) + lv_ptr->lv_allocated_le * + sizeof(pe_t)) != 0) return -EFAULT; } + /* Restore usermode pointers */ + if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0) + return 
-EFAULT; return 0; } } @@ -2693,34 +2448,44 @@ static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg) */ static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg) { - ulong size; - lv_t lv; - lv_t *lv_ptr; lv_status_byindex_req_t lv_status_byindex_req; + void *saved_ptr1; + void *saved_ptr2; + lv_t *lv_ptr; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_status_byindex_req, arg, sizeof(lv_status_byindex_req)) != 0) return -EFAULT; - if ((lvp = lv_status_byindex_req.lv) == NULL) + if (lv_status_byindex_req.lv == NULL) + return -EINVAL; + if (lv_status_byindex_req.lv_index <0 || + lv_status_byindex_req.lv_index >= MAX_LV) return -EINVAL; if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL) return -ENXIO; - if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) - return -EFAULT; + /* Save usermode pointers */ + if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0) + return -EFAULT; + if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0) + return -EFAULT; - if (copy_to_user(lvp, lv_ptr, sizeof(lv_t)) != 0) + if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0) return -EFAULT; - - if (lv.lv_current_pe != NULL) { - size = lv_ptr->lv_allocated_le * sizeof(pe_t); - if (copy_to_user(lv.lv_current_pe, - lv_ptr->lv_current_pe, - size) != 0) + if (saved_ptr1 != NULL) { + if (copy_to_user(saved_ptr1, + lv_ptr->lv_current_pe, + lv_ptr->lv_allocated_le * + sizeof(pe_t)) != 0) return -EFAULT; } + + /* Restore usermode pointers */ + if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0) + return -EFAULT; + return 0; } /* lvm_do_lv_status_byindex() */ @@ -2731,6 +2496,9 @@ static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg) static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) { int l; lv_status_bydev_req_t lv_status_bydev_req; + void *saved_ptr1; + void *saved_ptr2; + lv_t *lv_ptr; if (vg_ptr == 
NULL) return -ENXIO; if (copy_from_user(&lv_status_bydev_req, arg, @@ -2743,10 +2511,26 @@ static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) { } if ( l == vg_ptr->lv_max) return -ENXIO; + lv_ptr = vg_ptr->lv[l]; + + /* Save usermode pointers */ + if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0) + return -EFAULT; + if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0) + return -EFAULT; - if (copy_to_user(lv_status_bydev_req.lv, - vg_ptr->lv[l], sizeof(lv_t)) != 0) + if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0) return -EFAULT; + if (saved_ptr1 != NULL) { + if (copy_to_user(saved_ptr1, + lv_ptr->lv_current_pe, + lv_ptr->lv_allocated_le * + sizeof(pe_t)) != 0) + return -EFAULT; + } + /* Restore usermode pointers */ + if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0) + return -EFAULT; return 0; } /* lvm_do_lv_status_bydev() */ @@ -2766,11 +2550,11 @@ static int lvm_do_lv_rename(vg_t *vg_ptr, lv_req_t *lv_req, lv_t *lv) if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue; if (lv_ptr->lv_dev == lv->lv_dev) { - lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); + lvm_fs_remove_lv(vg_ptr, lv_ptr); strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN); - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); + lvm_fs_create_lv(vg_ptr, lv_ptr); break; } } @@ -2787,9 +2571,7 @@ static int lvm_do_pv_change(vg_t *vg_ptr, void *arg) { uint p; pv_t *pv_ptr; -#ifdef LVM_GET_INODE - struct inode *inode_sav; -#endif + struct block_device *bd; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&pv_change_req, arg, @@ -2801,20 +2583,17 @@ static int lvm_do_pv_change(vg_t *vg_ptr, void *arg) if (pv_ptr != NULL && strcmp(pv_ptr->pv_name, pv_change_req.pv_name) == 0) { -#ifdef LVM_GET_INODE - inode_sav = pv_ptr->inode; -#endif + + bd = pv_ptr->bd; if (copy_from_user(pv_ptr, pv_change_req.pv, sizeof(pv_t)) != 0) return -EFAULT; + 
pv_ptr->bd = bd; /* We don't need the PE list in kernel space as with LVs pe_t list */ pv_ptr->pe = NULL; -#ifdef LVM_GET_INODE - pv_ptr->inode = inode_sav; -#endif return 0; } } @@ -2849,161 +2628,27 @@ static int lvm_do_pv_status(vg_t *vg_ptr, void *arg) return -ENXIO; } /* lvm_do_pv_status() */ - - -/* - * create a devfs entry for a volume group - */ -void lvm_do_create_devfs_entry_of_vg ( vg_t *vg_ptr) { - vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL); - ch_devfs_handle[vg_ptr->vg_number] = devfs_register( - vg_devfs_handle[vg_ptr->vg_number] , "group", - DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_chr_fops, NULL); -} - - -/* - * create a /proc entry for a logical volume - */ -void lvm_do_create_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { - char *basename; - - if ( vg_ptr->lv_subdir_pde != NULL) { - basename = strrchr(lv_ptr->lv_name, '/'); - if (basename == NULL) basename = lv_ptr->lv_name; - else basename++; - pde = create_proc_entry(basename, S_IFREG, - vg_ptr->lv_subdir_pde); - if ( pde != NULL) { - pde->read_proc = lvm_proc_read_lv_info; - pde->data = lv_ptr; - } - } -} - - /* - * remove a /proc entry for a logical volume + * character device support function flush and invalidate all buffers of a PV */ -void lvm_do_remove_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { - char *basename; - - if ( vg_ptr->lv_subdir_pde != NULL) { - basename = strrchr(lv_ptr->lv_name, '/'); - if (basename == NULL) basename = lv_ptr->lv_name; - else basename++; - remove_proc_entry(basename, vg_ptr->lv_subdir_pde); - } -} - +static int lvm_do_pv_flush(void *arg) +{ + pv_flush_req_t pv_flush_req; -/* - * create a /proc entry for a physical volume - */ -void lvm_do_create_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { - int offset = 0; - char *basename; - char buffer[NAME_LEN]; - - basename = pv_ptr->pv_name; - if (strncmp(basename, "/dev/", 5) == 0) offset = 5; - strncpy(buffer, 
basename + offset, sizeof(buffer)); - basename = buffer; - while ( ( basename = strchr ( basename, '/')) != NULL) *basename = '_'; - pde = create_proc_entry(buffer, S_IFREG, vg_ptr->pv_subdir_pde); - if ( pde != NULL) { - pde->read_proc = lvm_proc_read_pv_info; - pde->data = pv_ptr; - } -} + if (copy_from_user(&pv_flush_req, arg, + sizeof(pv_flush_req)) != 0) + return -EFAULT; + fsync_dev(pv_flush_req.pv_dev); + invalidate_buffers(pv_flush_req.pv_dev); -/* - * remove a /proc entry for a physical volume - */ -void lvm_do_remove_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { - char *basename; - - basename = strrchr(pv_ptr->pv_name, '/'); - if ( vg_ptr->pv_subdir_pde != NULL) { - basename = strrchr(pv_ptr->pv_name, '/'); - if (basename == NULL) basename = pv_ptr->pv_name; - else basename++; - remove_proc_entry(basename, vg_ptr->pv_subdir_pde); - } + return 0; } - -/* - * create a /proc entry for a volume group - */ -void lvm_do_create_proc_entry_of_vg ( vg_t *vg_ptr) { - int l, p; - pv_t *pv_ptr; - lv_t *lv_ptr; - - pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR, - lvm_proc_vg_subdir); - if ( pde != NULL) { - vg_ptr->vg_dir_pde = pde; - pde = create_proc_entry("group", S_IFREG, - vg_ptr->vg_dir_pde); - if ( pde != NULL) { - pde->read_proc = lvm_proc_read_vg_info; - pde->data = vg_ptr; - } - pde = create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, - vg_ptr->vg_dir_pde); - if ( pde != NULL) { - vg_ptr->lv_subdir_pde = pde; - for ( l = 0; l < vg_ptr->lv_max; l++) { - if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); - } - } - pde = create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, - vg_ptr->vg_dir_pde); - if ( pde != NULL) { - vg_ptr->pv_subdir_pde = pde; - for ( p = 0; p < vg_ptr->pv_max; p++) { - if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; - lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr); - } - } - } -} - -/* - * remove a /proc entry for a volume group - */ -void lvm_do_remove_proc_entry_of_vg ( vg_t *vg_ptr) { - int l, p; 
- lv_t *lv_ptr; - pv_t *pv_ptr; - - for ( l = 0; l < vg_ptr->lv_max; l++) { - if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; - lvm_do_remove_proc_entry_of_lv ( vg_ptr, vg_ptr->lv[l]); - } - for ( p = 0; p < vg_ptr->pv_max; p++) { - if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; - lvm_do_remove_proc_entry_of_pv ( vg_ptr, vg_ptr->pv[p]); - } - if ( vg_ptr->vg_dir_pde != NULL) { - remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde); - remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde); - remove_proc_entry("group", vg_ptr->vg_dir_pde); - remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir); - } -} - - /* * support function initialize gendisk variables */ -void __init lvm_geninit(struct gendisk *lvm_gdisk) +static void __init lvm_geninit(struct gendisk *lvm_gdisk) { int i = 0; @@ -3019,36 +2664,85 @@ void __init lvm_geninit(struct gendisk *lvm_gdisk) blk_size[MAJOR_NR] = lvm_size; blksize_size[MAJOR_NR] = lvm_blocksizes; - hardsect_size[MAJOR_NR] = lvm_blocksizes; + hardsect_size[MAJOR_NR] = lvm_hardsectsizes; return; } /* lvm_gen_init() */ + +/* Must have down_write(_pe_lock) when we enqueue buffers */ +static void _queue_io(struct buffer_head *bh, int rw) { + if (bh->b_reqnext) BUG(); + bh->b_reqnext = _pe_requests; + _pe_requests = bh; +} + +/* Must have down_write(_pe_lock) when we dequeue buffers */ +static struct buffer_head *_dequeue_io(void) +{ + struct buffer_head *bh = _pe_requests; + _pe_requests = NULL; + return bh; +} + +/* + * We do not need to hold _pe_lock to flush buffers. bh should be taken from + * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set + * NULL and we drop _pe_lock. Any new buffers defered at this time will be + * added to a new list, and the old buffers can have their I/O restarted + * asynchronously. + * + * If, for some reason, the same PE is locked again before all of these writes + * have finished, then these buffers will just be re-queued (i.e. no danger). 
+ */ +static void _flush_io(struct buffer_head *bh) +{ + while (bh) { + struct buffer_head *next = bh->b_reqnext; + bh->b_reqnext = NULL; + /* resubmit this buffer head */ + generic_make_request(WRITE, bh); + bh = next; + } +} + /* - * return a pointer to a '-' padded uuid + * we must open the pv's before we use them */ -static char *lvm_show_uuid ( char *uuidstr) { - int i, j; - static char uuid[NAME_LEN] = { 0, }; +static int _open_pv(pv_t *pv) { + int err; + struct block_device *bd; - memset ( uuid, 0, NAME_LEN); + if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev)))) + return -ENOMEM; + + err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE); + if (err) + return err; - i = 6; - memcpy ( uuid, uuidstr, i); - uuidstr += i; + pv->bd = bd; + return 0; +} - for ( j = 0; j < 6; j++) { - uuid[i++] = '-'; - memcpy ( &uuid[i], uuidstr, 4); - uuidstr += 4; - i += 4; +static void _close_pv(pv_t *pv) { + if (pv) { + struct block_device *bdev = pv->bd; + pv->bd = NULL; + if (bdev) + blkdev_put(bdev, BDEV_FILE); } +} - memcpy ( &uuid[i], uuidstr, 2 ); +static unsigned long _sectors_to_k(unsigned long sect) +{ + if(SECTOR_SIZE > 1024) { + return sect * (SECTOR_SIZE / 1024); + } - return uuid; + return sect / (1024 / SECTOR_SIZE); } module_init(lvm_init); module_exit(lvm_cleanup); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/pcmcia/wavelan_cs.c b/drivers/net/pcmcia/wavelan_cs.c index 5ef7b5111781..c2efbe4240c7 100644 --- a/drivers/net/pcmcia/wavelan_cs.c +++ b/drivers/net/pcmcia/wavelan_cs.c @@ -4838,4 +4838,4 @@ exit_wavelan_cs(void) module_init(init_wavelan_cs); module_exit(exit_wavelan_cs); -MODULE_LICENSE("BSD without advertisement clause"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index f2b17767ef8b..4ad12acf7d4a 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -53,13 +53,6 @@ static unsigned int pcnet32_portlist[] __initdata = {0x300, 0x320, 0x340, 0x360, static struct pci_device_id pcnet32_pci_tbl[] 
__devinitdata = { { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, -/* this id is never reached as the match above occurs first. - * However it clearly has significance, so let's not remove it - * until we know what that significance is. -jgarzik - */ -#if 0 - { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, 0x1014, 0x2000, 0, 0, 0 }, -#endif { 0, } }; diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c index 60b9e998564b..bbae0d62254d 100644 --- a/drivers/net/tokenring/lanstreamer.c +++ b/drivers/net/tokenring/lanstreamer.c @@ -329,7 +329,7 @@ err_out: } static void __devexit streamer_remove_one(struct pci_dev *pdev) { - struct net_device *dev=pci_get_drv_data(pdev); + struct net_device *dev=pci_get_drvdata(pdev); struct streamer_private *streamer_priv; #if STREAMER_DEBUG diff --git a/drivers/parport/Config.in b/drivers/parport/Config.in index ec5d550e33c1..b4d643315645 100644 --- a/drivers/parport/Config.in +++ b/drivers/parport/Config.in @@ -45,6 +45,12 @@ if [ "$CONFIG_PARPORT" != "n" ]; then else define_tristate CONFIG_PARPORT_ATARI n fi + if [ "$CONFIG_GSC_LASI" = "y" ]; then + dep_tristate ' LASI/ASP builtin parallel-port' CONFIG_PARPORT_GSC $CONFIG_PARPORT + else + define_tristate CONFIG_PARPORT_GSC n + fi + if [ "$CONFIG_SBUS" = "y" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then dep_tristate ' Sparc hardware (EXPERIMENTAL)' CONFIG_PARPORT_SUNBPP $CONFIG_PARPORT else diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 29e54fcaf310..015b5b0778ae 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1042,7 +1042,7 @@ void __init pci_read_bridge_bases(struct pci_bus *child) } } -static struct pci_bus * __init pci_alloc_bus(void) +static struct pci_bus * __devinit pci_alloc_bus(void) { struct pci_bus *b; @@ -1055,7 +1055,7 @@ static struct pci_bus * __init pci_alloc_bus(void) return b; } -static struct pci_bus 
* __init pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr) +struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr) { struct pci_bus *child; int i; @@ -1087,7 +1087,7 @@ static struct pci_bus * __init pci_add_new_bus(struct pci_bus *parent, struct pc return child; } -static unsigned int __init pci_do_scan_bus(struct pci_bus *bus); +unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus); /* * If it's a bridge, configure it and scan the bus behind it. @@ -1099,7 +1099,7 @@ static unsigned int __init pci_do_scan_bus(struct pci_bus *bus); * them, we proceed to assigning numbers to the remaining buses in * order to avoid overlaps between old and new bus numbers. */ -static int __init pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass) +static int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass) { unsigned int buses; unsigned short cr; @@ -1255,7 +1255,7 @@ int pci_setup_device(struct pci_dev * dev) * Read the config data for a PCI device, sanity-check it * and fill in the dev structure... 
*/ -static struct pci_dev * __init pci_scan_device(struct pci_dev *temp) +struct pci_dev * __devinit pci_scan_device(struct pci_dev *temp) { struct pci_dev *dev; u32 l; @@ -1285,7 +1285,7 @@ static struct pci_dev * __init pci_scan_device(struct pci_dev *temp) return dev; } -struct pci_dev * __init pci_scan_slot(struct pci_dev *temp) +struct pci_dev * __devinit pci_scan_slot(struct pci_dev *temp) { struct pci_bus *bus = temp->bus; struct pci_dev *dev; @@ -1323,7 +1323,7 @@ struct pci_dev * __init pci_scan_slot(struct pci_dev *temp) return first_dev; } -static unsigned int __init pci_do_scan_bus(struct pci_bus *bus) +unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus) { unsigned int devfn, max, pass; struct list_head *ln; @@ -1367,7 +1367,7 @@ static unsigned int __init pci_do_scan_bus(struct pci_bus *bus) return max; } -int __init pci_bus_exists(const struct list_head *list, int nr) +int __devinit pci_bus_exists(const struct list_head *list, int nr) { const struct list_head *l; @@ -1379,7 +1379,7 @@ int __init pci_bus_exists(const struct list_head *list, int nr) return 0; } -struct pci_bus * __init pci_alloc_primary_bus(int bus) +struct pci_bus * __devinit pci_alloc_primary_bus(int bus) { struct pci_bus *b; @@ -1398,7 +1398,7 @@ struct pci_bus * __init pci_alloc_primary_bus(int bus) return b; } -struct pci_bus * __init pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) +struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *b = pci_alloc_primary_bus(bus); if (b) { @@ -1916,7 +1916,7 @@ pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma) } -void __init pci_init(void) +void __devinit pci_init(void) { struct pci_dev *dev; @@ -1931,7 +1931,7 @@ void __init pci_init(void) #endif } -static int __init pci_setup(char *str) +static int __devinit pci_setup(char *str) { while (str) { char *k = strchr(str, ','); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 6280f1309bd9..0cb5e5e14eea 
100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -386,6 +386,30 @@ int pci_proc_detach_device(struct pci_dev *dev) return 0; } +int pci_proc_attach_bus(struct pci_bus* bus) +{ + struct proc_dir_entry *de; + char name[16]; + + if (!(de = bus->procdir)) { + sprintf(name, "%02x", bus->number); + de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir); + if (!de) + return -ENOMEM; + } + return 0; +} + +int pci_proc_detach_bus(struct pci_bus* bus) +{ + struct proc_dir_entry *de; + + if (!(de = bus->procdir)) { + remove_proc_entry(de->name, proc_bus_pci_dir); + } + return 0; +} + /* * Backward compatible /proc/pci interface. diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 94a767fc13dc..ae1c793d7e4c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -411,6 +411,48 @@ static void __init quirk_cardbus_legacy(struct pci_dev *dev) pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0); } +/* + * The AMD io apic can hang the box when an apic irq is masked. + * We check all revs >= B0 (yet not in the pre production!) as the bug + * is currently marked NoFix + * + * We have multiple reports of hangs with this chipset that went away with + * noapic specified. For the moment we assume its the errata. We may be wrong + * of course. However the advice is demonstrably good even if so.. + */ + +static void __init quirk_amd_ioapic(struct pci_dev *dev) +{ + u8 rev; + + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + if(rev >= 0x02) + { + printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n"); + printk(KERN_WARNING " : booting with the \"noapic\" option.\n"); + } +} + +/* + * Following the PCI ordering rules is optional on the AMD762. I'm not + * sure what the designers were smoking but let's not inhale... + * + * To be fair to AMD, it follows the spec by default, its BIOS people + * who turn it off! 
+ */ + +static void __init quirk_amd_ordering(struct pci_dev *dev) +{ + u32 pcic; + + pci_read_config_dword(dev, 0x42, &pcic); + if((pcic&2)==0) + { + pcic |= 2; + printk(KERN_WARNING "BIOS disabled PCI ordering compliance, so we enabled it again.\n"); + pci_write_config_dword(dev, 0x42, pcic); + } +} /* * The main table of quirks. @@ -463,6 +505,9 @@ static struct pci_fixup pci_fixups[] __initdata = { { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irqpic }, { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_6, quirk_via_irqpic }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering }, + { 0 } }; diff --git a/drivers/scsi/Config.in b/drivers/scsi/Config.in index bfdc7177bd91..ed5d7589f696 100644 --- a/drivers/scsi/Config.in +++ b/drivers/scsi/Config.in @@ -135,7 +135,16 @@ if [ "$CONFIG_SCSI_NCR53C7xx" != "n" ]; then bool ' allow FAST-SCSI [10MHz]' CONFIG_SCSI_NCR53C7xx_FAST bool ' allow DISCONNECT' CONFIG_SCSI_NCR53C7xx_DISCONNECT fi -if [ "$CONFIG_PCI" = "y" -a "$CONFIG_SCSI_NCR53C7xx" != "y" ]; then +if [ "$CONFIG_PCI" = "y" ]; then + dep_tristate 'SYM53C8XX Version 2 SCSI support' CONFIG_SCSI_SYM53C8XX_2 $CONFIG_SCSI + if [ "$CONFIG_SCSI_SYM53C8XX_2" != "n" ]; then + int ' DMA addressing mode' CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE 1 + int ' default tagged command queue depth' CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS 16 + int ' maximum number of queued commands' CONFIG_SCSI_SYM53C8XX_MAX_TAGS 64 + bool ' use normal IO' CONFIG_SCSI_SYM53C8XX_IOMAPPED + fi +fi +if [ "$CONFIG_PCI" = "y" -a "$CONFIG_SCSI_SYM53C8XX_2" != "y" ]; then dep_tristate 'NCR53C8XX SCSI support' CONFIG_SCSI_NCR53C8XX $CONFIG_SCSI dep_tristate 'SYM53C8XX SCSI support' CONFIG_SCSI_SYM53C8XX $CONFIG_SCSI if [ "$CONFIG_SCSI_NCR53C8XX" != "n" -o "$CONFIG_SCSI_SYM53C8XX" != "n" ]; then diff --git a/drivers/scsi/Makefile 
b/drivers/scsi/Makefile index 9cb5a6f99121..dc7fa3ae3cce 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -87,6 +87,10 @@ obj-$(CONFIG_SCSI_T128) += t128.o obj-$(CONFIG_SCSI_DMX3191D) += dmx3191d.o obj-$(CONFIG_SCSI_DTC3280) += dtc.o obj-$(CONFIG_SCSI_NCR53C7xx) += 53c7,8xx.o +subdir-$(CONFIG_SCSI_SYM53C8XX_2) += sym53c8xx_2 +ifeq ($(CONFIG_SCSI_SYM53C8XX_2),y) + obj-$(CONFIG_SCSI_SYM53C8XX_2) += sym53c8xx_2/sym53c8xx.o +endif obj-$(CONFIG_SCSI_SYM53C8XX) += sym53c8xx.o obj-$(CONFIG_SCSI_NCR53C8XX) += ncr53c8xx.o obj-$(CONFIG_SCSI_EATA_DMA) += eata_dma.o diff --git a/drivers/scsi/sym53c8xx_2/ChangeLog.txt b/drivers/scsi/sym53c8xx_2/ChangeLog.txt new file mode 100644 index 000000000000..c020492c0c8d --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/ChangeLog.txt @@ -0,0 +1,130 @@ +Sat Dec 30 21:30 2000 Gerard Roudier + * version sym-2.1.0-20001230 + - Initial release of SYM-2. + +Mon Jan 08 21:30 2001 Gerard Roudier + * version sym-2.1.1-20010108 + - Change a couple of defines containing ncr or NCR by their + equivalent containing sym or SYM instead. + +Sun Jan 14 22:30 2001 Gerard Roudier + * version sym-2.1.2-20010114 + - Fix a couple of printfs: + * Add the target number to the display of transfer parameters. + * Make the display of TCQ and queue depth clearer. + +Wed Jan 17 23:30 2001 Gerard Roudier + * version sym-2.1.3-20010117 + - Wrong residual values were returned in some situations. + This broke cdrecord with linux-2.4.0, for example. + +Sat Jan 20 18:00 2001 Gerard Roudier + * version sym-2.1.4-20010120 + - Add year 2001 to Copyright. + - A tiny bug in the dma memory freeing path has been fixed. + (Driver unload failed with a bad address reference). + +Wed Jan 24 21:00 2001 Gerard Roudier + * version sym-2.1.5-20010124 + - Make the driver work under Linux-2.4.x when statically linked + with the kernel. + - Check against memory allocation failure for SCRIPTZ and add the + missing free of this memory on instance detach. 
+ - Check against GPIO3 pulled low for HVD controllers (driver did + just the opposite). + Misdetection of BUS mode was triggerred on module reload only, + since BIOS settings were trusted instead on first load. + +Wed Feb 7 21:00 2001 Gerard Roudier + * version sym-2.1.6-20010207 + - Call pci_enable_device() as wished by kernel maintainers. + - Change the sym_queue_scsiio() interface. + This is intended to simplify portability. + - Move the code intended to deal with the dowloading of SCRIPTS + from SCRIPTS :) in the patch method (was wrongly placed in + the SCRIPTS setup method). + - Add a missing cpu_to_scr() (np->abort_tbl.addr) + - Remove a wrong cpu_to_scr() (np->targtbl_ba) + - Cleanup a bit the PPR failure recovery code. + +Sat Mar 3 21:00 2001 Gerard Roudier + - Add option SYM_OPT_ANNOUNCE_TRANSFER_RATE and move the + corresponding code to file sym_misc.c. + Also move the code that sniffes INQUIRY to sym_misc.c. + This allows to share the corresponding code with NetBSD + without polluating the core driver source (sym_hipd.c). + - Add optionnal code that handles IO timeouts from the driver. + (not used under Linux, but required for NetBSD) + - Donnot assume any longer that PAGE_SHIFT and PAGE_SIZE are + defined at compile time, as at least NetBSD uses variables + in memory for that. + - Refine a work-around for the C1010-33 that consists in + disabling internal LOAD/STORE. Was applied up to revision 1. + Is now only applied to revision 0. + - Some code reorganisations due to code moves between files. + +Tues Apr 10 21:00 2001 Gerard Roudier + * version sym-2.1.9-20010412 + - Reset 53C896 and 53C1010 chip according to the manual. + (i.e.: set the ABRT bit in ISTAT if SCRIPTS are running) + - Set #LUN in request sense only if scsi version <= 2 and + #LUN <= 7. + - Set busy_itl in LCB to 1 if the LCB is allocated and a + SCSI command is active. This is a simplification. + - In sym_hcb_free(), donnot scan the free_ccbq if no CCBs + has been allocated. 
This fixes a panic if attach failed. + - Add DT/ST (double/simple transition) in the transfer + negotiation announce. + - Forces the max number of tasks per LUN to at least 64. + - Use pci_set_dma_mask() for linux-2.4.3 and above. + - A couple of comments fixes. + +Wed May 22:00 2001 Gerard Roudier + * version sym-2.1.10-20010509 + - Mask GPCNTL against 0x1c (was 0xfc) for the reading of the NVRAM. + This ensure LEDC bit will not be set on 896 and later chips. + Fix sent by Chip Salzenberg <chip@perlsupport.com>. + - Define the number of PQS BUSes supported. + Fix sent by Stig Telfer <stig@api-networks.com> + - Miscellaneous common code rearrangements due to NetBSD accel + ioctl support, without impact on Linux (hopefully). + +Mon July 2 12:00 2001 Gerard Roudier + * version sym-2.1.11-20010702 + - Add Tekram 390 U2B/U2W SCSI LED handling. + Submitted by Chip Salzenberg <chip@valinux.com> + - Add call to scsi_set_pci_device() for kernels >= 2.4.4. + - Check pci dma mapping failures and complete the IO with some + error when such mapping fails. + - Fill in instance->max_cmd_len for kernels > 2.4.0. + - A couple of tiny fixes ... + +Sun Sep 9 18:00 2001 Gerard Roudier + * version sym-2.1.12-20010909 + - Change my email address. + - Add infrastructure for the forthcoming 64 bit DMA adressing support. + (Based on PCI 64 bit patch from David S. Miller) + - Donnot use anymore vm_offset_t type. + +Sat Sep 15 20:00 2001 Gerard Roudier + * version sym-2.1.13-20010916 + - Add support for 64 bit DMA addressing using segment registers. + 16 registers for up to 4 GB x 16 -> 64 GB. + +Sat Sep 22 12:00 2001 Gerard Roudier + * version sym-2.1.14-20010922 + - Complete rewrite of the eh handling. The driver is now using a + semaphore in order to behave synchronously as required by the eh + threads. A timer is also used to prevent from waiting indefinitely. 
+ +Sun Sep 30 17:00 2001 Gerard Roudier + * version sym-2.1.15-20010930 + - Include <linux/module.h> unconditionnaly as expected by latest + kernels. + - Use del_timer_sync() for recent kernels to kill the driver timer + on module release. + +Sun Oct 28 15:00 2001 Gerard Roudier + * version sym-2.1.16-20011028 + - Slightly simplify driver configuration. + - Prepare a new patch against linux-2.4.13. diff --git a/drivers/scsi/sym53c8xx_2/Documentation.txt b/drivers/scsi/sym53c8xx_2/Documentation.txt new file mode 100644 index 000000000000..713afc8f919b --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/Documentation.txt @@ -0,0 +1,1149 @@ +The Linux SYM-2 driver documentation file + +Written by Gerard Roudier <groudier@free.fr> +21 Rue Carnot +95170 DEUIL LA BARRE - FRANCE + +Decembre 28 2000 +=============================================================================== + +1. Introduction +2. Supported chips and SCSI features +3. Advantages of this driver for newer chips. + 3.1 Optimized SCSI SCRIPTS + 3.2 New features appeared with the SYM53C896 +4. Memory mapped I/O versus normal I/O +5. Tagged command queueing +6. Parity checking +7. Profiling information +8. Control commands + 8.1 Set minimum synchronous period + 8.2 Set wide size + 8.3 Set maximum number of concurrent tagged commands + 8.4 Set debug mode + 8.5 Set flag (no_disc) + 8.6 Set verbose level + 8.7 Reset all logical units of a target + 8.8 Abort all tasks of all logical units of a target +9. Configuration parameters +10. 
Boot setup commands + 10.1 Syntax + 10.2 Available arguments + 10.2.1 Master parity checking + 10.2.2 Scsi parity checking + 10.2.3 Default number of tagged commands + 10.2.4 Default synchronous period factor + 10.2.5 Verbosity level + 10.2.6 Debug mode + 10.2.7 Burst max + 10.2.8 LED support + 10.2.9 Max wide + 10.2.10 Differential mode + 10.2.11 IRQ mode + 10.2.12 Reverse probe + 10.2.13 Fix up PCI configuration space + 10.2.14 Serial NVRAM + 10.2.15 Check SCSI BUS + 10.2.16 Exclude a host from being attached + 10.2.17 Suggest a default SCSI id for hosts + 10.3 PCI configuration fix-up boot option + 10.4 Serial NVRAM support boot option + 10.5 SCSI BUS checking boot option +11. SCSI problem troubleshooting + 15.1 Problem tracking + 15.2 Understanding hardware error reports +12. Serial NVRAM support (by Richard Waltham) + 17.1 Features + 17.2 Symbios NVRAM layout + 17.3 Tekram NVRAM layout + +=============================================================================== + +1. Introduction + +This driver supports the whole SYM53C8XX family of PCI-SCSI controllers. +It also support the subset of LSI53C10XX PCI-SCSI controllers that are based +on the SYM53C8XX SCRIPTS language. + +It replaces the sym53c8xx+ncr53c8xx driver bundle and shares its core code +with the FreeBSD SYM-2 driver. The `glue' that allows this driver to work +under Linux is contained in 2 files named sym_glue.h and sym_glue.c. +Other drivers files are intended not to depend on the Operating System +on which the driver is used. + +The history of this driver can be summerized as follows: + +1993: ncr driver written for 386bsd and FreeBSD by: + Wolfgang Stanglmeier <wolf@cologne.de> + Stefan Esser <se@mi.Uni-Koeln.de> + +1996: port of the ncr driver to Linux-1.2.13 and rename it ncr53c8xx. + Gerard Roudier + +1998: new sym53c8xx driver for Linux based on LOAD/STORE instruction and that + adds full support for the 896 but drops support for early NCR devices. 
+ Gerard Roudier + +1999: port of the sym53c8xx driver to FreeBSD and support for the LSI53C1010 + 33 MHz and 66MHz Ultra-3 controllers. The new driver is named `sym'. + Gerard Roudier + +2000: Add support for early NCR devices to FreeBSD `sym' driver. + Break the driver into several sources and separate the OS glue + code from the core code that can be shared among different O/Ses. + Write a glue code for Linux. + Gerard Roudier + +This README file addresses the Linux version of the driver. Under FreeBSD, +the driver documentation is the sym.8 man page. + +Information about new chips is available at LSILOGIC web server: + + http://www.lsilogic.com/ + +SCSI standard documentations are available at T10 site: + + http://www.t10.org/ + +Useful SCSI tools written by Eric Youngdale are part of most Linux +distributions: + scsiinfo: command line tool + scsi-config: TCL/Tk tool using scsiinfo + +2. Supported chips and SCSI features + +The following features are supported for all chips: + + Synchronous negotiation + Disconnection + Tagged command queuing + SCSI parity checking + PCI Master parity checking + +Other features depends on chip capabilities. +The driver notably uses optimized SCRIPTS for devices that support +LOAD/STORE and handles PHASE MISMATCH from SCRIPTS for devices that +support the corresponding feature. + +The following table shows some characteristics of the chip family. + + On board LOAD/STORE HARDWARE +Chip SDMS BIOS Wide SCSI std. Max. 
sync SCRIPTS PHASE MISMATCH +---- --------- ---- --------- ---------- ---------- -------------- +810 N N FAST10 10 MB/s N N +810A N N FAST10 10 MB/s Y N +815 Y N FAST10 10 MB/s N N +825 Y Y FAST10 20 MB/s N N +825A Y Y FAST10 20 MB/s Y N +860 N N FAST20 20 MB/s Y N +875 Y Y FAST20 40 MB/s Y N +875A Y Y FAST20 40 MB/s Y Y +876 Y Y FAST20 40 MB/s Y N +895 Y Y FAST40 80 MB/s Y N +895A Y Y FAST40 80 MB/s Y Y +896 Y Y FAST40 80 MB/s Y Y +897 Y Y FAST40 80 MB/s Y Y +1510D Y Y FAST40 80 MB/s Y Y +1010 Y Y FAST80 160 MB/s Y Y +1010_66* Y Y FAST80 160 MB/s Y Y + +* Chip supports 33MHz and 66MHz PCI bus clock. + + +Summary of other supported features: + +Module: allow to load the driver +Memory mapped I/O: increases performance +Control commands: write operations to the proc SCSI file system +Debugging information: written to syslog (expert only) +Scatter / gather +Shared interrupt +Boot setup commands +Serial NVRAM: Symbios and Tekram formats + + +3. Advantages of this driver for newer chips. + +3.1 Optimized SCSI SCRIPTS. + +All chips except the 810, 815 and 825, support new SCSI SCRIPTS instructions +named LOAD and STORE that allow to move up to 1 DWORD from/to an IO register +to/from memory much faster that the MOVE MEMORY instruction that is supported +by the 53c7xx and 53c8xx family. + +The LOAD/STORE instructions support absolute and DSA relative addressing +modes. The SCSI SCRIPTS had been entirely rewritten using LOAD/STORE instead +of MOVE MEMORY instructions. + +Due to the lack of LOAD/STORE SCRIPTS instructions by earlier chips, this +driver also incorporates a different SCRIPTS set based on MEMORY MOVE, in +order to provide support for the entire SYM53C8XX chips family. + +3.2 New features appeared with the SYM53C896 + +Newer chips (see above) allows handling of the phase mismatch context from +SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor +until the C code has saved the context of the transfer). 
+ +The 896 and 1010 chips support 64 bit PCI transactions and addressing, +while the 895A supports 32 bit PCI transactions and 64 bit addressing. +The SCRIPTS processor of these chips is not true 64 bit, but uses segment +registers for bit 32-63. Another interesting feature is that LOAD/STORE +instructions that address the on-chip RAM (8k) remain internal to the chip. + +4. Memory mapped I/O versus normal I/O + +Memory mapped I/O has less latency than normal I/O and is the recommended +way for doing IO with PCI devices. Memory mapped I/O seems to work fine on +most hardware configurations, but some poorly designed chipsets may break +this feature. A configuration option is provided for normal I/O to be +used but the driver defaults to MMIO. + +5. Tagged command queueing + +Queuing more than 1 command at a time to a device allows it to perform +optimizations based on actual head positions and its mechanical +characteristics. This feature may also reduce average command latency. +In order to really gain advantage of this feature, devices must have +a reasonnable cache size (No miracle is to be expected for a low-end +hard disk with 128 KB or less). +Some kown old SCSI devices do not properly support tagged command queuing. +Generally, firmware revisions that fix this kind of problems are available +at respective vendor web/ftp sites. +All I can say is that I never have had problem with tagged queuing using +this driver and its predecessors. Hard disks that behaved correctly for +me using tagged commands are the following: + +- IBM S12 0662 +- Conner 1080S +- Quantum Atlas I +- Quantum Atlas II +- Seagate Cheetah I +- Quantum Viking II +- IBM DRVS +- Quantum Atlas IV +- Seagate Cheetah II + +If your controller has NVRAM, you can configure this feature per target +from the user setup tool. The Tekram Setup program allows to tune the +maximum number of queued commands up to 32. The Symbios Setup only allows +to enable or disable this feature. 
+ +The maximum number of simultaneous tagged commands queued to a device +is currently set to 16 by default. This value is suitable for most SCSI +disks. With large SCSI disks (>= 2GB, cache >= 512KB, average seek time +<= 10 ms), using a larger value may give better performances. + +This driver supports up to 255 commands per device, and but using more than +64 is generally not worth-while, unless you are using a very large disk or +disk arrays. It is noticeable that most of recent hard disks seem not to +accept more than 64 simultaneous commands. So, using more than 64 queued +commands is probably just resource wasting. + +If your controller does not have NVRAM or if it is managed by the SDMS +BIOS/SETUP, you can configure tagged queueing feature and device queue +depths from the boot command-line. For example: + + sym53c8xx=tags:4/t2t3q15-t4q7/t1u0q32 + +will set tagged commands queue depths as follow: + +- target 2 all luns on controller 0 --> 15 +- target 3 all luns on controller 0 --> 15 +- target 4 all luns on controller 0 --> 7 +- target 1 lun 0 on controller 1 --> 32 +- all other target/lun --> 4 + +In some special conditions, some SCSI disk firmwares may return a +QUEUE FULL status for a SCSI command. This behaviour is managed by the +driver using the following heuristic: + +- Each time a QUEUE FULL status is returned, tagged queue depth is reduced + to the actual number of disconnected commands. + +- Every 200 successfully completed SCSI commands, if allowed by the + current limit, the maximum number of queueable commands is incremented. + +Since QUEUE FULL status reception and handling is resource wasting, the +driver notifies by default this problem to user by indicating the actual +number of commands used and their status, as well as its decision on the +device queue depth change. +The heuristic used by the driver in handling QUEUE FULL ensures that the +impact on performances is not too bad. 
You can get rid of the messages by +setting verbose level to zero, as follow: + +1st method: boot your system using 'sym53c8xx=verb:0' option. +2nd method: apply "setverbose 0" control command to the proc fs entry + corresponding to your controller after boot-up. + +6. Parity checking + +The driver supports SCSI parity checking and PCI bus master parity +checking. These features must be enabled in order to ensure safe data +transfers. However, some flawed devices or mother boards will have +problems with parity. You can disable either PCI parity or SCSI parity +checking by entering appropriate options from the boot command line. +(See 10: Boot setup commands). + +7. Profiling information + +This driver does not provide profiling informations as did its predecessors. +This feature was not this useful and added complexity to the code. +As the driver code got more complex, I have decided to remove everything +that didn't seem actually useful. + +8. Control commands + +Control commands can be sent to the driver with write operations to +the proc SCSI file system. The generic command syntax is the +following: + + echo "<verb> <parameters>" >/proc/scsi/sym53c8xx/0 + (assumes controller number is 0) + +Using "all" for "<target>" parameter with the commands below will +apply to all targets of the SCSI chain (except the controller). + +Available commands: + +8.1 Set minimum synchronous period factor + + setsync <target> <period factor> + + target: target number + period: minimum synchronous period. + Maximum speed = 1000/(4*period factor) except for special + cases below. + + Specify a period of 0, to force asynchronous transfer mode. 
+ + 9 means 12.5 nano-seconds synchronous period + 10 means 25 nano-seconds synchronous period + 11 means 30 nano-seconds synchronous period + 12 means 50 nano-seconds synchronous period + +8.2 Set wide size + + setwide <target> <size> + + target: target number + size: 0=8 bits, 1=16bits + +8.3 Set maximum number of concurrent tagged commands + + settags <target> <tags> + + target: target number + tags: number of concurrent tagged commands + must not be greater than configured (default: 16) + +8.4 Set debug mode + + setdebug <list of debug flags> + + Available debug flags: + alloc: print info about memory allocations (ccb, lcb) + queue: print info about insertions into the command start queue + result: print sense data on CHECK CONDITION status + scatter: print info about the scatter process + scripts: print info about the script binding process + tiny: print minimal debugging information + timing: print timing information of the NCR chip + nego: print information about SCSI negotiations + phase: print information on script interruptions + + Use "setdebug" with no argument to reset debug flags. + + +8.5 Set flag (no_disc) + + setflag <target> <flag> + + target: target number + + For the moment, only one flag is available: + + no_disc: not allow target to disconnect. + + Do not specify any flag in order to reset the flag. For example: + - setflag 4 + will reset no_disc flag for target 4, so will allow it disconnections. + - setflag all + will allow disconnection for all devices on the SCSI bus. + + +8.6 Set verbose level + + setverbose #level + + The driver default verbose level is 1. This command allows to change + th driver verbose level after boot-up. + +8.7 Reset all logical units of a target + + resetdev <target> + + target: target number + The driver will try to send a BUS DEVICE RESET message to the target. 
+ +8.8 Abort all tasks of all logical units of a target + + cleardev <target> + + target: target number + The driver will try to send a ABORT message to all the logical units + of the target. + + +9. Configuration parameters + +Under kernel configuration tools (make menuconfig, for example), it is +possible to change some default driver configuration parameters. +If the firmware of all your devices is perfect enough, all the +features supported by the driver can be enabled at start-up. However, +if only one has a flaw for some SCSI feature, you can disable the +support by the driver of this feature at linux start-up and enable +this feature after boot-up only for devices that support it safely. + +Configuration parameters: + +Use normal IO (default answer: n) + Answer "y" if you suspect your mother board to not allow memory mapped I/O. + May slow down performance a little. + +Default tagged command queue depth (default answer: 16) + Entering 0 defaults to tagged commands not being used. + This parameter can be specified from the boot command line. + +Maximum number of queued commands (default answer: 32) + This option allows you to specify the maximum number of tagged commands + that can be queued to a device. The maximum supported value is 255. + +Synchronous transfers frequency (default answer: 80) + This option allows you to specify the frequency in MHz the driver + will use at boot time for synchronous data transfer negotiations. + 0 means "asynchronous data transfers". + +10. Boot setup commands + +10.1 Syntax + +Setup commands can be passed to the driver either at boot time or as a +string variable using 'insmod'. + +A boot setup command for this driver begins with the driver name "sym53c8xx=". +The kernel syntax parser then expects an optionnal list of integers separated +with comma followed by an optional list of comma-separated strings. 
+ +Example of boot setup command under lilo prompt: + +lilo: linux root=/dev/sda2 sym53c8xx=tags:4,sync:10,debug:0x200 + +- enable tagged commands, up to 4 tagged commands queued. +- set synchronous negotiation speed to 10 Mega-transfers / second. +- set DEBUG_NEGO flag. + +Since comma seems not to be allowed when defining a string variable using +'insmod', the driver also accepts <space> as option separator. +The following command will install driver module with the same options as +above. + + insmod sym53c8xx.o sym53c8xx="tags:4 sync:10 debug:0x200" + +The integer list of arguments is discarded by the driver. + +Each string argument must be specified as "keyword:value". Only lower-case +characters and digits are allowed. + +10.2 Available arguments + +10.2.1 Master parity checking + mpar:y enabled + mpar:n disabled + +10.2.2 Scsi parity checking + spar:y enabled + spar:n disabled + +10.2.3 Default number of tagged commands + tags:0 (or tags:1 ) tagged command queuing disabled + tags:#tags (#tags > 1) tagged command queuing enabled + #tags will be truncated to the max queued commands configuration parameter. + This option also allows to specify a command queue depth for each device + that support tagged command queueing. + Example: + sym53c8xx=tags:10/t2t3q16-t5q24/t1u2q32 + will set devices queue depth as follow: + - controller #0 target #2 and target #3 -> 16 commands, + - controller #0 target #5 -> 24 commands, + - controller #1 target #1 logical unit #2 -> 32 commands, + - all other logical units (all targets, all controllers) -> 10 commands. 
+ +10.2.4 Default synchronous period factor + sync:255 disabled (asynchronous transfer mode) + sync:#factor + #factor = 9 Ultra-3 SCSI 80 Mega-transfers / second (Wide only) + #factor = 10 Ultra-2 SCSI 40 Mega-transfers / second + #factor = 11 Ultra-2 SCSI 33 Mega-transfers / second + #factor < 25 Ultra SCSI 20 Mega-transfers / second + #factor < 50 Fast SCSI-2 + + In all cases, the driver will use the minimum transfer period supported by + controllers according to SYM53C8XX chip type. + +10.2.5 Verbosity level + verb:0 minimal + verb:1 normal + verb:2 too much + +10.2.6 Debug mode + debug:0 clear debug flags + debug:#x set debug flags + #x is an integer value combining the following power-of-2 values: + DEBUG_ALLOC 0x1 + DEBUG_PHASE 0x2 + DEBUG_POLL 0x4 + DEBUG_QUEUE 0x8 + DEBUG_RESULT 0x10 + DEBUG_SCATTER 0x20 + DEBUG_SCRIPT 0x40 + DEBUG_TINY 0x80 + DEBUG_TIMING 0x100 + DEBUG_NEGO 0x200 + DEBUG_TAGS 0x400 + DEBUG_FREEZE 0x800 + DEBUG_RESTART 0x1000 + + You can play safely with DEBUG_NEGO. However, some of these flags may + generate bunches of syslog messages. + +10.2.7 Burst max + burst:0 burst disabled + burst:255 get burst length from initial IO register settings. + burst:#x burst enabled (1<<#x burst transfers max) + #x is an integer value which is log base 2 of the burst transfers max. + By default the driver uses the maximum value supported by the chip. + +10.2.8 LED support + led:1 enable LED support + led:0 disable LED support + Donnot enable LED support if your scsi board does not use SDMS BIOS. + (See 'Configuration parameters') + +10.2.9 Max wide + wide:1 wide scsi enabled + wide:0 wide scsi disabled + Some scsi boards use a 875 (ultra wide) and only supply narrow connectors. + If you have connected a wide device with a 50 pins to 68 pins cable + converter, any accepted wide negotiation will break further data transfers. + In such a case, using "wide:0" in the bootup command will be helpfull. 
+ +10.2.10 Differential mode + diff:0 never set up diff mode + diff:1 set up diff mode if BIOS set it + diff:2 always set up diff mode + diff:3 set diff mode if GPIO3 is not set + +10.2.11 IRQ mode + irqm:0 always open drain + irqm:1 same as initial settings (assumed BIOS settings) + irqm:2 always totem pole + +10.2.12 Reverse probe + revprob:n probe chip ids from the PCI configuration in this order: + 810, 815, 825, 860, 875, 885, 875A, 895, 896, 895A, + 1510D, 1010-33, 1010-66. + revprob:y probe chip ids in the reverse order. + +10.2.13 Fix up PCI configuration space + pcifix:<option bits> + + Available option bits: + 0x0: No attempt to fix PCI configuration space registers values. + 0x1: Set PCI cache-line size register if not set. + 0x2: Set write and invalidate bit in PCI command register. + +10.2.14 Serial NVRAM + nvram:n do not look for serial NVRAM + nvram:y test controllers for onboard serial NVRAM + (alternate binary form) + mvram=<bits options> + 0x01 look for NVRAM (equivalent to nvram=y) + 0x02 ignore NVRAM "Synchronous negotiation" parameters for all devices + 0x04 ignore NVRAM "Wide negotiation" parameter for all devices + 0x08 ignore NVRAM "Scan at boot time" parameter for all devices + 0x80 also attach controllers set to OFF in the NVRAM (sym53c8xx only) + +10.2.15 Check SCSI BUS + buschk:<option bits> + + Available option bits: + 0x0: No check. + 0x1: Check and donnot attach the controller on error. + 0x2: Check and just warn on error. + +10.2.16 Exclude a host from being attached + excl=<io_address> + + Prevent host at a given io address from being attached. + For example 'sym53c8xx=excl:0xb400,excl:0xc000' indicate to the + driver not to attach hosts at address 0xb400 and 0xc000. + +10.2.17 Suggest a default SCSI id for hosts + hostid:255 no id suggested. + hostid:#x (0 < x < 7) x suggested for hosts SCSI id. + + If a host SCSI id is available from the NVRAM, the driver will ignore + any value suggested as boot option. 
Otherwise, if a suggested value + different from 255 has been supplied, it will use it. Otherwise, it will + try to deduce the value previously set in the hardware and use value + 7 if the hardware value is zero. + +10.3 PCI configuration fix-up boot option + +pcifix:<option bits> + +Available option bits: + 0x1: Set PCI cache-line size register if not set. + 0x2: Set write and invalidate bit in PCI command register. + +Use 'pcifix:3' in order to allow the driver to fix both PCI features. + +Recent SYMBIOS 53C8XX scsi processors are able to use PCI read multiple +and PCI write and invalidate commands. These features require the +cache line size register to be properly set in the PCI configuration +space of the chips. On the other hand, chips will use PCI write and +invalidate commands only if the corresponding bit is set to 1 in the +PCI command register. + +Not all PCI bioses set the PCI cache line register and the PCI write and +invalidate bit in the PCI configuration space of 53C8XX chips. +Optimized PCI accesses may be broken for some PCI/memory controllers or +make problems with some PCI boards. + +10.4 Serial NVRAM support boot option + +nvram:n do not look for serial NVRAM +nvram:y test controllers for onboard serial NVRAM + +This option can also been entered as an hexadecimal value that allows +to control what information the driver will get from the NVRAM and what +information it will ignore. +For details see '17. Serial NVRAM support'. + +When this option is enabled, the driver tries to detect all boards using +a Serial NVRAM. This memory is used to hold user set up parameters. 
+ +The parameters the driver is able to get from the NVRAM depend on the +data format used, as follow: + + Tekram format Symbios format +General and host parameters + Boot order N Y + Host SCSI ID Y Y + SCSI parity checking Y Y + Verbose boot messages N Y +SCSI devices parameters + Synchronous transfer speed Y Y + Wide 16 / Narrow Y Y + Tagged Command Queuing enabled Y Y + Disconnections enabled Y Y + Scan at boot time N Y + +In order to speed up the system boot, for each device configured without +the "scan at boot time" option, the driver forces an error on the +first TEST UNIT READY command received for this device. + +Some SDMS BIOS revisions seem to be unable to boot cleanly with very fast +hard disks. In such a situation you cannot configure the NVRAM with +optimized parameters value. + +The 'nvram' boot option can be entered in hexadecimal form in order +to ignore some options configured in the NVRAM, as follow: + +mvram=<bits options> + 0x01 look for NVRAM (equivalent to nvram=y) + 0x02 ignore NVRAM "Synchronous negotiation" parameters for all devices + 0x04 ignore NVRAM "Wide negotiation" parameter for all devices + 0x08 ignore NVRAM "Scan at boot time" parameter for all devices + 0x80 also attach controllers set to OFF in the NVRAM (sym53c8xx only) + +Option 0x80 is disabled by default. +Result is that, by default (option not set), the sym53c8xx driver will not +attach controllers set to OFF in the NVRAM. + +10.5 SCSI BUS checking boot option. + +When this option is set to a non-zero value, the driver checks SCSI lines +logic state, 100 micro-seconds after having asserted the SCSI RESET line. +The driver just reads SCSI lines and checks all lines read FALSE except RESET. +Since SCSI devices shall release the BUS at most 800 nano-seconds after SCSI +RESET has been asserted, any signal to TRUE may indicate a SCSI BUS problem. +Unfortunately, the following common SCSI BUS problems are not detected: +- Only 1 terminator installed. +- Misplaced terminators. 
+
+- Bad quality terminators.
+On the other hand, either bad cabling, broken devices, not conformant
+devices, ... may cause a SCSI signal to be wrong when the driver reads it.
+
+15. SCSI problem troubleshooting
+
+15.1 Problem tracking
+
+Most SCSI problems are due to a non-conformant SCSI bus or too buggy
+devices. If, unfortunately, you have SCSI problems, you can check the
+following things:
+
+- SCSI bus cables
+- terminations at both ends of the SCSI chain
+- linux syslog messages (some of them may help you)
+
+If you do not find the source of problems, you can configure the
+driver or devices in the NVRAM with minimal features.
+
+- only asynchronous data transfers
+- tagged commands disabled
+- disconnections not allowed
+
+Now, if your SCSI bus is ok, your system has every chance to work
+with this safe configuration but performance will not be optimal.
+
+If it still fails, then you can send your problem description to
+appropriate mailing lists or news-groups. Send me a copy in order to
+be sure I will receive it. Obviously, a bug in the driver code is
+possible.
+
+  My current email address: Gerard Roudier <groudier@free.fr>
+
+Allowing disconnections is important if you use several devices on
+your SCSI bus but often causes problems with buggy devices.
+Synchronous data transfers increase throughput of fast devices like
+hard disks. Good SCSI hard disks with a large cache gain advantage of
+tagged command queuing.
+
+15.2 Understanding hardware error reports
+
+When the driver detects an unexpected error condition, it may display a
+message of the following pattern.
+
+sym0:1: ERROR (0:48) (1-21-65) (f/95/0) @ (script 7c0:19000000).
+sym0: script cmd = 19000000
+sym0: regdump: da 10 80 95 47 0f 01 07 75 01 81 21 80 01 09 00.
+
+Some fields in such a message may help you understand the cause of the
+problem, as follows:
+
+sym0:1: ERROR (0:48) (1-21-65) (f/95/0) @ (script 7c0:19000000).
+.....A.........B.C....D.E..F....G.H..I.......J.....K...L.......
+
+Field A : target number.
+  SCSI ID of the device the controller was talking with at the moment the
+  error occurred.
+
+Field B : DSTAT io register (DMA STATUS)
+  Bit 0x40 : MDPE Master Data Parity Error
+    Data parity error detected on the PCI BUS.
+  Bit 0x20 : BF Bus Fault
+    PCI bus fault condition detected
+  Bit 0x01 : IID Illegal Instruction Detected
+    Set by the chip when it detects an Illegal Instruction format
+    on some condition that makes an instruction illegal.
+  Bit 0x80 : DFE Dma Fifo Empty
+    Pure status bit that does not indicate an error.
+  If the reported DSTAT value contains a combination of MDPE (0x40),
+  BF (0x20), then the cause is likely a PCI BUS problem.
+
+Field C : SIST io register (SCSI Interrupt Status)
+  Bit 0x08 : SGE SCSI GROSS ERROR
+    Indicates that the chip detected a severe error condition
+    on the SCSI BUS that prevents the SCSI protocol from functioning
+    properly.
+  Bit 0x04 : UDC Unexpected Disconnection
+    Indicates that the device released the SCSI BUS when the chip
+    was not expecting this to happen. A device may behave so to
+    indicate to the SCSI initiator that an error condition not reportable using the SCSI protocol has occurred.
+  Bit 0x02 : RST SCSI BUS Reset
+    Generally SCSI targets do not reset the SCSI BUS, although any
+    device on the BUS can reset it at any time.
+  Bit 0x01 : PAR Parity
+    SCSI parity error detected.
+  On a faulty SCSI BUS, any error condition among SGE (0x08), UDC (0x04) and
+  PAR (0x01) may be detected by the chip. If your SCSI system sometimes
+  encounters such error conditions, especially SCSI GROSS ERROR, then a SCSI
+  BUS problem is likely the cause of these errors.
+
+For fields D,E,F,G and H, you may look into the sym53c8xx_defs.h file
+that contains some minimal comments on IO register bits.
+Field D : SOCL Scsi Output Control Latch
+  This register reflects the state of the SCSI control lines the
+  chip wants to drive or compare against.
+Field E : SBCL Scsi Bus Control Lines + Actual value of control lines on the SCSI BUS. +Field F : SBDL Scsi Bus Data Lines + Actual value of data lines on the SCSI BUS. +Field G : SXFER SCSI Transfer + Contains the setting of the Synchronous Period for output and + the current Synchronous offset (offset 0 means asynchronous). +Field H : SCNTL3 Scsi Control Register 3 + Contains the setting of timing values for both asynchronous and + synchronous data transfers. +Field I : SCNTL4 Scsi Control Register 4 + Only meaninful for 53C1010 Ultra3 controllers. + +Understanding Fields J, K, L and dumps requires to have good knowledge of +SCSI standards, chip cores functionnals and internal driver data structures. +You are not required to decode and understand them, unless you want to help +maintain the driver code. + +17. Serial NVRAM (added by Richard Waltham: dormouse@farsrobt.demon.co.uk) + +17.1 Features + +Enabling serial NVRAM support enables detection of the serial NVRAM included +on Symbios and some Symbios compatible host adaptors, and Tekram boards. The +serial NVRAM is used by Symbios and Tekram to hold set up parameters for the +host adaptor and it's attached drives. + +The Symbios NVRAM also holds data on the boot order of host adaptors in a +system with more than one host adaptor. This enables the order of scanning +the cards for drives to be changed from the default used during host adaptor +detection. + +This can be done to a limited extent at the moment using "reverse probe" but +this only changes the order of detection of different types of cards. The +NVRAM boot order settings can do this as well as change the order the same +types of cards are scanned in, something "reverse probe" cannot do. + +Tekram boards using Symbios chips, DC390W/F/U, which have NVRAM are detected +and this is used to distinguish between Symbios compatible and Tekram host +adaptors. 
This is used to disable the Symbios compatible "diff" setting +incorrectly set on Tekram boards if the CONFIG_SCSI_53C8XX_SYMBIOS_COMPAT +configuration parameter is set enabling both Symbios and Tekram boards to be +used together with the Symbios cards using all their features, including +"diff" support. ("led pin" support for Symbios compatible cards can remain +enabled when using Tekram cards. It does nothing useful for Tekram host +adaptors but does not cause problems either.) + + +17.2 Symbios NVRAM layout + +typical data at NVRAM address 0x100 (53c810a NVRAM) +----------------------------------------------------------- +00 00 +64 01 +8e 0b + +00 30 00 00 00 00 07 00 00 00 00 00 00 00 07 04 10 04 00 00 + +04 00 0f 00 00 10 00 50 00 00 01 00 00 62 +04 00 03 00 00 10 00 58 00 00 01 00 00 63 +04 00 01 00 00 10 00 48 00 00 01 00 00 61 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 + +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 + +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 + +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 + +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 + +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 + +fe fe +00 00 +00 00 +----------------------------------------------------------- +NVRAM layout details + +NVRAM Address 0x000-0x0ff not used + 0x100-0x26f initialised data + 0x270-0x7ff not used + +general layout + + 
header - 6 bytes, + data - 356 bytes (checksum is byte sum of this data) + trailer - 6 bytes + --- + total 368 bytes + +data area layout + + controller set up - 20 bytes + boot configuration - 56 bytes (4x14 bytes) + device set up - 128 bytes (16x8 bytes) + unused (spare?) - 152 bytes (19x8 bytes) + --- + total 356 bytes + +----------------------------------------------------------- +header + +00 00 - ?? start marker +64 01 - byte count (lsb/msb excludes header/trailer) +8e 0b - checksum (lsb/msb excludes header/trailer) +----------------------------------------------------------- +controller set up + +00 30 00 00 00 00 07 00 00 00 00 00 00 00 07 04 10 04 00 00 + | | | | + | | | -- host ID + | | | + | | --Removable Media Support + | | 0x00 = none + | | 0x01 = Bootable Device + | | 0x02 = All with Media + | | + | --flag bits 2 + | 0x00000001= scan order hi->low + | (default 0x00 - scan low->hi) + --flag bits 1 + 0x00000001 scam enable + 0x00000010 parity enable + 0x00000100 verbose boot msgs + +remaining bytes unknown - they do not appear to change in my +current set up for any of the controllers. + +default set up is identical for 53c810a and 53c875 NVRAM +(Removable Media added Symbios BIOS version 4.09) +----------------------------------------------------------- +boot configuration + +boot order set by order of the devices in this table + +04 00 0f 00 00 10 00 50 00 00 01 00 00 62 -- 1st controller +04 00 03 00 00 10 00 58 00 00 01 00 00 63 2nd controller +04 00 01 00 00 10 00 48 00 00 01 00 00 61 3rd controller +00 00 00 00 00 00 00 00 00 00 00 00 00 00 4th controller + | | | | | | | | + | | | | | | ---- PCI io port adr + | | | | | --0x01 init/scan at boot time + | | | | --PCI device/function number (0xdddddfff) + | | ----- ?? PCI vendor ID (lsb/msb) + ----PCI device ID (lsb/msb) + +?? 
use of this data is a guess but seems reasonable + +remaining bytes unknown - they do not appear to change in my +current set up + +default set up is identical for 53c810a and 53c875 NVRAM +----------------------------------------------------------- +device set up (up to 16 devices - includes controller) + +0f 00 08 08 64 00 0a 00 - id 0 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 + +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 +0f 00 08 08 64 00 0a 00 - id 15 + | | | | | | + | | | | ----timeout (lsb/msb) + | | | --synch period (0x?? 40 Mtrans/sec- fast 40) (probably 0x28) + | | | (0x30 20 Mtrans/sec- fast 20) + | | | (0x64 10 Mtrans/sec- fast ) + | | | (0xc8 5 Mtrans/sec) + | | | (0x00 asynchronous) + | | -- ?? max sync offset (0x08 in NVRAM on 53c810a) + | | (0x10 in NVRAM on 53c875) + | --device bus width (0x08 narrow) + | (0x10 16 bit wide) + --flag bits + 0x00000001 - disconnect enabled + 0x00000010 - scan at boot time + 0x00000100 - scan luns + 0x00001000 - queue tags enabled + +remaining bytes unknown - they do not appear to change in my +current set up + +?? use of this data is a guess but seems reasonable +(but it could be max bus width) + +default set up for 53c810a NVRAM +default set up for 53c875 NVRAM - bus width - 0x10 + - sync offset ? - 0x10 + - sync period - 0x30 +----------------------------------------------------------- +?? spare device space (32 bit bus ??) + +00 00 00 00 00 00 00 00 (19x8bytes) +. +. +00 00 00 00 00 00 00 00 + +default set up is identical for 53c810a and 53c875 NVRAM +----------------------------------------------------------- +trailer + +fe fe - ? end marker ? 
+00 00 +00 00 + +default set up is identical for 53c810a and 53c875 NVRAM +----------------------------------------------------------- + + + +17.3 Tekram NVRAM layout + +nvram 64x16 (1024 bit) + +Drive settings + +Drive ID 0-15 (addr 0x0yyyy0 = device setup, yyyy = ID) + (addr 0x0yyyy1 = 0x0000) + + x x x x x x x x x x x x x x x x + | | | | | | | | | + | | | | | | | | ----- parity check 0 - off + | | | | | | | | 1 - on + | | | | | | | | + | | | | | | | ------- sync neg 0 - off + | | | | | | | 1 - on + | | | | | | | + | | | | | | --------- disconnect 0 - off + | | | | | | 1 - on + | | | | | | + | | | | | ----------- start cmd 0 - off + | | | | | 1 - on + | | | | | + | | | | -------------- tagged cmds 0 - off + | | | | 1 - on + | | | | + | | | ---------------- wide neg 0 - off + | | | 1 - on + | | | + --------------------------- sync rate 0 - 10.0 Mtrans/sec + 1 - 8.0 + 2 - 6.6 + 3 - 5.7 + 4 - 5.0 + 5 - 4.0 + 6 - 3.0 + 7 - 2.0 + 7 - 2.0 + 8 - 20.0 + 9 - 16.7 + a - 13.9 + b - 11.9 + +Global settings + +Host flags 0 (addr 0x100000, 32) + + x x x x x x x x x x x x x x x x + | | | | | | | | | | | | + | | | | | | | | ----------- host ID 0x00 - 0x0f + | | | | | | | | + | | | | | | | ----------------------- support for 0 - off + | | | | | | | > 2 drives 1 - on + | | | | | | | + | | | | | | ------------------------- support drives 0 - off + | | | | | | > 1Gbytes 1 - on + | | | | | | + | | | | | --------------------------- bus reset on 0 - off + | | | | | power on 1 - on + | | | | | + | | | | ----------------------------- active neg 0 - off + | | | | 1 - on + | | | | + | | | -------------------------------- imm seek 0 - off + | | | 1 - on + | | | + | | ---------------------------------- scan luns 0 - off + | | 1 - on + | | + -------------------------------------- removable 0 - disable + as BIOS dev 1 - boot device + 2 - all + +Host flags 1 (addr 0x100001, 33) + + x x x x x x x x x x x x x x x x + | | | | | | + | | | --------- boot delay 0 - 3 sec + | | | 1 - 5 + | | | 2 - 10 
+ | | | 3 - 20 + | | | 4 - 30 + | | | 5 - 60 + | | | 6 - 120 + | | | + --------------------------- max tag cmds 0 - 2 + 1 - 4 + 2 - 8 + 3 - 16 + 4 - 32 + +Host flags 2 (addr 0x100010, 34) + + x x x x x x x x x x x x x x x x + | + ----- F2/F6 enable 0 - off ??? + 1 - on ??? + +checksum (addr 0x111111) + +checksum = 0x1234 - (sum addr 0-63) + +---------------------------------------------------------------------------- + +default nvram data: + +0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 +0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 +0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 +0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 + +0x0f07 0x0400 0x0001 0x0000 0x0000 0x0000 0x0000 0x0000 +0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 +0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 +0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0xfbbc + + +=============================================================================== +End of Linux SYM-2 driver documentation file diff --git a/drivers/scsi/sym53c8xx_2/Makefile b/drivers/scsi/sym53c8xx_2/Makefile new file mode 100644 index 000000000000..b352b03861e2 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/Makefile @@ -0,0 +1,16 @@ +# File: drivers/sym53c8xx/Makefile +# Makefile for the NCR/SYMBIOS/LSI 53C8XX PCI SCSI controllers driver. + +list-multi := sym53c8xx.o +sym53c8xx-objs := sym_fw.o sym_glue.o sym_hipd.o sym_malloc.o sym_misc.o sym_nvram.o +obj-$(CONFIG_SCSI_SYM53C8XX_2) := sym53c8xx.o + +EXTRA_CFLAGS += -I. + +sym53c8xx.o: $(sym53c8xx-objs) + $(LD) -r -o $@ $(sym53c8xx-objs) + +include $(TOPDIR)/Rules.make + +clean: + rm -f *.o diff --git a/drivers/scsi/sym53c8xx_2/sym53c8xx.h b/drivers/scsi/sym53c8xx_2/sym53c8xx.h new file mode 100644 index 000000000000..c7963af18ff1 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym53c8xx.h @@ -0,0 +1,370 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. 
+ * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM53C8XX_H +#define SYM53C8XX_H + +#if !defined(LINUX_VERSION_CODE) +#include <linux/version.h> +#endif +#include <linux/config.h> + +/* + * Compatibility with ncr53c8xx and sym53c8xx configuration options. + */ +#ifndef CONFIG_SCSI_SYM53C8XX_IOMAPPED +#ifdef CONFIG_SCSI_NCR53C8XX_IOMAPPED +#define CONFIG_SCSI_SYM53C8XX_IOMAPPED CONFIG_SCSI_NCR53C8XX_IOMAPPED +#endif +#endif + +#ifndef CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS +#ifdef CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS +#define CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS +#endif +#endif + +#ifndef CONFIG_SCSI_SYM53C8XX_MAX_TAGS +#ifdef CONFIG_SCSI_NCR53C8XX_MAX_TAGS +#define CONFIG_SCSI_SYM53C8XX_MAX_TAGS CONFIG_SCSI_NCR53C8XX_MAX_TAGS +#endif +#endif + +int sym53c8xx_detect(Scsi_Host_Template *tpnt); +const char *sym53c8xx_info(struct Scsi_Host *host); + +int sym53c8xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); + +int sym53c8xx_eh_abort_handler(Scsi_Cmnd *); +int sym53c8xx_eh_device_reset_handler(Scsi_Cmnd *); +int sym53c8xx_eh_bus_reset_handler(Scsi_Cmnd *); +int sym53c8xx_eh_host_reset_handler(Scsi_Cmnd *); + +#ifdef MODULE +int sym53c8xx_release(struct Scsi_Host *); +#else +#define sym53c8xx_release NULL +#endif + + +/* + * Host template defintion + */ +#if (LINUX_VERSION_CODE >= 0x020400) || defined(HOSTS_C) || defined(MODULE) + +#include <scsi/scsicam.h> + +#define SYM53C8XX { \ + name: "sym53c8xx", \ + detect: sym53c8xx_detect, \ + 
release: sym53c8xx_release, \ + info: sym53c8xx_info, \ + queuecommand: sym53c8xx_queue_command, \ + use_new_eh_code: 1, \ + eh_abort_handler: sym53c8xx_eh_abort_handler, \ + eh_device_reset_handler:sym53c8xx_eh_device_reset_handler, \ + eh_bus_reset_handler: sym53c8xx_eh_bus_reset_handler, \ + eh_host_reset_handler: sym53c8xx_eh_host_reset_handler, \ + bios_param: scsicam_bios_param, \ + can_queue: 0, \ + this_id: 7, \ + sg_tablesize: 0, \ + cmd_per_lun: 0, \ + use_clustering: DISABLE_CLUSTERING} + +#endif /* defined(HOSTS_C) || defined(MODULE) */ + +/* + * Translate kernel configuration parameters + * into corresponding driver parameters. + */ +#if !defined(HOSTS_C) + +/* + * Use normal IO if configured. Forced for alpha and powerpc. + * Powerpc fails copying to on-chip RAM using memcpy_toio(). + * Forced to MMIO for sparc. + */ +#if defined(__alpha__) +#define SYM_CONF_IOMAPPED +#elif defined(__powerpc__) +#define SYM_CONF_IOMAPPED +#define SYM_OPT_NO_BUS_MEMORY_MAPPING +#elif defined(__sparc__) +#undef SYM_CONF_IOMAPPED +#elif defined(CONFIG_SCSI_SYM53C8XX_IOMAPPED) +#define SYM_CONF_IOMAPPED +#endif + +/* + * DMA addressing mode. + * + * 0 : 32 bit addressing for all chips. + * 1 : 40 bit addressing when supported by chip. + * 2 : 64 bit addressing when supported by chip, + * limited to 16 segments of 4 GB -> 64 GB max. + */ +#ifdef CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE +#define SYM_CONF_DMA_ADDRESSING_MODE CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE +#endif + +/* + * NCR PQS/PDS special device support. + */ +#if 1 +#define SYM_CONF_PQS_PDS_SUPPORT +#endif + +/* + * NVRAM support. 
+ */ +#if 1 +#define SYM_CONF_NVRAM_SUPPORT (1) +#define SYM_SETUP_SYMBIOS_NVRAM (1) +#define SYM_SETUP_TEKRAM_NVRAM (1) +#endif + +/* + * These options are not tunable from 'make config' + */ +#if 1 +#define SYM_LINUX_PROC_INFO_SUPPORT +#define SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT +#define SYM_LINUX_USER_COMMAND_SUPPORT +#define SYM_LINUX_USER_INFO_SUPPORT +#define SYM_LINUX_DEBUG_CONTROL_SUPPORT +#endif + +/* + * Also handle old NCR chips if not (0). + */ +#define SYM_CONF_GENERIC_SUPPORT (1) + +/* + * Allow tags from 2 to 256, default 8 + */ +#ifndef CONFIG_SCSI_SYM53C8XX_MAX_TAGS +#define CONFIG_SCSI_SYM53C8XX_MAX_TAGS (8) +#endif + +#if CONFIG_SCSI_SYM53C8XX_MAX_TAGS < 2 +#define SYM_CONF_MAX_TAG (2) +#elif CONFIG_SCSI_SYM53C8XX_MAX_TAGS > 256 +#define SYM_CONF_MAX_TAG (256) +#else +#define SYM_CONF_MAX_TAG CONFIG_SCSI_SYM53C8XX_MAX_TAGS +#endif + +#ifndef CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS +#define CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS SYM_CONF_MAX_TAG +#endif + +/* + * Anyway, we configure the driver for at least 64 tags per LUN. :) + */ +#if SYM_CONF_MAX_TAG <= 64 +#define SYM_CONF_MAX_TAG_ORDER (6) +#elif SYM_CONF_MAX_TAG <= 128 +#define SYM_CONF_MAX_TAG_ORDER (7) +#else +#define SYM_CONF_MAX_TAG_ORDER (8) +#endif + +/* + * Sync transfer frequency at startup. + * Allow up to ULTRA-160. The driver will scale the value + * according to controller capabilities. + */ +#define CONFIG_SCSI_SYM53C8XX_DEFAULT_SYNC (9) + +/* + * Max number of SG entries. + */ +#define SYM_CONF_MAX_SG (96) + +/* + * Max number of LUNs per target. + */ +#if 1 /* defined CONFIG_SCSI_MULTI_LUN */ +#define CONFIG_SCSI_SYM53C8XX_MAX_LUN (16) +#else +#define CONFIG_SCSI_SYM53C8XX_MAX_LUN (1) +#endif + +/* + * Driver setup structure. + * + * This structure is initialized from linux config options. + * It can be overridden at boot-up by the boot command line. 
+ */ +struct sym_driver_setup { + u_char pci_parity; + u_char scsi_parity; + u_short max_tag; + u_char min_sync; + u_char burst_order; + u_char scsi_led; + u_char max_wide; + u_char scsi_diff; + u_char irq_mode; + u_char scsi_bus_check; + u_char host_id; + u_char max_offs; + u_char max_lun; + u_char pci_fix_up; + + u_char reverse_probe; + u_char verbose; + u_short debug; + u_char settle_delay; + u_char use_nvram; + u_long excludes[8]; + char tag_ctrl[100]; +}; + +#define SYM_SETUP_PCI_PARITY sym_driver_setup.pci_parity +#define SYM_SETUP_SCSI_PARITY sym_driver_setup.scsi_parity +#define SYM_SETUP_MAX_TAG sym_driver_setup.max_tag +#define SYM_SETUP_MIN_SYNC sym_driver_setup.min_sync +#define SYM_SETUP_BURST_ORDER sym_driver_setup.burst_order +#define SYM_SETUP_SCSI_LED sym_driver_setup.scsi_led +#define SYM_SETUP_MAX_WIDE sym_driver_setup.max_wide +#define SYM_SETUP_SCSI_DIFF sym_driver_setup.scsi_diff +#define SYM_SETUP_IRQ_MODE sym_driver_setup.irq_mode +#define SYM_SETUP_SCSI_BUS_CHECK sym_driver_setup.scsi_bus_check +#define SYM_SETUP_HOST_ID sym_driver_setup.host_id +#define SYM_SETUP_MAX_OFFS sym_driver_setup.max_offs +#define SYM_SETUP_MAX_LUN sym_driver_setup.max_lun +#define SYM_SETUP_PCI_FIX_UP sym_driver_setup.pci_fix_up + +/* + * Initial setup. + * + * Can be overriden at startup by a command line. + */ +#define SYM_LINUX_DRIVER_SETUP \ +{ \ + 1, /* pci_parity */ \ + 1, /* scsi_parity */ \ + CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS, \ + CONFIG_SCSI_SYM53C8XX_DEFAULT_SYNC, \ + 7, /* burst_order */ \ + 1, /* scsi_led */ \ + 1, /* max_wide */ \ + 1, /* scsi_diff */ \ + 0, /* irq_mode */ \ + 1, /* scsi_bus_check */ \ + 7, /* host_id */ \ + 62, /* max_offs */ \ + CONFIG_SCSI_SYM53C8XX_MAX_LUN, \ + 3, /* pci_fix_up */ \ + 0, /* reverse_probe */ \ + 0, /* verbose */ \ + 0, /* debug */ \ + 3, /* settle_delay */ \ + 1, /* use_nvram */ \ +} + +/* + * Boot fail safe setup. 
+ * + * Override initial setup from boot command line: + * sym53c8xx=safe:y + */ +#define SYM_LINUX_DRIVER_SAFE_SETUP \ +{ \ + 0, /* pci_parity */ \ + 0, /* scsi_parity */ \ + 0, /* max_tag */ \ + 50, /* min_sync */ \ + 0, /* burst_order */ \ + 0, /* scsi_led */ \ + 1, /* max_wide */ \ + 1, /* scsi_diff */ \ + 0, /* irq_mode */ \ + 2, /* scsi_bus_check */ \ + 7, /* host_id */ \ + 15, /* max_offs */ \ + 1, /* max_lun */ \ + 0, /* pci_fix_up */ \ + 0, /* reverse_probe */ \ + 2, /* verbose */ \ + 0, /* debug */ \ + 10, /* settle_delay */ \ + 1, /* use_nvram */ \ +} + +/* + * This structure is initialized from linux config options. + * It can be overridden at boot-up by the boot command line. + */ +#ifdef SYM_GLUE_C +struct sym_driver_setup + sym_driver_setup = SYM_LINUX_DRIVER_SETUP; +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT +u_int sym_debug_flags = 0; +#endif +#else +extern struct sym_driver_setup sym_driver_setup; +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT +extern u_int sym_debug_flags; +#endif +#endif /* SYM_GLUE_C */ + +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT +#define DEBUG_FLAGS sym_debug_flags +#endif +#define boot_verbose sym_driver_setup.verbose + +#endif /* !defined(HOSTS_C) */ + +#endif /* SYM53C8XX_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_conf.h b/drivers/scsi/sym53c8xx_2/sym_conf.h new file mode 100644 index 000000000000..0877411a8fe7 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_conf.h @@ -0,0 +1,329 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. 
+ * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM_CONF_H +#define SYM_CONF_H + +/*------------------------------------------------------------------- + * Static configuration. + *------------------------------------------------------------------- + */ + +/* + * Also support early NCR 810, 815 and 825 chips. + */ +#ifndef SYM_CONF_GENERIC_SUPPORT +#define SYM_CONF_GENERIC_SUPPORT (1) +#endif + +/* + * Use Normal IO instead of MMIO. + */ +/* #define SYM_CONF_IOMAPPED */ + +/* + * Max tags for a device (logical unit) + * We use a power of 2, (7) means 2<<7=128 + * Maximum is 8 -> 256 tags + */ +#ifndef SYM_CONF_MAX_TAG_ORDER +#define SYM_CONF_MAX_TAG_ORDER (6) +#endif + +/* + * Max number of scatter/gather entries for en IO. + * Each entry costs 8 bytes in the internal CCB data structure. + */ +#ifndef SYM_CONF_MAX_SG +#define SYM_CONF_MAX_SG (33) +#endif + +/* + * Max number of targets. + * Maximum is 16 and you are advised not to change this value. + */ +#ifndef SYM_CONF_MAX_TARGET +#define SYM_CONF_MAX_TARGET (16) +#endif + +/* + * Max number of logical units. + * SPI-2 allows up to 64 logical units, but in real life, target + * that implements more that 7 logical units are pretty rare. + * Anyway, the cost of accepting up to 64 logical unit is low in + * this driver, thus going with the maximum is acceptable. + */ +#ifndef SYM_CONF_MAX_LUN +#define SYM_CONF_MAX_LUN (64) +#endif + +/* + * Max number of IO control blocks queued to the controller. 
+ * Each entry needs 8 bytes and the queues are allocated contiguously. + * Since we donnot want to allocate more than a page, the theorical + * maximum is PAGE_SIZE/8. For safety, we announce a bit less to the + * access method. :) + * When not supplied, as it is suggested, the driver compute some + * good value for this parameter. + */ +/* #define SYM_CONF_MAX_START (PAGE_SIZE/8 - 16) */ + +/* + * Support for NVRAM. + */ +#ifndef SYM_CONF_NVRAM_SUPPORT +#define SYM_CONF_NVRAM_SUPPORT (1) +#endif + +/* + * Support for Immediate Arbitration. + * Not advised. + */ +/* #define SYM_CONF_IARB_SUPPORT */ + +/* + * Support for some PCI fix-ups (or assumed so). + */ +#define SYM_CONF_PCI_FIX_UP + +/* + * Number of lists for the optimization of the IO timeout handling. + * Not used under FreeBSD and Linux. + */ +#ifndef SYM_CONF_TIMEOUT_ORDER_MAX +#define SYM_CONF_TIMEOUT_ORDER_MAX (8) +#endif + +/* + * How the driver handles DMA addressing of user data. + * 0 : 32 bit addressing + * 1 : 40 bit addressing + * 2 : 64 bit addressing using segment registers + */ +#ifndef SYM_CONF_DMA_ADDRESSING_MODE +#define SYM_CONF_DMA_ADDRESSING_MODE (0) +#endif + +/*------------------------------------------------------------------- + * Configuration that could be dynamic if it was possible + * to pass arguments to the driver. + *------------------------------------------------------------------- + */ + +/* + * HOST default scsi id. + */ +#ifndef SYM_SETUP_HOST_ID +#define SYM_SETUP_HOST_ID 7 +#endif + +/* + * Max synchronous transfers. + */ +#ifndef SYM_SETUP_MIN_SYNC +#define SYM_SETUP_MIN_SYNC (9) +#endif + +/* + * Max wide order. + */ +#ifndef SYM_SETUP_MAX_WIDE +#define SYM_SETUP_MAX_WIDE (1) +#endif + +/* + * Max SCSI offset. + */ +#ifndef SYM_SETUP_MAX_OFFS +#define SYM_SETUP_MAX_OFFS (63) +#endif + +/* + * Default number of tags. + */ +#ifndef SYM_SETUP_MAX_TAG +#define SYM_SETUP_MAX_TAG (1<<SYM_CONF_MAX_TAG_ORDER) +#endif + +/* + * SYMBIOS NVRAM format support. 
+ */ +#ifndef SYM_SETUP_SYMBIOS_NVRAM +#define SYM_SETUP_SYMBIOS_NVRAM (1) +#endif + +/* + * TEKRAM NVRAM format support. + */ +#ifndef SYM_SETUP_TEKRAM_NVRAM +#define SYM_SETUP_TEKRAM_NVRAM (1) +#endif + +/* + * PCI parity checking. + * It should not be an option, but some poor or broken + * PCI-HOST bridges have been reported to make problems + * when this feature is enabled. + * Setting this option to 0 tells the driver not to + * enable the checking against PCI parity. + */ +#ifndef SYM_SETUP_PCI_PARITY +#define SYM_SETUP_PCI_PARITY (2) +#endif + +/* + * SCSI parity checking. + */ +#ifndef SYM_SETUP_SCSI_PARITY +#define SYM_SETUP_SCSI_PARITY (1) +#endif + +/* + * SCSI activity LED. + */ +#ifndef SYM_SETUP_SCSI_LED +#define SYM_SETUP_SCSI_LED (0) +#endif + +/* + * SCSI High Voltage Differential support. + * + * HVD/LVD/SE capable controllers (895, 895A, 896, 1010) + * report the actual SCSI BUS mode from the STEST4 IO + * register. + * + * But for HVD/SE only capable chips (825a, 875, 885), + * the driver uses some heuristic to probe against HVD. + * Normally, the chip senses the DIFFSENS signal and + * should switch its BUS tranceivers to high impedance + * in situation of the driver having been wrong about + * the actual BUS mode. May-be, the BUS mode probing of + * the driver is safe, but, given that it may be partially + * based on some previous IO register settings, it + * cannot be stated so. Thus, decision has been taken + * to require a user option to be set for the DIFF probing + * to be applied for the 825a, 875 and 885 chips. + * + * This setup option works as follows: + * + * 0 -> HVD only supported for 895, 895A, 896, 1010. + * 1 -> HVD probed for 825A, 875, 885. + * 2 -> HVD assumed for 825A, 875, 885 (not advised). + */ +#ifndef SYM_SETUP_SCSI_DIFF +#define SYM_SETUP_SCSI_DIFF (0) +#endif + +/* + * IRQ mode. + */ +#ifndef SYM_SETUP_IRQ_MODE +#define SYM_SETUP_IRQ_MODE (0) +#endif + +/* + * Check SCSI BUS signal on reset. 
+ */ +#ifndef SYM_SETUP_SCSI_BUS_CHECK +#define SYM_SETUP_SCSI_BUS_CHECK (1) +#endif + +/* + * Max burst for PCI (1<<value) + * 7 means: (1<<7) = 128 DWORDS. + */ +#ifndef SYM_SETUP_BURST_ORDER +#define SYM_SETUP_BURST_ORDER (7) +#endif + +/* + * Only relevant if IARB support configured. + * - Max number of successive settings of IARB hints. + * - Set IARB on arbitration lost. + */ +#define SYM_CONF_IARB_MAX 3 +#define SYM_CONF_SET_IARB_ON_ARB_LOST 1 + +/* + * Returning wrong residuals may make problems. + * When zero, this define tells the driver to + * always return 0 as transfer residual. + * Btw, all my testings of residuals have succeeded. + */ +#define SYM_SETUP_RESIDUAL_SUPPORT 1 + +/* + * Supported maximum number of LUNs to announce to + * the access method. + * The driver supports up to 64 LUNs per target as + * required by SPI-2/SPI-3. However some SCSI devices + * designed prior to these specifications or not being + * conformant may be highly confused when they are + * asked about a LUN > 7. + */ +#ifndef SYM_SETUP_MAX_LUN +#define SYM_SETUP_MAX_LUN (8) +#endif + +/* + * Bits indicating what kind of fix-ups we want. + * + * Bit 0 (1) : cache line size configuration register. + * Bit 1 (2) : MWI bit in command register. + * Bit 2 (4) : latency timer if seems too low. + */ + +#ifndef SYM_SETUP_PCI_FIX_UP +#define SYM_SETUP_PCI_FIX_UP (3) +#endif + +#endif /* SYM_CONF_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_defs.h b/drivers/scsi/sym53c8xx_2/sym_defs.h new file mode 100644 index 000000000000..70649d107d6a --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_defs.h @@ -0,0 +1,957 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. 
+ * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM_DEFS_H +#define SYM_DEFS_H + +/* + * Vendor. + */ +#define PCI_VENDOR_NCR 0x1000 + +/* + * PCI device identifier of SYMBIOS chips. + */ +#define PCI_ID_SYM53C810 1 +#define PCI_ID_SYM53C810AP 5 +#define PCI_ID_SYM53C815 4 +#define PCI_ID_SYM53C820 2 +#define PCI_ID_SYM53C825 3 +#define PCI_ID_SYM53C860 6 +#define PCI_ID_SYM53C875 0xf +#define PCI_ID_SYM53C875_2 0x8f +#define PCI_ID_SYM53C885 0xd +#define PCI_ID_SYM53C895 0xc +#define PCI_ID_SYM53C896 0xb +#define PCI_ID_SYM53C895A 0x12 +#define PCI_ID_SYM53C875A 0x13 +#define PCI_ID_LSI53C1010 0x20 +#define PCI_ID_LSI53C1010_2 0x21 +#define PCI_ID_LSI53C1510D 0xa + +/* + * SYM53C8XX device features descriptor. 
+ */ +struct sym_pci_chip { + u_short device_id; + u_short revision_id; + char *name; + u_char burst_max; /* log-base-2 of max burst */ + u_char offset_max; + u_char nr_divisor; + u_char lp_probe_bit; + u_int features; +#define FE_LED0 (1<<0) +#define FE_WIDE (1<<1) /* Wide data transfers */ +#define FE_ULTRA (1<<2) /* Ultra speed 20Mtrans/sec */ +#define FE_ULTRA2 (1<<3) /* Ultra 2 - 40 Mtrans/sec */ +#define FE_DBLR (1<<4) /* Clock doubler present */ +#define FE_QUAD (1<<5) /* Clock quadrupler present */ +#define FE_ERL (1<<6) /* Enable read line */ +#define FE_CLSE (1<<7) /* Cache line size enable */ +#define FE_WRIE (1<<8) /* Write & Invalidate enable */ +#define FE_ERMP (1<<9) /* Enable read multiple */ +#define FE_BOF (1<<10) /* Burst opcode fetch */ +#define FE_DFS (1<<11) /* DMA fifo size */ +#define FE_PFEN (1<<12) /* Prefetch enable */ +#define FE_LDSTR (1<<13) /* Load/Store supported */ +#define FE_RAM (1<<14) /* On chip RAM present */ +#define FE_VARCLK (1<<15) /* Clock frequency may vary */ +#define FE_RAM8K (1<<16) /* On chip RAM sized 8Kb */ +#define FE_64BIT (1<<17) /* 64-bit PCI BUS interface */ +#define FE_IO256 (1<<18) /* Requires full 256 bytes in PCI space */ +#define FE_NOPM (1<<19) /* Scripts handles phase mismatch */ +#define FE_LEDC (1<<20) /* Hardware control of LED */ +#define FE_ULTRA3 (1<<21) /* Ultra 3 - 80 Mtrans/sec DT */ +#define FE_66MHZ (1<<22) /* 66MHz PCI support */ +#define FE_CRC (1<<23) /* CRC support */ +#define FE_DIFF (1<<24) /* SCSI HVD support */ +#define FE_DFBC (1<<25) /* Have DFBC register */ +#define FE_LCKFRQ (1<<26) /* Have LCKFRQ */ +#define FE_C10 (1<<27) /* Various C10 core (mis)features */ +#define FE_U3EN (1<<28) /* U3EN bit usable */ +#define FE_DAC (1<<29) /* Support PCI DAC (64 bit addressing) */ +#define FE_ISTAT1 (1<<30) /* Have ISTAT1, MBOX0, MBOX1 registers */ + +#define FE_CACHE_SET (FE_ERL|FE_CLSE|FE_WRIE|FE_ERMP) +#define FE_CACHE0_SET (FE_CACHE_SET & ~FE_ERL) +}; + +/* + * Symbios NVRAM data format 
+ */ +#define SYMBIOS_NVRAM_SIZE 368 +#define SYMBIOS_NVRAM_ADDRESS 0x100 + +struct Symbios_nvram { +/* Header 6 bytes */ + u_short type; /* 0x0000 */ + u_short byte_count; /* excluding header/trailer */ + u_short checksum; + +/* Controller set up 20 bytes */ + u_char v_major; /* 0x00 */ + u_char v_minor; /* 0x30 */ + u32 boot_crc; + u_short flags; +#define SYMBIOS_SCAM_ENABLE (1) +#define SYMBIOS_PARITY_ENABLE (1<<1) +#define SYMBIOS_VERBOSE_MSGS (1<<2) +#define SYMBIOS_CHS_MAPPING (1<<3) +#define SYMBIOS_NO_NVRAM (1<<3) /* ??? */ + u_short flags1; +#define SYMBIOS_SCAN_HI_LO (1) + u_short term_state; +#define SYMBIOS_TERM_CANT_PROGRAM (0) +#define SYMBIOS_TERM_ENABLED (1) +#define SYMBIOS_TERM_DISABLED (2) + u_short rmvbl_flags; +#define SYMBIOS_RMVBL_NO_SUPPORT (0) +#define SYMBIOS_RMVBL_BOOT_DEVICE (1) +#define SYMBIOS_RMVBL_MEDIA_INSTALLED (2) + u_char host_id; + u_char num_hba; /* 0x04 */ + u_char num_devices; /* 0x10 */ + u_char max_scam_devices; /* 0x04 */ + u_char num_valid_scam_devices; /* 0x00 */ + u_char flags2; +#define SYMBIOS_AVOID_BUS_RESET (1<<2) + +/* Boot order 14 bytes * 4 */ + struct Symbios_host{ + u_short type; /* 4:8xx / 0:nok */ + u_short device_id; /* PCI device id */ + u_short vendor_id; /* PCI vendor id */ + u_char bus_nr; /* PCI bus number */ + u_char device_fn; /* PCI device/function number << 3*/ + u_short word8; + u_short flags; +#define SYMBIOS_INIT_SCAN_AT_BOOT (1) + u_short io_port; /* PCI io_port address */ + } host[4]; + +/* Targets 8 bytes * 16 */ + struct Symbios_target { + u_char flags; +#define SYMBIOS_DISCONNECT_ENABLE (1) +#define SYMBIOS_SCAN_AT_BOOT_TIME (1<<1) +#define SYMBIOS_SCAN_LUNS (1<<2) +#define SYMBIOS_QUEUE_TAGS_ENABLED (1<<3) + u_char rsvd; + u_char bus_width; /* 0x08/0x10 */ + u_char sync_offset; + u_short sync_period; /* 4*period factor */ + u_short timeout; + } target[16]; +/* Scam table 8 bytes * 4 */ + struct Symbios_scam { + u_short id; + u_short method; +#define SYMBIOS_SCAM_DEFAULT_METHOD (0) +#define 
SYMBIOS_SCAM_DONT_ASSIGN (1) +#define SYMBIOS_SCAM_SET_SPECIFIC_ID (2) +#define SYMBIOS_SCAM_USE_ORDER_GIVEN (3) + u_short status; +#define SYMBIOS_SCAM_UNKNOWN (0) +#define SYMBIOS_SCAM_DEVICE_NOT_FOUND (1) +#define SYMBIOS_SCAM_ID_NOT_SET (2) +#define SYMBIOS_SCAM_ID_VALID (3) + u_char target_id; + u_char rsvd; + } scam[4]; + + u_char spare_devices[15*8]; + u_char trailer[6]; /* 0xfe 0xfe 0x00 0x00 0x00 0x00 */ +}; +typedef struct Symbios_nvram Symbios_nvram; +typedef struct Symbios_host Symbios_host; +typedef struct Symbios_target Symbios_target; +typedef struct Symbios_scam Symbios_scam; + +/* + * Tekram NvRAM data format. + */ +#define TEKRAM_NVRAM_SIZE 64 +#define TEKRAM_93C46_NVRAM_ADDRESS 0 +#define TEKRAM_24C16_NVRAM_ADDRESS 0x40 + +struct Tekram_nvram { + struct Tekram_target { + u_char flags; +#define TEKRAM_PARITY_CHECK (1) +#define TEKRAM_SYNC_NEGO (1<<1) +#define TEKRAM_DISCONNECT_ENABLE (1<<2) +#define TEKRAM_START_CMD (1<<3) +#define TEKRAM_TAGGED_COMMANDS (1<<4) +#define TEKRAM_WIDE_NEGO (1<<5) + u_char sync_index; + u_short word2; + } target[16]; + u_char host_id; + u_char flags; +#define TEKRAM_MORE_THAN_2_DRIVES (1) +#define TEKRAM_DRIVES_SUP_1GB (1<<1) +#define TEKRAM_RESET_ON_POWER_ON (1<<2) +#define TEKRAM_ACTIVE_NEGATION (1<<3) +#define TEKRAM_IMMEDIATE_SEEK (1<<4) +#define TEKRAM_SCAN_LUNS (1<<5) +#define TEKRAM_REMOVABLE_FLAGS (3<<6) /* 0: disable; */ + /* 1: boot device; 2:all */ + u_char boot_delay_index; + u_char max_tags_index; + u_short flags1; +#define TEKRAM_F2_F6_ENABLED (1) + u_short spare[29]; +}; +typedef struct Tekram_nvram Tekram_nvram; +typedef struct Tekram_target Tekram_target; + +/* + * SYM53C8XX IO register data structure. 
+ */ +struct sym_reg { +/*00*/ u8 nc_scntl0; /* full arb., ena parity, par->ATN */ + +/*01*/ u8 nc_scntl1; /* no reset */ + #define ISCON 0x10 /* connected to scsi */ + #define CRST 0x08 /* force reset */ + #define IARB 0x02 /* immediate arbitration */ + +/*02*/ u8 nc_scntl2; /* no disconnect expected */ + #define SDU 0x80 /* cmd: disconnect will raise error */ + #define CHM 0x40 /* sta: chained mode */ + #define WSS 0x08 /* sta: wide scsi send [W]*/ + #define WSR 0x01 /* sta: wide scsi received [W]*/ + +/*03*/ u8 nc_scntl3; /* cnf system clock dependent */ + #define EWS 0x08 /* cmd: enable wide scsi [W]*/ + #define ULTRA 0x80 /* cmd: ULTRA enable */ + /* bits 0-2, 7 rsvd for C1010 */ + +/*04*/ u8 nc_scid; /* cnf host adapter scsi address */ + #define RRE 0x40 /* r/w:e enable response to resel. */ + #define SRE 0x20 /* r/w:e enable response to select */ + +/*05*/ u8 nc_sxfer; /* ### Sync speed and count */ + /* bits 6-7 rsvd for C1010 */ + +/*06*/ u8 nc_sdid; /* ### Destination-ID */ + +/*07*/ u8 nc_gpreg; /* ??? IO-Pins */ + +/*08*/ u8 nc_sfbr; /* ### First byte received */ + +/*09*/ u8 nc_socl; + #define CREQ 0x80 /* r/w: SCSI-REQ */ + #define CACK 0x40 /* r/w: SCSI-ACK */ + #define CBSY 0x20 /* r/w: SCSI-BSY */ + #define CSEL 0x10 /* r/w: SCSI-SEL */ + #define CATN 0x08 /* r/w: SCSI-ATN */ + #define CMSG 0x04 /* r/w: SCSI-MSG */ + #define CC_D 0x02 /* r/w: SCSI-C_D */ + #define CI_O 0x01 /* r/w: SCSI-I_O */ + +/*0a*/ u8 nc_ssid; + +/*0b*/ u8 nc_sbcl; + +/*0c*/ u8 nc_dstat; + #define DFE 0x80 /* sta: dma fifo empty */ + #define MDPE 0x40 /* int: master data parity error */ + #define BF 0x20 /* int: script: bus fault */ + #define ABRT 0x10 /* int: script: command aborted */ + #define SSI 0x08 /* int: script: single step */ + #define SIR 0x04 /* int: script: interrupt instruct. */ + #define IID 0x01 /* int: script: illegal instruct. 
*/ + +/*0d*/ u8 nc_sstat0; + #define ILF 0x80 /* sta: data in SIDL register lsb */ + #define ORF 0x40 /* sta: data in SODR register lsb */ + #define OLF 0x20 /* sta: data in SODL register lsb */ + #define AIP 0x10 /* sta: arbitration in progress */ + #define LOA 0x08 /* sta: arbitration lost */ + #define WOA 0x04 /* sta: arbitration won */ + #define IRST 0x02 /* sta: scsi reset signal */ + #define SDP 0x01 /* sta: scsi parity signal */ + +/*0e*/ u8 nc_sstat1; + #define FF3210 0xf0 /* sta: bytes in the scsi fifo */ + +/*0f*/ u8 nc_sstat2; + #define ILF1 0x80 /* sta: data in SIDL register msb[W]*/ + #define ORF1 0x40 /* sta: data in SODR register msb[W]*/ + #define OLF1 0x20 /* sta: data in SODL register msb[W]*/ + #define DM 0x04 /* sta: DIFFSENS mismatch (895/6 only) */ + #define LDSC 0x02 /* sta: disconnect & reconnect */ + +/*10*/ u8 nc_dsa; /* --> Base page */ +/*11*/ u8 nc_dsa1; +/*12*/ u8 nc_dsa2; +/*13*/ u8 nc_dsa3; + +/*14*/ u8 nc_istat; /* --> Main Command and status */ + #define CABRT 0x80 /* cmd: abort current operation */ + #define SRST 0x40 /* mod: reset chip */ + #define SIGP 0x20 /* r/w: message from host to script */ + #define SEM 0x10 /* r/w: message between host + script */ + #define CON 0x08 /* sta: connected to scsi */ + #define INTF 0x04 /* sta: int on the fly (reset by wr)*/ + #define SIP 0x02 /* sta: scsi-interrupt */ + #define DIP 0x01 /* sta: host/script interrupt */ + +/*15*/ u8 nc_istat1; /* 896 only */ + #define FLSH 0x04 /* sta: chip is flushing */ + #define SCRUN 0x02 /* sta: scripts are running */ + #define SIRQD 0x01 /* r/w: disable INT pin */ + +/*16*/ u8 nc_mbox0; /* 896 only */ +/*17*/ u8 nc_mbox1; /* 896 only */ + +/*18*/ u8 nc_ctest0; +/*19*/ u8 nc_ctest1; + +/*1a*/ u8 nc_ctest2; + #define CSIGP 0x40 + /* bits 0-2,7 rsvd for C1010 */ + +/*1b*/ u8 nc_ctest3; + #define FLF 0x08 /* cmd: flush dma fifo */ + #define CLF 0x04 /* cmd: clear dma fifo */ + #define FM 0x02 /* mod: fetch pin mode */ + #define WRIE 0x01 /* mod: write and 
invalidate enable */ + /* bits 4-7 rsvd for C1010 */ + +/*1c*/ u32 nc_temp; /* ### Temporary stack */ + +/*20*/ u8 nc_dfifo; +/*21*/ u8 nc_ctest4; + #define BDIS 0x80 /* mod: burst disable */ + #define MPEE 0x08 /* mod: master parity error enable */ + +/*22*/ u8 nc_ctest5; + #define DFS 0x20 /* mod: dma fifo size */ + /* bits 0-1, 3-7 rsvd for C1010 */ + +/*23*/ u8 nc_ctest6; + +/*24*/ u32 nc_dbc; /* ### Byte count and command */ +/*28*/ u32 nc_dnad; /* ### Next command register */ +/*2c*/ u32 nc_dsp; /* --> Script Pointer */ +/*30*/ u32 nc_dsps; /* --> Script pointer save/opcode#2 */ + +/*34*/ u8 nc_scratcha; /* Temporary register a */ +/*35*/ u8 nc_scratcha1; +/*36*/ u8 nc_scratcha2; +/*37*/ u8 nc_scratcha3; + +/*38*/ u8 nc_dmode; + #define BL_2 0x80 /* mod: burst length shift value +2 */ + #define BL_1 0x40 /* mod: burst length shift value +1 */ + #define ERL 0x08 /* mod: enable read line */ + #define ERMP 0x04 /* mod: enable read multiple */ + #define BOF 0x02 /* mod: burst op code fetch */ + +/*39*/ u8 nc_dien; +/*3a*/ u8 nc_sbr; + +/*3b*/ u8 nc_dcntl; /* --> Script execution control */ + #define CLSE 0x80 /* mod: cache line size enable */ + #define PFF 0x40 /* cmd: pre-fetch flush */ + #define PFEN 0x20 /* mod: pre-fetch enable */ + #define SSM 0x10 /* mod: single step mode */ + #define IRQM 0x08 /* mod: irq mode (1 = totem pole !) 
*/ + #define STD 0x04 /* cmd: start dma mode */ + #define IRQD 0x02 /* mod: irq disable */ + #define NOCOM 0x01 /* cmd: protect sfbr while reselect */ + /* bits 0-1 rsvd for C1010 */ + +/*3c*/ u32 nc_adder; + +/*40*/ u16 nc_sien; /* -->: interrupt enable */ +/*42*/ u16 nc_sist; /* <--: interrupt status */ + #define SBMC 0x1000/* sta: SCSI Bus Mode Change (895/6 only) */ + #define STO 0x0400/* sta: timeout (select) */ + #define GEN 0x0200/* sta: timeout (general) */ + #define HTH 0x0100/* sta: timeout (handshake) */ + #define MA 0x80 /* sta: phase mismatch */ + #define CMP 0x40 /* sta: arbitration complete */ + #define SEL 0x20 /* sta: selected by another device */ + #define RSL 0x10 /* sta: reselected by another device*/ + #define SGE 0x08 /* sta: gross error (over/underflow)*/ + #define UDC 0x04 /* sta: unexpected disconnect */ + #define RST 0x02 /* sta: scsi bus reset detected */ + #define PAR 0x01 /* sta: scsi parity error */ + +/*44*/ u8 nc_slpar; +/*45*/ u8 nc_swide; +/*46*/ u8 nc_macntl; +/*47*/ u8 nc_gpcntl; +/*48*/ u8 nc_stime0; /* cmd: timeout for select&handshake*/ +/*49*/ u8 nc_stime1; /* cmd: timeout user defined */ +/*4a*/ u16 nc_respid; /* sta: Reselect-IDs */ + +/*4c*/ u8 nc_stest0; + +/*4d*/ u8 nc_stest1; + #define SCLK 0x80 /* Use the PCI clock as SCSI clock */ + #define DBLEN 0x08 /* clock doubler running */ + #define DBLSEL 0x04 /* clock doubler selected */ + + +/*4e*/ u8 nc_stest2; + #define ROF 0x40 /* reset scsi offset (after gross error!) 
*/ + #define EXT 0x02 /* extended filtering */ + +/*4f*/ u8 nc_stest3; + #define TE 0x80 /* c: tolerAnt enable */ + #define HSC 0x20 /* c: Halt SCSI Clock */ + #define CSF 0x02 /* c: clear scsi fifo */ + +/*50*/ u16 nc_sidl; /* Lowlevel: latched from scsi data */ +/*52*/ u8 nc_stest4; + #define SMODE 0xc0 /* SCSI bus mode (895/6 only) */ + #define SMODE_HVD 0x40 /* High Voltage Differential */ + #define SMODE_SE 0x80 /* Single Ended */ + #define SMODE_LVD 0xc0 /* Low Voltage Differential */ + #define LCKFRQ 0x20 /* Frequency Lock (895/6 only) */ + /* bits 0-5 rsvd for C1010 */ + +/*53*/ u8 nc_53_; +/*54*/ u16 nc_sodl; /* Lowlevel: data out to scsi data */ +/*56*/ u8 nc_ccntl0; /* Chip Control 0 (896) */ + #define ENPMJ 0x80 /* Enable Phase Mismatch Jump */ + #define PMJCTL 0x40 /* Phase Mismatch Jump Control */ + #define ENNDJ 0x20 /* Enable Non Data PM Jump */ + #define DISFC 0x10 /* Disable Auto FIFO Clear */ + #define DILS 0x02 /* Disable Internal Load/Store */ + #define DPR 0x01 /* Disable Pipe Req */ + +/*57*/ u8 nc_ccntl1; /* Chip Control 1 (896) */ + #define ZMOD 0x80 /* High Impedance Mode */ + #define DDAC 0x08 /* Disable Dual Address Cycle */ + #define XTIMOD 0x04 /* 64-bit Table Ind. Indexing Mode */ + #define EXTIBMV 0x02 /* Enable 64-bit Table Ind. 
BMOV */ + #define EXDBMV 0x01 /* Enable 64-bit Direct BMOV */ + +/*58*/ u16 nc_sbdl; /* Lowlevel: data from scsi data */ +/*5a*/ u16 nc_5a_; + +/*5c*/ u8 nc_scr0; /* Working register B */ +/*5d*/ u8 nc_scr1; +/*5e*/ u8 nc_scr2; +/*5f*/ u8 nc_scr3; + +/*60*/ u8 nc_scrx[64]; /* Working register C-R */ +/*a0*/ u32 nc_mmrs; /* Memory Move Read Selector */ +/*a4*/ u32 nc_mmws; /* Memory Move Write Selector */ +/*a8*/ u32 nc_sfs; /* Script Fetch Selector */ +/*ac*/ u32 nc_drs; /* DSA Relative Selector */ +/*b0*/ u32 nc_sbms; /* Static Block Move Selector */ +/*b4*/ u32 nc_dbms; /* Dynamic Block Move Selector */ +/*b8*/ u32 nc_dnad64; /* DMA Next Address 64 */ +/*bc*/ u16 nc_scntl4; /* C1010 only */ + #define U3EN 0x80 /* Enable Ultra 3 */ + #define AIPCKEN 0x40 /* AIP checking enable */ + /* Also enable AIP generation on C10-33*/ + #define XCLKH_DT 0x08 /* Extra clock of data hold on DT edge */ + #define XCLKH_ST 0x04 /* Extra clock of data hold on ST edge */ + #define XCLKS_DT 0x02 /* Extra clock of data set on DT edge */ + #define XCLKS_ST 0x01 /* Extra clock of data set on ST edge */ +/*be*/ u8 nc_aipcntl0; /* AIP Control 0 C1010 only */ +/*bf*/ u8 nc_aipcntl1; /* AIP Control 1 C1010 only */ + #define DISAIP 0x08 /* Disable AIP generation C10-66 only */ +/*c0*/ u32 nc_pmjad1; /* Phase Mismatch Jump Address 1 */ +/*c4*/ u32 nc_pmjad2; /* Phase Mismatch Jump Address 2 */ +/*c8*/ u8 nc_rbc; /* Remaining Byte Count */ +/*c9*/ u8 nc_rbc1; +/*ca*/ u8 nc_rbc2; +/*cb*/ u8 nc_rbc3; + +/*cc*/ u8 nc_ua; /* Updated Address */ +/*cd*/ u8 nc_ua1; +/*ce*/ u8 nc_ua2; +/*cf*/ u8 nc_ua3; +/*d0*/ u32 nc_esa; /* Entry Storage Address */ +/*d4*/ u8 nc_ia; /* Instruction Address */ +/*d5*/ u8 nc_ia1; +/*d6*/ u8 nc_ia2; +/*d7*/ u8 nc_ia3; +/*d8*/ u32 nc_sbc; /* SCSI Byte Count (3 bytes only) */ +/*dc*/ u32 nc_csbc; /* Cumulative SCSI Byte Count */ + /* Following for C1010 only */ +/*e0*/ u16 nc_crcpad; /* CRC Value */ +/*e2*/ u8 nc_crccntl0; /* CRC control register */ + #define SNDCRC 0x10 
/* Send CRC Request */ +/*e3*/ u8 nc_crccntl1; /* CRC control register */ +/*e4*/ u32 nc_crcdata; /* CRC data register */ +/*e8*/ u32 nc_e8_; +/*ec*/ u32 nc_ec_; +/*f0*/ u16 nc_dfbc; /* DMA FIFO byte count */ +}; + +/*----------------------------------------------------------- + * + * Utility macros for the script. + * + *----------------------------------------------------------- + */ + +#define REGJ(p,r) (offsetof(struct sym_reg, p ## r)) +#define REG(r) REGJ (nc_, r) + +/*----------------------------------------------------------- + * + * SCSI phases + * + *----------------------------------------------------------- + */ + +#define SCR_DATA_OUT 0x00000000 +#define SCR_DATA_IN 0x01000000 +#define SCR_COMMAND 0x02000000 +#define SCR_STATUS 0x03000000 +#define SCR_DT_DATA_OUT 0x04000000 +#define SCR_DT_DATA_IN 0x05000000 +#define SCR_MSG_OUT 0x06000000 +#define SCR_MSG_IN 0x07000000 +/* DT phases are illegal for non Ultra3 mode */ +#define SCR_ILG_OUT 0x04000000 +#define SCR_ILG_IN 0x05000000 + +/*----------------------------------------------------------- + * + * Data transfer via SCSI. 
+ * + *----------------------------------------------------------- + * + * MOVE_ABS (LEN) + * <<start address>> + * + * MOVE_IND (LEN) + * <<dnad_offset>> + * + * MOVE_TBL + * <<dnad_offset>> + * + *----------------------------------------------------------- + */ + +#define OPC_MOVE 0x08000000 + +#define SCR_MOVE_ABS(l) ((0x00000000 | OPC_MOVE) | (l)) +/* #define SCR_MOVE_IND(l) ((0x20000000 | OPC_MOVE) | (l)) */ +#define SCR_MOVE_TBL (0x10000000 | OPC_MOVE) + +#define SCR_CHMOV_ABS(l) ((0x00000000) | (l)) +/* #define SCR_CHMOV_IND(l) ((0x20000000) | (l)) */ +#define SCR_CHMOV_TBL (0x10000000) + +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT +/* We steal the `indirect addressing' flag for target mode MOVE in scripts */ + +#define OPC_TCHMOVE 0x08000000 + +#define SCR_TCHMOVE_ABS(l) ((0x20000000 | OPC_TCHMOVE) | (l)) +#define SCR_TCHMOVE_TBL (0x30000000 | OPC_TCHMOVE) + +#define SCR_TMOV_ABS(l) ((0x20000000) | (l)) +#define SCR_TMOV_TBL (0x30000000) +#endif + +struct sym_tblmove { + u32 size; + u32 addr; +}; + +/*----------------------------------------------------------- + * + * Selection + * + *----------------------------------------------------------- + * + * SEL_ABS | SCR_ID (0..15) [ | REL_JMP] + * <<alternate_address>> + * + * SEL_TBL | << dnad_offset>> [ | REL_JMP] + * <<alternate_address>> + * + *----------------------------------------------------------- + */ + +#define SCR_SEL_ABS 0x40000000 +#define SCR_SEL_ABS_ATN 0x41000000 +#define SCR_SEL_TBL 0x42000000 +#define SCR_SEL_TBL_ATN 0x43000000 + +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT +#define SCR_RESEL_ABS 0x40000000 +#define SCR_RESEL_ABS_ATN 0x41000000 +#define SCR_RESEL_TBL 0x42000000 +#define SCR_RESEL_TBL_ATN 0x43000000 +#endif + +struct sym_tblsel { + u_char sel_scntl4; /* C1010 only */ + u_char sel_sxfer; + u_char sel_id; + u_char sel_scntl3; +}; + +#define SCR_JMP_REL 0x04000000 +#define SCR_ID(id) (((u32)(id)) << 16) + +/*----------------------------------------------------------- + * + * Waiting for 
Disconnect or Reselect + * + *----------------------------------------------------------- + * + * WAIT_DISC + * dummy: <<alternate_address>> + * + * WAIT_RESEL + * <<alternate_address>> + * + *----------------------------------------------------------- + */ + +#define SCR_WAIT_DISC 0x48000000 +#define SCR_WAIT_RESEL 0x50000000 + +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT +#define SCR_DISCONNECT 0x48000000 +#endif + +/*----------------------------------------------------------- + * + * Bit Set / Reset + * + *----------------------------------------------------------- + * + * SET (flags {|.. }) + * + * CLR (flags {|.. }) + * + *----------------------------------------------------------- + */ + +#define SCR_SET(f) (0x58000000 | (f)) +#define SCR_CLR(f) (0x60000000 | (f)) + +#define SCR_CARRY 0x00000400 +#define SCR_TRG 0x00000200 +#define SCR_ACK 0x00000040 +#define SCR_ATN 0x00000008 + + +/*----------------------------------------------------------- + * + * Memory to memory move + * + *----------------------------------------------------------- + * + * COPY (bytecount) + * << source_address >> + * << destination_address >> + * + * SCR_COPY sets the NO FLUSH option by default. + * SCR_COPY_F does not set this option. + * + * For chips which do not support this option, + * sym_fw_bind_script() will remove this bit. 
+ * + *----------------------------------------------------------- + */ + +#define SCR_NO_FLUSH 0x01000000 + +#define SCR_COPY(n) (0xc0000000 | SCR_NO_FLUSH | (n)) +#define SCR_COPY_F(n) (0xc0000000 | (n)) + +/*----------------------------------------------------------- + * + * Register move and binary operations + * + *----------------------------------------------------------- + * + * SFBR_REG (reg, op, data) reg = SFBR op data + * << 0 >> + * + * REG_SFBR (reg, op, data) SFBR = reg op data + * << 0 >> + * + * REG_REG (reg, op, data) reg = reg op data + * << 0 >> + * + *----------------------------------------------------------- + * + * On 825A, 875, 895 and 896 chips the content + * of SFBR register can be used as data (SCR_SFBR_DATA). + * The 896 has additionnal IO registers starting at + * offset 0x80. Bit 7 of register offset is stored in + * bit 7 of the SCRIPTS instruction first DWORD. + * + *----------------------------------------------------------- + */ + +#define SCR_REG_OFS(ofs) ((((ofs) & 0x7f) << 16ul) + ((ofs) & 0x80)) + +#define SCR_SFBR_REG(reg,op,data) \ + (0x68000000 | (SCR_REG_OFS(REG(reg))) | (op) | (((data)&0xff)<<8ul)) + +#define SCR_REG_SFBR(reg,op,data) \ + (0x70000000 | (SCR_REG_OFS(REG(reg))) | (op) | (((data)&0xff)<<8ul)) + +#define SCR_REG_REG(reg,op,data) \ + (0x78000000 | (SCR_REG_OFS(REG(reg))) | (op) | (((data)&0xff)<<8ul)) + + +#define SCR_LOAD 0x00000000 +#define SCR_SHL 0x01000000 +#define SCR_OR 0x02000000 +#define SCR_XOR 0x03000000 +#define SCR_AND 0x04000000 +#define SCR_SHR 0x05000000 +#define SCR_ADD 0x06000000 +#define SCR_ADDC 0x07000000 + +#define SCR_SFBR_DATA (0x00800000>>8ul) /* Use SFBR as data */ + +/*----------------------------------------------------------- + * + * FROM_REG (reg) SFBR = reg + * << 0 >> + * + * TO_REG (reg) reg = SFBR + * << 0 >> + * + * LOAD_REG (reg, data) reg = <data> + * << 0 >> + * + * LOAD_SFBR(data) SFBR = <data> + * << 0 >> + * + 
*----------------------------------------------------------- + */ + +#define SCR_FROM_REG(reg) \ + SCR_REG_SFBR(reg,SCR_OR,0) + +#define SCR_TO_REG(reg) \ + SCR_SFBR_REG(reg,SCR_OR,0) + +#define SCR_LOAD_REG(reg,data) \ + SCR_REG_REG(reg,SCR_LOAD,data) + +#define SCR_LOAD_SFBR(data) \ + (SCR_REG_SFBR (gpreg, SCR_LOAD, data)) + +/*----------------------------------------------------------- + * + * LOAD from memory to register. + * STORE from register to memory. + * + * Only supported by 810A, 860, 825A, 875, 895 and 896. + * + *----------------------------------------------------------- + * + * LOAD_ABS (LEN) + * <<start address>> + * + * LOAD_REL (LEN) (DSA relative) + * <<dsa_offset>> + * + *----------------------------------------------------------- + */ + +#define SCR_REG_OFS2(ofs) (((ofs) & 0xff) << 16ul) +#define SCR_NO_FLUSH2 0x02000000 +#define SCR_DSA_REL2 0x10000000 + +#define SCR_LOAD_R(reg, how, n) \ + (0xe1000000 | how | (SCR_REG_OFS2(REG(reg))) | (n)) + +#define SCR_STORE_R(reg, how, n) \ + (0xe0000000 | how | (SCR_REG_OFS2(REG(reg))) | (n)) + +#define SCR_LOAD_ABS(reg, n) SCR_LOAD_R(reg, SCR_NO_FLUSH2, n) +#define SCR_LOAD_REL(reg, n) SCR_LOAD_R(reg, SCR_NO_FLUSH2|SCR_DSA_REL2, n) +#define SCR_LOAD_ABS_F(reg, n) SCR_LOAD_R(reg, 0, n) +#define SCR_LOAD_REL_F(reg, n) SCR_LOAD_R(reg, SCR_DSA_REL2, n) + +#define SCR_STORE_ABS(reg, n) SCR_STORE_R(reg, SCR_NO_FLUSH2, n) +#define SCR_STORE_REL(reg, n) SCR_STORE_R(reg, SCR_NO_FLUSH2|SCR_DSA_REL2,n) +#define SCR_STORE_ABS_F(reg, n) SCR_STORE_R(reg, 0, n) +#define SCR_STORE_REL_F(reg, n) SCR_STORE_R(reg, SCR_DSA_REL2, n) + + +/*----------------------------------------------------------- + * + * Waiting for Disconnect or Reselect + * + *----------------------------------------------------------- + * + * JUMP [ | IFTRUE/IFFALSE ( ... ) ] + * <<address>> + * + * JUMPR [ | IFTRUE/IFFALSE ( ... ) ] + * <<distance>> + * + * CALL [ | IFTRUE/IFFALSE ( ... ) ] + * <<address>> + * + * CALLR [ | IFTRUE/IFFALSE ( ... 
) ] + * <<distance>> + * + * RETURN [ | IFTRUE/IFFALSE ( ... ) ] + * <<dummy>> + * + * INT [ | IFTRUE/IFFALSE ( ... ) ] + * <<ident>> + * + * INT_FLY [ | IFTRUE/IFFALSE ( ... ) ] + * <<ident>> + * + * Conditions: + * WHEN (phase) + * IF (phase) + * CARRYSET + * DATA (data, mask) + * + *----------------------------------------------------------- + */ + +#define SCR_NO_OP 0x80000000 +#define SCR_JUMP 0x80080000 +#define SCR_JUMP64 0x80480000 +#define SCR_JUMPR 0x80880000 +#define SCR_CALL 0x88080000 +#define SCR_CALLR 0x88880000 +#define SCR_RETURN 0x90080000 +#define SCR_INT 0x98080000 +#define SCR_INT_FLY 0x98180000 + +#define IFFALSE(arg) (0x00080000 | (arg)) +#define IFTRUE(arg) (0x00000000 | (arg)) + +#define WHEN(phase) (0x00030000 | (phase)) +#define IF(phase) (0x00020000 | (phase)) + +#define DATA(D) (0x00040000 | ((D) & 0xff)) +#define MASK(D,M) (0x00040000 | (((M ^ 0xff) & 0xff) << 8ul)|((D) & 0xff)) + +#define CARRYSET (0x00200000) + +/*----------------------------------------------------------- + * + * SCSI constants. 
+ * + *----------------------------------------------------------- + */ + +/* + * Messages + */ + +#define M_COMPLETE (0x00) +#define M_EXTENDED (0x01) +#define M_SAVE_DP (0x02) +#define M_RESTORE_DP (0x03) +#define M_DISCONNECT (0x04) +#define M_ID_ERROR (0x05) +#define M_ABORT (0x06) +#define M_REJECT (0x07) +#define M_NOOP (0x08) +#define M_PARITY (0x09) +#define M_LCOMPLETE (0x0a) +#define M_FCOMPLETE (0x0b) +#define M_RESET (0x0c) +#define M_ABORT_TAG (0x0d) +#define M_CLEAR_QUEUE (0x0e) +#define M_INIT_REC (0x0f) +#define M_REL_REC (0x10) +#define M_TERMINATE (0x11) +#define M_SIMPLE_TAG (0x20) +#define M_HEAD_TAG (0x21) +#define M_ORDERED_TAG (0x22) +#define M_IGN_RESIDUE (0x23) +#define M_IDENTIFY (0x80) + +#define M_X_MODIFY_DP (0x00) +#define M_X_SYNC_REQ (0x01) +#define M_X_WIDE_REQ (0x03) +#define M_X_PPR_REQ (0x04) + +/* + * PPR protocol options + */ +#define PPR_OPT_IU (0x01) +#define PPR_OPT_DT (0x02) +#define PPR_OPT_QAS (0x04) +#define PPR_OPT_MASK (0x07) + +/* + * Status + */ + +#define S_GOOD (0x00) +#define S_CHECK_COND (0x02) +#define S_COND_MET (0x04) +#define S_BUSY (0x08) +#define S_INT (0x10) +#define S_INT_COND_MET (0x14) +#define S_CONFLICT (0x18) +#define S_TERMINATED (0x20) +#define S_QUEUE_FULL (0x28) +#define S_ILLEGAL (0xff) + +#endif /* defined SYM_DEFS_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_fw.c b/drivers/scsi/sym53c8xx_2/sym_fw.c new file mode 100644 index 000000000000..46858e893e2a --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_fw.c @@ -0,0 +1,617 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. 
+ * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __FreeBSD__ +#include <dev/sym/sym_glue.h> +#else +#include "sym_glue.h" +#endif + +/* + * Macros used for all firmwares. + */ +#define SYM_GEN_A(s, label) ((short) offsetof(s, label)), +#define SYM_GEN_B(s, label) ((short) offsetof(s, label)), +#define SYM_GEN_Z(s, label) ((short) offsetof(s, label)), +#define PADDR_A(label) SYM_GEN_PADDR_A(struct SYM_FWA_SCR, label) +#define PADDR_B(label) SYM_GEN_PADDR_B(struct SYM_FWB_SCR, label) + + +#if SYM_CONF_GENERIC_SUPPORT +/* + * Allocate firmware #1 script area. + */ +#define SYM_FWA_SCR sym_fw1a_scr +#define SYM_FWB_SCR sym_fw1b_scr +#define SYM_FWZ_SCR sym_fw1z_scr +#ifdef __FreeBSD__ +#include <dev/sym/sym_fw1.h> +#else +#include "sym_fw1.h" +#endif +static struct sym_fwa_ofs sym_fw1a_ofs = { + SYM_GEN_FW_A(struct SYM_FWA_SCR) +}; +static struct sym_fwb_ofs sym_fw1b_ofs = { + SYM_GEN_FW_B(struct SYM_FWB_SCR) +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + SYM_GEN_B(struct SYM_FWB_SCR, data_io) +#endif +}; +static struct sym_fwz_ofs sym_fw1z_ofs = { + SYM_GEN_FW_Z(struct SYM_FWZ_SCR) +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + SYM_GEN_Z(struct SYM_FWZ_SCR, start_ram) +#endif +}; +#undef SYM_FWA_SCR +#undef SYM_FWB_SCR +#undef SYM_FWZ_SCR +#endif /* SYM_CONF_GENERIC_SUPPORT */ + +/* + * Allocate firmware #2 script area. 
+ */ +#define SYM_FWA_SCR sym_fw2a_scr +#define SYM_FWB_SCR sym_fw2b_scr +#define SYM_FWZ_SCR sym_fw2z_scr +#ifdef __FreeBSD__ +#include <dev/sym/sym_fw2.h> +#else +#include "sym_fw2.h" +#endif +static struct sym_fwa_ofs sym_fw2a_ofs = { + SYM_GEN_FW_A(struct SYM_FWA_SCR) +}; +static struct sym_fwb_ofs sym_fw2b_ofs = { + SYM_GEN_FW_B(struct SYM_FWB_SCR) +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + SYM_GEN_B(struct SYM_FWB_SCR, data_io) +#endif + SYM_GEN_B(struct SYM_FWB_SCR, start64) + SYM_GEN_B(struct SYM_FWB_SCR, pm_handle) +}; +static struct sym_fwz_ofs sym_fw2z_ofs = { + SYM_GEN_FW_Z(struct SYM_FWZ_SCR) +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + SYM_GEN_Z(struct SYM_FWZ_SCR, start_ram) + SYM_GEN_Z(struct SYM_FWZ_SCR, start_ram64) +#endif +}; +#undef SYM_FWA_SCR +#undef SYM_FWB_SCR +#undef SYM_FWZ_SCR + +#undef SYM_GEN_A +#undef SYM_GEN_B +#undef SYM_GEN_Z +#undef PADDR_A +#undef PADDR_B + +#if SYM_CONF_GENERIC_SUPPORT +/* + * Patch routine for firmware #1. + */ +static void +sym_fw1_patch(hcb_p np) +{ + struct sym_fw1a_scr *scripta0; + struct sym_fw1b_scr *scriptb0; +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + struct sym_fw1z_scr *scriptz0 = + (struct sym_fw1z_scr *) np->scriptz0; +#endif + + scripta0 = (struct sym_fw1a_scr *) np->scripta0; + scriptb0 = (struct sym_fw1b_scr *) np->scriptb0; + +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + /* + * Set up BUS physical address of SCRIPTS that is to + * be copied to on-chip RAM by the SCRIPTS processor. + */ + scriptz0->scripta0_ba[0] = cpu_to_scr(vtobus(scripta0)); +#endif + + /* + * Remove LED support if not needed. + */ + if (!(np->features & FE_LED0)) { + scripta0->idle[0] = cpu_to_scr(SCR_NO_OP); + scripta0->reselected[0] = cpu_to_scr(SCR_NO_OP); + scripta0->start[0] = cpu_to_scr(SCR_NO_OP); + } + +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If user does not want to use IMMEDIATE ARBITRATION + * when we are reselected while attempting to arbitrate, + * patch the SCRIPTS accordingly with a SCRIPT NO_OP. 
+ */ + if (!SYM_CONF_SET_IARB_ON_ARB_LOST) + scripta0->ungetjob[0] = cpu_to_scr(SCR_NO_OP); +#endif + /* + * Patch some data in SCRIPTS. + * - start and done queue initial bus address. + * - target bus address table bus address. + */ + scriptb0->startpos[0] = cpu_to_scr(np->squeue_ba); + scriptb0->done_pos[0] = cpu_to_scr(np->dqueue_ba); + scriptb0->targtbl[0] = cpu_to_scr(np->targtbl_ba); +} +#endif /* SYM_CONF_GENERIC_SUPPORT */ + +/* + * Patch routine for firmware #2. + */ +static void +sym_fw2_patch(hcb_p np) +{ + struct sym_fw2a_scr *scripta0; + struct sym_fw2b_scr *scriptb0; +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + struct sym_fw2z_scr *scriptz0 = + (struct sym_fw2z_scr *) np->scriptz0; +#endif + + scripta0 = (struct sym_fw2a_scr *) np->scripta0; + scriptb0 = (struct sym_fw2b_scr *) np->scriptb0; + +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + /* + * Set up BUS physical address of SCRIPTS that is to + * be copied to on-chip RAM by the SCRIPTS processor. + */ + scriptz0->scripta0_ba64[0] = /* Nothing is missing here */ + scriptz0->scripta0_ba[0] = cpu_to_scr(vtobus(scripta0)); + scriptz0->scriptb0_ba64[0] = cpu_to_scr(vtobus(scriptb0)); + scriptz0->ram_seg64[0] = np->scr_ram_seg; +#endif + + /* + * Remove LED support if not needed. + */ + if (!(np->features & FE_LED0)) { + scripta0->idle[0] = cpu_to_scr(SCR_NO_OP); + scripta0->reselected[0] = cpu_to_scr(SCR_NO_OP); + scripta0->start[0] = cpu_to_scr(SCR_NO_OP); + } + +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 + /* + * Remove useless 64 bit DMA specific SCRIPTS, + * when this feature is not available. + */ + if (!np->use_dac) { + scripta0->is_dmap_dirty[0] = cpu_to_scr(SCR_NO_OP); + scripta0->is_dmap_dirty[1] = 0; + scripta0->is_dmap_dirty[2] = cpu_to_scr(SCR_NO_OP); + scripta0->is_dmap_dirty[3] = 0; + } +#endif + +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If user does not want to use IMMEDIATE ARBITRATION + * when we are reselected while attempting to arbitrate, + * patch the SCRIPTS accordingly with a SCRIPT NO_OP. 
+ */ + if (!SYM_CONF_SET_IARB_ON_ARB_LOST) + scripta0->ungetjob[0] = cpu_to_scr(SCR_NO_OP); +#endif + /* + * Patch some variable in SCRIPTS. + * - start and done queue initial bus address. + * - target bus address table bus address. + */ + scriptb0->startpos[0] = cpu_to_scr(np->squeue_ba); + scriptb0->done_pos[0] = cpu_to_scr(np->dqueue_ba); + scriptb0->targtbl[0] = cpu_to_scr(np->targtbl_ba); + + /* + * Remove the load of SCNTL4 on reselection if not a C10. + */ + if (!(np->features & FE_C10)) { + scripta0->resel_scntl4[0] = cpu_to_scr(SCR_NO_OP); + scripta0->resel_scntl4[1] = cpu_to_scr(0); + } + + /* + * Remove a couple of work-arounds specific to C1010 if + * they are not desirable. See `sym_fw2.h' for more details. + */ + if (!(np->device_id == PCI_ID_LSI53C1010_2 && + np->revision_id < 0x1 && + np->pciclk_khz < 60000)) { + scripta0->datao_phase[0] = cpu_to_scr(SCR_NO_OP); + scripta0->datao_phase[1] = cpu_to_scr(0); + } + if (!(np->device_id == PCI_ID_LSI53C1010 && + /* np->revision_id < 0xff */ 1)) { + scripta0->sel_done[0] = cpu_to_scr(SCR_NO_OP); + scripta0->sel_done[1] = cpu_to_scr(0); + } + + /* + * Patch some other variables in SCRIPTS. + * These ones are loaded by the SCRIPTS processor. + */ + scriptb0->pm0_data_addr[0] = + cpu_to_scr(np->scripta_ba + + offsetof(struct sym_fw2a_scr, pm0_data)); + scriptb0->pm1_data_addr[0] = + cpu_to_scr(np->scripta_ba + + offsetof(struct sym_fw2a_scr, pm1_data)); +} + +/* + * Fill the data area in scripts. + * To be done for all firmwares. + */ +static void +sym_fw_fill_data (u32 *in, u32 *out) +{ + int i; + + for (i = 0; i < SYM_CONF_MAX_SG; i++) { + *in++ = SCR_CHMOV_TBL ^ SCR_DATA_IN; + *in++ = offsetof (struct sym_dsb, data[i]); + *out++ = SCR_CHMOV_TBL ^ SCR_DATA_OUT; + *out++ = offsetof (struct sym_dsb, data[i]); + } +} + +/* + * Setup useful script bus addresses. + * To be done for all firmwares. 
+ */ +static void +sym_fw_setup_bus_addresses(hcb_p np, struct sym_fw *fw) +{ + u32 *pa; + u_short *po; + int i; + + /* + * Build the bus address table for script A + * from the script A offset table. + */ + po = (u_short *) fw->a_ofs; + pa = (u32 *) &np->fwa_bas; + for (i = 0 ; i < sizeof(np->fwa_bas)/sizeof(u32) ; i++) + pa[i] = np->scripta_ba + po[i]; + + /* + * Same for script B. + */ + po = (u_short *) fw->b_ofs; + pa = (u32 *) &np->fwb_bas; + for (i = 0 ; i < sizeof(np->fwb_bas)/sizeof(u32) ; i++) + pa[i] = np->scriptb_ba + po[i]; + + /* + * Same for script Z. + */ + po = (u_short *) fw->z_ofs; + pa = (u32 *) &np->fwz_bas; + for (i = 0 ; i < sizeof(np->fwz_bas)/sizeof(u32) ; i++) + pa[i] = np->scriptz_ba + po[i]; +} + +#if SYM_CONF_GENERIC_SUPPORT +/* + * Setup routine for firmware #1. + */ +static void +sym_fw1_setup(hcb_p np, struct sym_fw *fw) +{ + struct sym_fw1a_scr *scripta0; + struct sym_fw1b_scr *scriptb0; + + scripta0 = (struct sym_fw1a_scr *) np->scripta0; + scriptb0 = (struct sym_fw1b_scr *) np->scriptb0; + + /* + * Fill variable parts in scripts. + */ + sym_fw_fill_data(scripta0->data_in, scripta0->data_out); + + /* + * Setup bus addresses used from the C code.. + */ + sym_fw_setup_bus_addresses(np, fw); +} +#endif /* SYM_CONF_GENERIC_SUPPORT */ + +/* + * Setup routine for firmware #2. + */ +static void +sym_fw2_setup(hcb_p np, struct sym_fw *fw) +{ + struct sym_fw2a_scr *scripta0; + struct sym_fw2b_scr *scriptb0; + + scripta0 = (struct sym_fw2a_scr *) np->scripta0; + scriptb0 = (struct sym_fw2b_scr *) np->scriptb0; + + /* + * Fill variable parts in scripts. + */ + sym_fw_fill_data(scripta0->data_in, scripta0->data_out); + + /* + * Setup bus addresses used from the C code.. + */ + sym_fw_setup_bus_addresses(np, fw); +} + +/* + * Allocate firmware descriptors. 
+ */ +#if SYM_CONF_GENERIC_SUPPORT +static struct sym_fw sym_fw1 = SYM_FW_ENTRY(sym_fw1, "NCR-generic"); +#endif /* SYM_CONF_GENERIC_SUPPORT */ +static struct sym_fw sym_fw2 = SYM_FW_ENTRY(sym_fw2, "LOAD/STORE-based"); + +/* + * Find the most appropriate firmware for a chip. + */ +struct sym_fw * +sym_find_firmware(struct sym_pci_chip *chip) +{ + if (chip->features & FE_LDSTR) + return &sym_fw2; +#if SYM_CONF_GENERIC_SUPPORT + else if (!(chip->features & (FE_PFEN|FE_NOPM|FE_DAC))) + return &sym_fw1; +#endif + else + return 0; +} + +/* + * Bind a script to physical addresses. + */ +void sym_fw_bind_script (hcb_p np, u32 *start, int len) +{ + u32 opcode, new, old, tmp1, tmp2; + u32 *end, *cur; + int relocs; + + cur = start; + end = start + len/4; + + while (cur < end) { + + opcode = *cur; + + /* + * If we forget to change the length + * in scripts, a field will be + * padded with 0. This is an illegal + * command. + */ + if (opcode == 0) { + printf ("%s: ERROR0 IN SCRIPT at %d.\n", + sym_name(np), (int) (cur-start)); + MDELAY (10000); + ++cur; + continue; + }; + + /* + * We use the bogus value 0xf00ff00f ;-) + * to reserve data area in SCRIPTS. + */ + if (opcode == SCR_DATA_ZERO) { + *cur++ = 0; + continue; + } + + if (DEBUG_FLAGS & DEBUG_SCRIPT) + printf ("%d: <%x>\n", (int) (cur-start), + (unsigned)opcode); + + /* + * We don't have to decode ALL commands + */ + switch (opcode >> 28) { + case 0xf: + /* + * LOAD / STORE DSA relative, don't relocate. + */ + relocs = 0; + break; + case 0xe: + /* + * LOAD / STORE absolute. + */ + relocs = 1; + break; + case 0xc: + /* + * COPY has TWO arguments. + */ + relocs = 2; + tmp1 = cur[1]; + tmp2 = cur[2]; + if ((tmp1 ^ tmp2) & 3) { + printf ("%s: ERROR1 IN SCRIPT at %d.\n", + sym_name(np), (int) (cur-start)); + MDELAY (10000); + } + /* + * If PREFETCH feature not enabled, remove + * the NO FLUSH bit if present. 
+ */ + if ((opcode & SCR_NO_FLUSH) && + !(np->features & FE_PFEN)) { + opcode = (opcode & ~SCR_NO_FLUSH); + } + break; + case 0x0: + /* + * MOVE/CHMOV (absolute address) + */ + if (!(np->features & FE_WIDE)) + opcode = (opcode | OPC_MOVE); + relocs = 1; + break; + case 0x1: + /* + * MOVE/CHMOV (table indirect) + */ + if (!(np->features & FE_WIDE)) + opcode = (opcode | OPC_MOVE); + relocs = 0; + break; +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + case 0x2: + /* + * MOVE/CHMOV in target role (absolute address) + */ + opcode &= ~0x20000000; + if (!(np->features & FE_WIDE)) + opcode = (opcode & ~OPC_TCHMOVE); + relocs = 1; + break; + case 0x3: + /* + * MOVE/CHMOV in target role (table indirect) + */ + opcode &= ~0x20000000; + if (!(np->features & FE_WIDE)) + opcode = (opcode & ~OPC_TCHMOVE); + relocs = 0; + break; +#endif + case 0x8: + /* + * JUMP / CALL + * dont't relocate if relative :-) + */ + if (opcode & 0x00800000) + relocs = 0; + else if ((opcode & 0xf8400000) == 0x80400000)/*JUMP64*/ + relocs = 2; + else + relocs = 1; + break; + case 0x4: + case 0x5: + case 0x6: + case 0x7: + relocs = 1; + break; + default: + relocs = 0; + break; + }; + + /* + * Scriptify:) the opcode. + */ + *cur++ = cpu_to_scr(opcode); + + /* + * If no relocation, assume 1 argument + * and just scriptize:) it. + */ + if (!relocs) { + *cur = cpu_to_scr(*cur); + ++cur; + continue; + } + + /* + * Otherwise performs all needed relocations. + */ + while (relocs--) { + old = *cur; + + switch (old & RELOC_MASK) { + case RELOC_REGISTER: + new = (old & ~RELOC_MASK) + np->mmio_ba; + break; + case RELOC_LABEL_A: + new = (old & ~RELOC_MASK) + np->scripta_ba; + break; + case RELOC_LABEL_B: + new = (old & ~RELOC_MASK) + np->scriptb_ba; + break; + case RELOC_SOFTC: + new = (old & ~RELOC_MASK) + np->hcb_ba; + break; + case 0: + /* + * Don't relocate a 0 address. + * They are mostly used for patched or + * script self-modified areas. 
+ */ + if (old == 0) { + new = old; + break; + } + /* fall through */ + default: + new = 0; + panic("sym_fw_bind_script: " + "weird relocation %x\n", old); + break; + } + + *cur++ = cpu_to_scr(new); + } + }; +} diff --git a/drivers/scsi/sym53c8xx_2/sym_fw.h b/drivers/scsi/sym53c8xx_2/sym_fw.h new file mode 100644 index 000000000000..993b08c5b6aa --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_fw.h @@ -0,0 +1,232 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM_FW_H +#define SYM_FW_H +/* + * Macro used to generate interfaces for script A. 
+ */ +#define SYM_GEN_FW_A(s) \ + SYM_GEN_A(s, start) SYM_GEN_A(s, getjob_begin) \ + SYM_GEN_A(s, getjob_end) \ + SYM_GEN_A(s, select) SYM_GEN_A(s, wf_sel_done) \ + SYM_GEN_A(s, send_ident) \ + SYM_GEN_A(s, dispatch) SYM_GEN_A(s, init) \ + SYM_GEN_A(s, clrack) SYM_GEN_A(s, complete_error) \ + SYM_GEN_A(s, done) SYM_GEN_A(s, done_end) \ + SYM_GEN_A(s, idle) SYM_GEN_A(s, ungetjob) \ + SYM_GEN_A(s, reselect) \ + SYM_GEN_A(s, resel_tag) SYM_GEN_A(s, resel_dsa) \ + SYM_GEN_A(s, resel_no_tag) \ + SYM_GEN_A(s, data_in) SYM_GEN_A(s, data_in2) \ + SYM_GEN_A(s, data_out) SYM_GEN_A(s, data_out2) \ + SYM_GEN_A(s, pm0_data) SYM_GEN_A(s, pm1_data) + +/* + * Macro used to generate interfaces for script B. + */ +#define SYM_GEN_FW_B(s) \ + SYM_GEN_B(s, no_data) \ + SYM_GEN_B(s, sel_for_abort) SYM_GEN_B(s, sel_for_abort_1) \ + SYM_GEN_B(s, msg_bad) SYM_GEN_B(s, msg_weird) \ + SYM_GEN_B(s, wdtr_resp) SYM_GEN_B(s, send_wdtr) \ + SYM_GEN_B(s, sdtr_resp) SYM_GEN_B(s, send_sdtr) \ + SYM_GEN_B(s, ppr_resp) SYM_GEN_B(s, send_ppr) \ + SYM_GEN_B(s, nego_bad_phase) \ + SYM_GEN_B(s, ident_break) SYM_GEN_B(s, ident_break_atn) \ + SYM_GEN_B(s, sdata_in) SYM_GEN_B(s, resel_bad_lun) \ + SYM_GEN_B(s, bad_i_t_l) SYM_GEN_B(s, bad_i_t_l_q) \ + SYM_GEN_B(s, wsr_ma_helper) + +/* + * Macro used to generate interfaces for script Z. + */ +#define SYM_GEN_FW_Z(s) \ + SYM_GEN_Z(s, snooptest) SYM_GEN_Z(s, snoopend) + +/* + * Generates structure interface that contains + * offsets within script A, B and Z. 
+ */ +#define SYM_GEN_A(s, label) s label; +#define SYM_GEN_B(s, label) s label; +#define SYM_GEN_Z(s, label) s label; +struct sym_fwa_ofs { + SYM_GEN_FW_A(u_short) +}; +struct sym_fwb_ofs { + SYM_GEN_FW_B(u_short) +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + SYM_GEN_B(u_short, data_io) +#endif + SYM_GEN_B(u_short, start64) + SYM_GEN_B(u_short, pm_handle) +}; +struct sym_fwz_ofs { + SYM_GEN_FW_Z(u_short) +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + SYM_GEN_Z(u_short, start_ram) + SYM_GEN_Z(u_short, start_ram64) +#endif +}; + +/* + * Generates structure interface that contains + * bus addresses within script A, B and Z. + */ +struct sym_fwa_ba { + SYM_GEN_FW_A(u32) +}; +struct sym_fwb_ba { + SYM_GEN_FW_B(u32) +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + SYM_GEN_B(u32, data_io) +#endif + SYM_GEN_B(u32, start64); + SYM_GEN_B(u32, pm_handle); +}; +struct sym_fwz_ba { + SYM_GEN_FW_Z(u32) +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + SYM_GEN_Z(u32, start_ram) + SYM_GEN_Z(u32, start_ram64) +#endif +}; +#undef SYM_GEN_A +#undef SYM_GEN_B +#undef SYM_GEN_Z + +/* + * Let cc know about the name of the controller data structure. + * We need this for function prototype declarations just below. + */ +struct sym_hcb; + +/* + * Generic structure that defines a firmware. + */ +struct sym_fw { + char *name; /* Name we want to print out */ + u32 *a_base; /* Pointer to script A template */ + int a_size; /* Size of script A */ + struct sym_fwa_ofs + *a_ofs; /* Useful offsets in script A */ + u32 *b_base; /* Pointer to script B template */ + int b_size; /* Size of script B */ + struct sym_fwb_ofs + *b_ofs; /* Useful offsets in script B */ + u32 *z_base; /* Pointer to script Z template */ + int z_size; /* Size of script Z */ + struct sym_fwz_ofs + *z_ofs; /* Useful offsets in script Z */ + /* Setup and patch methods for this firmware */ + void (*setup)(struct sym_hcb *, struct sym_fw *); + void (*patch)(struct sym_hcb *); +}; + +/* + * Macro used to declare a firmware. 
+ */ +#define SYM_FW_ENTRY(fw, name) \ +{ \ + name, \ + (u32 *) &fw##a_scr, sizeof(fw##a_scr), &fw##a_ofs, \ + (u32 *) &fw##b_scr, sizeof(fw##b_scr), &fw##b_ofs, \ + (u32 *) &fw##z_scr, sizeof(fw##z_scr), &fw##z_ofs, \ + fw##_setup, fw##_patch \ +} + +/* + * Macros used from the C code to get useful + * SCRIPTS bus addresses. + */ +#define SCRIPTA_BA(np, label) (np->fwa_bas.label) +#define SCRIPTB_BA(np, label) (np->fwb_bas.label) +#define SCRIPTZ_BA(np, label) (np->fwz_bas.label) + +/* + * Macros used by scripts definitions. + * + * HADDR_1 generates a reference to a field of the controller data. + * HADDR_2 generates a reference to a field of the controller data + * with offset. + * RADDR_1 generates a reference to a script processor register. + * RADDR_2 generates a reference to a script processor register + * with offset. + * PADDR_A generates a reference to another part of script A. + * PADDR_B generates a reference to another part of script B. + * + * SYM_GEN_PADDR_A and SYM_GEN_PADDR_B are used to define respectively + * the PADDR_A and PADDR_B macros for each firmware by setting argument + * `s' to the name of the corresponding structure. + * + * SCR_DATA_ZERO is used to allocate a DWORD of data in scripts areas. 
+ */ + +#define RELOC_SOFTC 0x40000000 +#define RELOC_LABEL_A 0x50000000 +#define RELOC_REGISTER 0x60000000 +#define RELOC_LABEL_B 0x80000000 +#define RELOC_MASK 0xf0000000 + +#define HADDR_1(label) (RELOC_SOFTC | offsetof(struct sym_hcb, label)) +#define HADDR_2(label,ofs) (RELOC_SOFTC | \ + (offsetof(struct sym_hcb, label)+(ofs))) +#define RADDR_1(label) (RELOC_REGISTER | REG(label)) +#define RADDR_2(label,ofs) (RELOC_REGISTER | ((REG(label))+(ofs))) + +#define SYM_GEN_PADDR_A(s, label) (RELOC_LABEL_A | offsetof(s, label)) +#define SYM_GEN_PADDR_B(s, label) (RELOC_LABEL_B | offsetof(s, label)) + +#define SCR_DATA_ZERO 0xf00ff00f + +#endif /* SYM_FW_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_fw1.h b/drivers/scsi/sym53c8xx_2/sym_fw1.h new file mode 100644 index 000000000000..31675f33f192 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_fw1.h @@ -0,0 +1,1874 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Scripts for SYMBIOS-Processor + * + * We have to know the offsets of all labels before we reach + * them (for forward jumps). Therefore we declare a struct + * here. If you make changes inside the script, + * + * DONT FORGET TO CHANGE THE LENGTHS HERE! + */ + +/* + * Script fragments which are loaded into the on-chip RAM + * of 825A, 875, 876, 895, 895A, 896 and 1010 chips. + * Must not exceed 4K bytes. 
+ */ +struct SYM_FWA_SCR { + u32 start [ 11]; + u32 getjob_begin [ 4]; + u32 _sms_a10 [ 5]; + u32 getjob_end [ 4]; + u32 _sms_a20 [ 4]; +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + u32 select [ 8]; +#else + u32 select [ 6]; +#endif + u32 _sms_a30 [ 5]; + u32 wf_sel_done [ 2]; + u32 send_ident [ 2]; +#ifdef SYM_CONF_IARB_SUPPORT + u32 select2 [ 8]; +#else + u32 select2 [ 2]; +#endif + u32 command [ 2]; + u32 dispatch [ 28]; + u32 sel_no_cmd [ 10]; + u32 init [ 6]; + u32 clrack [ 4]; + u32 datai_done [ 11]; + u32 datai_done_wsr [ 20]; + u32 datao_done [ 11]; + u32 datao_done_wss [ 6]; + u32 datai_phase [ 5]; + u32 datao_phase [ 5]; + u32 msg_in [ 2]; + u32 msg_in2 [ 10]; +#ifdef SYM_CONF_IARB_SUPPORT + u32 status [ 14]; +#else + u32 status [ 10]; +#endif + u32 complete [ 6]; + u32 complete2 [ 8]; + u32 _sms_a40 [ 12]; + u32 done [ 5]; + u32 _sms_a50 [ 5]; + u32 _sms_a60 [ 2]; + u32 done_end [ 4]; + u32 complete_error [ 5]; + u32 save_dp [ 11]; + u32 restore_dp [ 7]; + u32 disconnect [ 11]; + u32 disconnect2 [ 5]; + u32 _sms_a65 [ 3]; +#ifdef SYM_CONF_IARB_SUPPORT + u32 idle [ 4]; +#else + u32 idle [ 2]; +#endif +#ifdef SYM_CONF_IARB_SUPPORT + u32 ungetjob [ 7]; +#else + u32 ungetjob [ 5]; +#endif +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + u32 reselect [ 4]; +#else + u32 reselect [ 2]; +#endif + u32 reselected [ 19]; + u32 _sms_a70 [ 6]; + u32 _sms_a80 [ 4]; + u32 reselected1 [ 25]; + u32 _sms_a90 [ 4]; + u32 resel_lun0 [ 7]; + u32 _sms_a100 [ 4]; + u32 resel_tag [ 8]; +#if SYM_CONF_MAX_TASK*4 > 512 + u32 _sms_a110 [ 23]; +#elif SYM_CONF_MAX_TASK*4 > 256 + u32 _sms_a110 [ 17]; +#else + u32 _sms_a110 [ 13]; +#endif + u32 _sms_a120 [ 2]; + u32 resel_go [ 4]; + u32 _sms_a130 [ 7]; + u32 resel_dsa [ 2]; + u32 resel_dsa1 [ 4]; + u32 _sms_a140 [ 7]; + u32 resel_no_tag [ 4]; + u32 _sms_a145 [ 7]; + u32 data_in [SYM_CONF_MAX_SG * 2]; + u32 data_in2 [ 4]; + u32 data_out [SYM_CONF_MAX_SG * 2]; + u32 data_out2 [ 4]; + u32 pm0_data [ 12]; + u32 pm0_data_out [ 6]; + u32 pm0_data_end [ 7]; + 
u32 pm_data_end [ 4]; + u32 _sms_a150 [ 4]; + u32 pm1_data [ 12]; + u32 pm1_data_out [ 6]; + u32 pm1_data_end [ 9]; +}; + +/* + * Script fragments which stay in main memory for all chips + * except for chips that support 8K on-chip RAM. + */ +struct SYM_FWB_SCR { + u32 no_data [ 2]; +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + u32 sel_for_abort [ 18]; +#else + u32 sel_for_abort [ 16]; +#endif + u32 sel_for_abort_1 [ 2]; + u32 msg_in_etc [ 12]; + u32 msg_received [ 5]; + u32 msg_weird_seen [ 5]; + u32 msg_extended [ 17]; + u32 _sms_b10 [ 4]; + u32 msg_bad [ 6]; + u32 msg_weird [ 4]; + u32 msg_weird1 [ 8]; + u32 wdtr_resp [ 6]; + u32 send_wdtr [ 4]; + u32 sdtr_resp [ 6]; + u32 send_sdtr [ 4]; + u32 ppr_resp [ 6]; + u32 send_ppr [ 4]; + u32 nego_bad_phase [ 4]; + u32 msg_out [ 4]; + u32 msg_out_done [ 4]; + u32 data_ovrun [ 3]; + u32 data_ovrun1 [ 22]; + u32 data_ovrun2 [ 8]; + u32 abort_resel [ 16]; + u32 resend_ident [ 4]; + u32 ident_break [ 4]; + u32 ident_break_atn [ 4]; + u32 sdata_in [ 6]; + u32 resel_bad_lun [ 4]; + u32 bad_i_t_l [ 4]; + u32 bad_i_t_l_q [ 4]; + u32 bad_status [ 7]; + u32 wsr_ma_helper [ 4]; + +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + /* Unknown direction handling */ + u32 data_io [ 2]; + u32 data_io_com [ 8]; + u32 data_io_out [ 7]; +#endif + /* Data area */ + u32 zero [ 1]; + u32 scratch [ 1]; + u32 scratch1 [ 1]; + u32 prev_done [ 1]; + u32 done_pos [ 1]; + u32 nextjob [ 1]; + u32 startpos [ 1]; + u32 targtbl [ 1]; +}; + +/* + * Script fragments used at initialisations. + * Only runs out of main memory. + */ +struct SYM_FWZ_SCR { + u32 snooptest [ 9]; + u32 snoopend [ 2]; +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + u32 start_ram [ 1]; + u32 scripta0_ba [ 4]; +#endif +}; + +static struct SYM_FWA_SCR SYM_FWA_SCR = { +/*--------------------------< START >----------------------------*/ { + /* + * Switch the LED on. + * Will be patched with a NO_OP if LED + * not needed or not desired. + */ + SCR_REG_REG (gpreg, SCR_AND, 0xfe), + 0, + /* + * Clear SIGP. 
+	 */
+	SCR_FROM_REG (ctest2),
+	0,
+	/*
+	 *  Stop here if the C code wants to perform
+	 *  some error recovery procedure manually.
+	 *  (Indicate this by setting SEM in ISTAT)
+	 */
+	SCR_FROM_REG (istat),
+	0,
+	/*
+	 *  Report to the C code the next position in
+	 *  the start queue the SCRIPTS will schedule.
+	 *  The C code must not change SCRATCHA.
+	 */
+	SCR_COPY (4),
+	PADDR_B (startpos),
+	RADDR_1 (scratcha),
+	SCR_INT ^ IFTRUE (MASK (SEM, SEM)),
+	SIR_SCRIPT_STOPPED,
+	/*
+	 *  Start the next job.
+	 *
+	 *  @DSA     = start point for this job.
+	 *  SCRATCHA = address of this job in the start queue.
+	 *
+	 *  We will restore startpos with SCRATCHA if we fail the
+	 *  arbitration or if it is the idle job.
+	 *
+	 *  The below GETJOB_BEGIN to GETJOB_END section of SCRIPTS
+	 *  is a critical path. If it is partially executed, it then
+	 *  may happen that the job address is not yet in the DSA
+	 *  and the next queue position points to the next JOB.
+	 */
+}/*-------------------------< GETJOB_BEGIN >---------------------*/,{
+	/*
+	 *  Copy to a fixed location both the next STARTPOS
+	 *  and the current JOB address, using self modifying
+	 *  SCRIPTS.
+	 */
+	SCR_COPY (4),
+	RADDR_1 (scratcha),
+	PADDR_A (_sms_a10),
+	SCR_COPY (8),
+}/*-------------------------< _SMS_A10 >-------------------------*/,{
+	0,
+	PADDR_B (nextjob),
+	/*
+	 *  Move the start address to TEMP using self-
+	 *  modifying SCRIPTS and jump indirectly to
+	 *  that address.
+	 */
+	SCR_COPY (4),
+	PADDR_B (nextjob),
+	RADDR_1 (dsa),
+}/*-------------------------< GETJOB_END >-----------------------*/,{
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+	PADDR_A (_sms_a20),
+	SCR_COPY (4),
+}/*-------------------------< _SMS_A20 >-------------------------*/,{
+	0,
+	RADDR_1 (temp),
+	SCR_RETURN,
+	0,
+}/*-------------------------< SELECT >---------------------------*/,{
+	/*
+	 *  DSA	contains the address of a scheduled
+	 *  data structure.
+	 *
+	 *  SCRATCHA contains the address of the start queue
+	 *  entry which points to the next job.
+	 *
+	 *  Set Initiator mode.
+	 *
+	 *  (Target mode is left as an exercise for the reader)
+	 */
+#ifdef SYM_CONF_TARGET_ROLE_SUPPORT
+	SCR_CLR (SCR_TRG),
+	0,
+#endif
+	/*
+	 *  And try to select this target.
+	 */
+	SCR_SEL_TBL_ATN ^ offsetof (struct sym_dsb, select),
+	PADDR_A (ungetjob),
+	/*
+	 *  Now there are 4 possibilities:
+	 *
+	 *  (1) The chip loses arbitration.
+	 *  This is ok, because it will try again,
+	 *  when the bus becomes idle.
+	 *  (But beware of the timeout function!)
+	 *
+	 *  (2) The chip is reselected.
+	 *  Then the script processor takes the jump
+	 *  to the RESELECT label.
+	 *
+	 *  (3) The chip wins arbitration.
+	 *  Then it will execute SCRIPTS instruction until
+	 *  the next instruction that checks SCSI phase.
+	 *  Then will stop and wait for selection to be
+	 *  complete or selection time-out to occur.
+	 *
+	 *  After having won arbitration, the SCRIPTS
+	 *  processor is able to execute instructions while
+	 *  the SCSI core is performing SCSI selection.
+	 */
+
+	/*
+	 *  Copy the CCB header to a fixed location
+	 *  in the HCB using self-modifying SCRIPTS.
+	 */
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+	PADDR_A (_sms_a30),
+	SCR_COPY (sizeof(struct sym_ccbh)),
+}/*-------------------------< _SMS_A30 >-------------------------*/,{
+	0,
+	HADDR_1 (ccb_head),
+	/*
+	 *  Initialize the status register
+	 */
+	SCR_COPY (4),
+	HADDR_1 (ccb_head.status),
+	RADDR_1 (scr0),
+}/*-------------------------< WF_SEL_DONE >----------------------*/,{
+	SCR_INT ^ IFFALSE (WHEN (SCR_MSG_OUT)),
+	SIR_SEL_ATN_NO_MSG_OUT,
+}/*-------------------------< SEND_IDENT >-----------------------*/,{
+	/*
+	 *  Selection complete.
+	 *  Send the IDENTIFY and possibly the TAG message
+	 *  and negotiation message if present.
+	 */
+	SCR_MOVE_TBL ^ SCR_MSG_OUT,
+	offsetof (struct sym_dsb, smsg),
+}/*-------------------------< SELECT2 >--------------------------*/,{
+#ifdef SYM_CONF_IARB_SUPPORT
+	/*
+	 *  Set IMMEDIATE ARBITRATION if we have been given
+	 *  a hint to do so. (Some job to do after this one).
+ */ + SCR_FROM_REG (HF_REG), + 0, + SCR_JUMPR ^ IFFALSE (MASK (HF_HINT_IARB, HF_HINT_IARB)), + 8, + SCR_REG_REG (scntl1, SCR_OR, IARB), + 0, +#endif + /* + * Anticipate the COMMAND phase. + * This is the PHASE we expect at this point. + */ + SCR_JUMP ^ IFFALSE (WHEN (SCR_COMMAND)), + PADDR_A (sel_no_cmd), +}/*-------------------------< COMMAND >--------------------------*/,{ + /* + * ... and send the command + */ + SCR_MOVE_TBL ^ SCR_COMMAND, + offsetof (struct sym_dsb, cmd), +}/*-------------------------< DISPATCH >-------------------------*/,{ + /* + * MSG_IN is the only phase that shall be + * entered at least once for each (re)selection. + * So we test it first. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (msg_in), + SCR_JUMP ^ IFTRUE (IF (SCR_DATA_OUT)), + PADDR_A (datao_phase), + SCR_JUMP ^ IFTRUE (IF (SCR_DATA_IN)), + PADDR_A (datai_phase), + SCR_JUMP ^ IFTRUE (IF (SCR_STATUS)), + PADDR_A (status), + SCR_JUMP ^ IFTRUE (IF (SCR_COMMAND)), + PADDR_A (command), + SCR_JUMP ^ IFTRUE (IF (SCR_MSG_OUT)), + PADDR_B (msg_out), + /* + * Discard as many illegal phases as + * required and tell the C code about. + */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_ILG_OUT)), + 16, + SCR_MOVE_ABS (1) ^ SCR_ILG_OUT, + HADDR_1 (scratch), + SCR_JUMPR ^ IFTRUE (WHEN (SCR_ILG_OUT)), + -16, + SCR_JUMPR ^ IFFALSE (WHEN (SCR_ILG_IN)), + 16, + SCR_MOVE_ABS (1) ^ SCR_ILG_IN, + HADDR_1 (scratch), + SCR_JUMPR ^ IFTRUE (WHEN (SCR_ILG_IN)), + -16, + SCR_INT, + SIR_BAD_PHASE, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< SEL_NO_CMD >-----------------------*/,{ + /* + * The target does not switch to command + * phase after IDENTIFY has been sent. + * + * If it stays in MSG OUT phase send it + * the IDENTIFY again. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_OUT)), + PADDR_B (resend_ident), + /* + * If target does not switch to MSG IN phase + * and we sent a negotiation, assert the + * failure immediately. 
+ */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (dispatch), + SCR_FROM_REG (HS_REG), + 0, + SCR_INT ^ IFTRUE (DATA (HS_NEGOTIATE)), + SIR_NEGO_FAILED, + /* + * Jump to dispatcher. + */ + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< INIT >-----------------------------*/,{ + /* + * Wait for the SCSI RESET signal to be + * inactive before restarting operations, + * since the chip may hang on SEL_ATN + * if SCSI RESET is active. + */ + SCR_FROM_REG (sstat0), + 0, + SCR_JUMPR ^ IFTRUE (MASK (IRST, IRST)), + -16, + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< CLRACK >---------------------------*/,{ + /* + * Terminate possible pending message phase. + */ + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAI_DONE >-----------------------*/,{ + /* + * Save current pointer to LASTP. + */ + SCR_COPY (4), + RADDR_1 (temp), + HADDR_1 (ccb_head.lastp), + /* + * If the SWIDE is not full, jump to dispatcher. + * We anticipate a STATUS phase. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_JUMP ^ IFTRUE (MASK (WSR, WSR)), + PADDR_A (datai_done_wsr), + SCR_JUMP ^ IFTRUE (WHEN (SCR_STATUS)), + PADDR_A (status), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAI_DONE_WSR >-------------------*/,{ + /* + * The SWIDE is full. + * Clear this condition. + */ + SCR_REG_REG (scntl2, SCR_OR, WSR), + 0, + /* + * We are expecting an IGNORE RESIDUE message + * from the device, otherwise we are in data + * overrun condition. Check against MSG_IN phase. + */ + SCR_INT ^ IFFALSE (WHEN (SCR_MSG_IN)), + SIR_SWIDE_OVERRUN, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_IN)), + PADDR_A (dispatch), + /* + * We are in MSG_IN phase, + * Read the first byte of the message. + * If it is not an IGNORE RESIDUE message, + * signal overrun and jump to message + * processing. 
+ */ + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[0]), + SCR_INT ^ IFFALSE (DATA (M_IGN_RESIDUE)), + SIR_SWIDE_OVERRUN, + SCR_JUMP ^ IFFALSE (DATA (M_IGN_RESIDUE)), + PADDR_A (msg_in2), + /* + * We got the message we expected. + * Read the 2nd byte, and jump to dispatcher. + */ + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[1]), + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAO_DONE >-----------------------*/,{ + /* + * Save current pointer to LASTP. + */ + SCR_COPY (4), + RADDR_1 (temp), + HADDR_1 (ccb_head.lastp), + /* + * If the SODL is not full jump to dispatcher. + * We anticipate a STATUS phase. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_JUMP ^ IFTRUE (MASK (WSS, WSS)), + PADDR_A (datao_done_wss), + SCR_JUMP ^ IFTRUE (WHEN (SCR_STATUS)), + PADDR_A (status), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAO_DONE_WSS >-------------------*/,{ + /* + * The SODL is full, clear this condition. + */ + SCR_REG_REG (scntl2, SCR_OR, WSS), + 0, + /* + * And signal a DATA UNDERRUN condition + * to the C code. + */ + SCR_INT, + SIR_SODL_UNDERRUN, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAI_PHASE >----------------------*/,{ + /* + * Jump to current pointer. + */ + SCR_COPY (4), + HADDR_1 (ccb_head.lastp), + RADDR_1 (temp), + SCR_RETURN, + 0, +}/*-------------------------< DATAO_PHASE >----------------------*/,{ + /* + * Jump to current pointer. + */ + SCR_COPY (4), + HADDR_1 (ccb_head.lastp), + RADDR_1 (temp), + SCR_RETURN, + 0, +}/*-------------------------< MSG_IN >---------------------------*/,{ + /* + * Get the first byte of the message. + * + * The script processor doesn't negate the + * ACK signal after this transfer. + */ + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[0]), +}/*-------------------------< MSG_IN2 >--------------------------*/,{ + /* + * Check first against 1 byte messages + * that we handle from SCRIPTS. 
+ */ + SCR_JUMP ^ IFTRUE (DATA (M_COMPLETE)), + PADDR_A (complete), + SCR_JUMP ^ IFTRUE (DATA (M_DISCONNECT)), + PADDR_A (disconnect), + SCR_JUMP ^ IFTRUE (DATA (M_SAVE_DP)), + PADDR_A (save_dp), + SCR_JUMP ^ IFTRUE (DATA (M_RESTORE_DP)), + PADDR_A (restore_dp), + /* + * We handle all other messages from the + * C code, so no need to waste on-chip RAM + * for those ones. + */ + SCR_JUMP, + PADDR_B (msg_in_etc), +}/*-------------------------< STATUS >---------------------------*/,{ + /* + * get the status + */ + SCR_MOVE_ABS (1) ^ SCR_STATUS, + HADDR_1 (scratch), +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If STATUS is not GOOD, clear IMMEDIATE ARBITRATION, + * since we may have to tamper the start queue from + * the C code. + */ + SCR_JUMPR ^ IFTRUE (DATA (S_GOOD)), + 8, + SCR_REG_REG (scntl1, SCR_AND, ~IARB), + 0, +#endif + /* + * save status to scsi_status. + * mark as complete. + */ + SCR_TO_REG (SS_REG), + 0, + SCR_LOAD_REG (HS_REG, HS_COMPLETE), + 0, + /* + * Anticipate the MESSAGE PHASE for + * the TASK COMPLETE message. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (msg_in), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< COMPLETE >-------------------------*/,{ + /* + * Complete message. + * + * When we terminate the cycle by clearing ACK, + * the target may disconnect immediately. + * + * We don't want to be told of an "unexpected disconnect", + * so we disable this feature. + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + /* + * Terminate cycle ... + */ + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + /* + * ... and wait for the disconnect. + */ + SCR_WAIT_DISC, + 0, +}/*-------------------------< COMPLETE2 >------------------------*/,{ + /* + * Save host status. + */ + SCR_COPY (4), + RADDR_1 (scr0), + HADDR_1 (ccb_head.status), + /* + * Move back the CCB header using self-modifying + * SCRIPTS. 
+	 */
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+	PADDR_A (_sms_a40),
+	SCR_COPY (sizeof(struct sym_ccbh)),
+	HADDR_1 (ccb_head),
+}/*-------------------------< _SMS_A40 >-------------------------*/,{
+	0,
+	/*
+	 *  Some bridges may reorder DMA writes to memory.
+	 *  We do not want the CPU to deal with completions
+	 *  without all the posted writes having been flushed
+	 *  to memory. This DUMMY READ should flush posted
+	 *  buffers prior to the CPU having to deal with
+	 *  completions.
+	 */
+	SCR_COPY (4),	/* DUMMY READ */
+	HADDR_1 (ccb_head.status),
+	RADDR_1 (scr0),
+	/*
+	 *  If command resulted in not GOOD status,
+	 *  call the C code if needed.
+	 */
+	SCR_FROM_REG (SS_REG),
+	0,
+	SCR_CALL ^ IFFALSE (DATA (S_GOOD)),
+	PADDR_B (bad_status),
+	/*
+	 *  If we performed an auto-sense, call
+	 *  the C code to synchronize task aborts
+	 *  with UNIT ATTENTION conditions.
+	 */
+	SCR_FROM_REG (HF_REG),
+	0,
+	SCR_JUMP ^ IFFALSE (MASK (0 ,(HF_SENSE|HF_EXT_ERR))),
+	PADDR_A (complete_error),
+}/*-------------------------< DONE >-----------------------------*/,{
+	/*
+	 *  Copy the DSA to the DONE QUEUE and
+	 *  signal completion to the host.
+	 *  If we are interrupted between DONE
+	 *  and DONE_END, we must reset, otherwise
+	 *  the completed CCB may be lost.
+	 */
+	SCR_COPY (4),
+	PADDR_B (done_pos),
+	PADDR_A (_sms_a50),
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+}/*-------------------------< _SMS_A50 >-------------------------*/,{
+	0,
+	SCR_COPY (4),
+	PADDR_B (done_pos),
+	PADDR_A (_sms_a60),
+	/*
+	 *  The instruction below reads the DONE QUEUE next
+	 *  free position from memory.
+	 *  In addition it ensures that all PCI posted writes
+	 *  are flushed and so the DSA value of the done
+	 *  CCB is visible by the CPU before INTFLY is raised.
+ */ + SCR_COPY (8), +}/*-------------------------< _SMS_A60 >-------------------------*/,{ + 0, + PADDR_B (prev_done), +}/*-------------------------< DONE_END >-------------------------*/,{ + SCR_INT_FLY, + 0, + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< COMPLETE_ERROR >-------------------*/,{ + SCR_COPY (4), + PADDR_B (startpos), + RADDR_1 (scratcha), + SCR_INT, + SIR_COMPLETE_ERROR, +}/*-------------------------< SAVE_DP >--------------------------*/,{ + /* + * Clear ACK immediately. + * No need to delay it. + */ + SCR_CLR (SCR_ACK), + 0, + /* + * Keep track we received a SAVE DP, so + * we will switch to the other PM context + * on the next PM since the DP may point + * to the current PM context. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_DP_SAVED), + 0, + /* + * SAVE_DP message: + * Copy LASTP to SAVEP. + */ + SCR_COPY (4), + HADDR_1 (ccb_head.lastp), + HADDR_1 (ccb_head.savep), + /* + * Anticipate the MESSAGE PHASE for + * the DISCONNECT message. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (msg_in), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< RESTORE_DP >-----------------------*/,{ + /* + * Clear ACK immediately. + * No need to delay it. + */ + SCR_CLR (SCR_ACK), + 0, + /* + * Copy SAVEP to LASTP. + */ + SCR_COPY (4), + HADDR_1 (ccb_head.savep), + HADDR_1 (ccb_head.lastp), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DISCONNECT >-----------------------*/,{ + /* + * DISCONNECTing ... + * + * disable the "unexpected disconnect" feature, + * and remove the ACK signal. + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + /* + * Wait for the disconnect. + */ + SCR_WAIT_DISC, + 0, + /* + * Status is: DISCONNECTED. + */ + SCR_LOAD_REG (HS_REG, HS_DISCONNECT), + 0, + /* + * Save host status. 
+ */ + SCR_COPY (4), + RADDR_1 (scr0), + HADDR_1 (ccb_head.status), +}/*-------------------------< DISCONNECT2 >----------------------*/,{ + /* + * Move back the CCB header using self-modifying + * SCRIPTS. + */ + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a65), + SCR_COPY (sizeof(struct sym_ccbh)), + HADDR_1 (ccb_head), +}/*-------------------------< _SMS_A65 >-------------------------*/,{ + 0, + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< IDLE >-----------------------------*/,{ + /* + * Nothing to do? + * Switch the LED off and wait for reselect. + * Will be patched with a NO_OP if LED + * not needed or not desired. + */ + SCR_REG_REG (gpreg, SCR_OR, 0x01), + 0, +#ifdef SYM_CONF_IARB_SUPPORT + SCR_JUMPR, + 8, +#endif +}/*-------------------------< UNGETJOB >-------------------------*/,{ +#ifdef SYM_CONF_IARB_SUPPORT + /* + * Set IMMEDIATE ARBITRATION, for the next time. + * This will give us better chance to win arbitration + * for the job we just wanted to do. + */ + SCR_REG_REG (scntl1, SCR_OR, IARB), + 0, +#endif + /* + * We are not able to restart the SCRIPTS if we are + * interrupted and these instruction haven't been + * all executed. BTW, this is very unlikely to + * happen, but we check that from the C code. + */ + SCR_LOAD_REG (dsa, 0xff), + 0, + SCR_COPY (4), + RADDR_1 (scratcha), + PADDR_B (startpos), +}/*-------------------------< RESELECT >-------------------------*/,{ +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + /* + * Make sure we are in initiator mode. + */ + SCR_CLR (SCR_TRG), + 0, +#endif + /* + * Sleep waiting for a reselection. + */ + SCR_WAIT_RESEL, + PADDR_A(start), +}/*-------------------------< RESELECTED >-----------------------*/,{ + /* + * Switch the LED on. + * Will be patched with a NO_OP if LED + * not needed or not desired. 
+ */ + SCR_REG_REG (gpreg, SCR_AND, 0xfe), + 0, + /* + * load the target id into the sdid + */ + SCR_REG_SFBR (ssid, SCR_AND, 0x8F), + 0, + SCR_TO_REG (sdid), + 0, + /* + * Load the target control block address + */ + SCR_COPY (4), + PADDR_B (targtbl), + RADDR_1 (dsa), + SCR_SFBR_REG (dsa, SCR_SHL, 0), + 0, + SCR_REG_REG (dsa, SCR_SHL, 0), + 0, + SCR_REG_REG (dsa, SCR_AND, 0x3c), + 0, + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a70), + SCR_COPY (4), +}/*-------------------------< _SMS_A70 >-------------------------*/,{ + 0, + RADDR_1 (dsa), + /* + * Copy the TCB header to a fixed place in + * the HCB. + */ + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a80), + SCR_COPY (sizeof(struct sym_tcbh)), +}/*-------------------------< _SMS_A80 >-------------------------*/,{ + 0, + HADDR_1 (tcb_head), + /* + * We expect MESSAGE IN phase. + * If not, get help from the C code. + */ + SCR_INT ^ IFFALSE (WHEN (SCR_MSG_IN)), + SIR_RESEL_NO_MSG_IN, +}/*-------------------------< RESELECTED1 >----------------------*/,{ + /* + * Load the synchronous transfer registers. + */ + SCR_COPY (1), + HADDR_1 (tcb_head.wval), + RADDR_1 (scntl3), + SCR_COPY (1), + HADDR_1 (tcb_head.sval), + RADDR_1 (sxfer), + /* + * Get the IDENTIFY message. + */ + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin), + /* + * If IDENTIFY LUN #0, use a faster path + * to find the LCB structure. + */ + SCR_JUMP ^ IFTRUE (MASK (0x80, 0xbf)), + PADDR_A (resel_lun0), + /* + * If message isn't an IDENTIFY, + * tell the C code about. + */ + SCR_INT ^ IFFALSE (MASK (0x80, 0x80)), + SIR_RESEL_NO_IDENTIFY, + /* + * It is an IDENTIFY message, + * Load the LUN control block address. 
+	 */
+	SCR_COPY (4),
+	HADDR_1 (tcb_head.luntbl_sa),
+	RADDR_1 (dsa),
+	SCR_SFBR_REG (dsa, SCR_SHL, 0),
+	0,
+	SCR_REG_REG (dsa, SCR_SHL, 0),
+	0,
+	SCR_REG_REG (dsa, SCR_AND, 0xfc),
+	0,
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+	PADDR_A (_sms_a90),
+	SCR_COPY (4),
+}/*-------------------------< _SMS_A90 >-------------------------*/,{
+	0,
+	RADDR_1 (dsa),
+	SCR_JUMPR,
+	12,
+}/*-------------------------< RESEL_LUN0 >-----------------------*/,{
+	/*
+	 *  LUN 0 special case (but usual one :))
+	 */
+	SCR_COPY (4),
+	HADDR_1 (tcb_head.lun0_sa),
+	RADDR_1 (dsa),
+	/*
+	 *  Jump indirectly to the reselect action for this LUN.
+	 *  (lcb.head.resel_sa assumed at offset zero of lcb).
+	 */
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+	PADDR_A (_sms_a100),
+	SCR_COPY (4),
+}/*-------------------------< _SMS_A100 >------------------------*/,{
+	0,
+	RADDR_1 (temp),
+	SCR_RETURN,
+	0,
+	/* In normal situations, we jump to RESEL_TAG or RESEL_NO_TAG */
+}/*-------------------------< RESEL_TAG >------------------------*/,{
+	/*
+	 *  ACK the IDENTIFY previously received.
+	 */
+	SCR_CLR (SCR_ACK),
+	0,
+	/*
+	 *  It shall be a tagged command.
+	 *  Read SIMPLE+TAG.
+	 *  The C code will deal with errors.
+	 *  Aggressive optimization, isn't it? :)
+	 */
+	SCR_MOVE_ABS (2) ^ SCR_MSG_IN,
+	HADDR_1 (msgin),
+	/*
+	 *  Copy the LCB header to a fixed place in
+	 *  the HCB using self-modifying SCRIPTS.
+	 */
+	SCR_COPY (4),
+	RADDR_1 (dsa),
+	PADDR_A (_sms_a110),
+	SCR_COPY (sizeof(struct sym_lcbh)),
+}/*-------------------------< _SMS_A110 >------------------------*/,{
+	0,
+	HADDR_1 (lcb_head),
+	/*
+	 *  Load the pointer to the tagged task
+	 *  table for this LUN.
+	 */
+	SCR_COPY (4),
+	HADDR_1 (lcb_head.itlq_tbl_sa),
+	RADDR_1 (dsa),
+	/*
+	 *  The SIDL still contains the TAG value.
+	 *  Aggressive optimization, isn't it?
:):) + */ + SCR_REG_SFBR (sidl, SCR_SHL, 0), + 0, +#if SYM_CONF_MAX_TASK*4 > 512 + SCR_JUMPR ^ IFFALSE (CARRYSET), + 8, + SCR_REG_REG (dsa1, SCR_OR, 2), + 0, + SCR_REG_REG (sfbr, SCR_SHL, 0), + 0, + SCR_JUMPR ^ IFFALSE (CARRYSET), + 8, + SCR_REG_REG (dsa1, SCR_OR, 1), + 0, +#elif SYM_CONF_MAX_TASK*4 > 256 + SCR_JUMPR ^ IFFALSE (CARRYSET), + 8, + SCR_REG_REG (dsa1, SCR_OR, 1), + 0, +#endif + /* + * Retrieve the DSA of this task. + * JUMP indirectly to the restart point of the CCB. + */ + SCR_SFBR_REG (dsa, SCR_AND, 0xfc), + 0, + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a120), + SCR_COPY (4), +}/*-------------------------< _SMS_A120 >------------------------*/,{ + 0, + RADDR_1 (dsa), +}/*-------------------------< RESEL_GO >-------------------------*/,{ + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a130), + /* + * Move 'ccb.phys.head.go' action to + * scratch/scratch1. So scratch1 will + * contain the 'restart' field of the + * 'go' structure. + */ + SCR_COPY (8), +}/*-------------------------< _SMS_A130 >------------------------*/,{ + 0, + PADDR_B (scratch), + SCR_COPY (4), + PADDR_B (scratch1), /* phys.head.go.restart */ + RADDR_1 (temp), + SCR_RETURN, + 0, + /* In normal situations we branch to RESEL_DSA */ +}/*-------------------------< RESEL_DSA >------------------------*/,{ + /* + * ACK the IDENTIFY or TAG previously received. + */ + SCR_CLR (SCR_ACK), + 0, +}/*-------------------------< RESEL_DSA1 >-----------------------*/,{ + /* + * Copy the CCB header to a fixed location + * in the HCB using self-modifying SCRIPTS. + */ + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a140), + SCR_COPY (sizeof(struct sym_ccbh)), +}/*-------------------------< _SMS_A140 >------------------------*/,{ + 0, + HADDR_1 (ccb_head), + /* + * Initialize the status register + */ + SCR_COPY (4), + HADDR_1 (ccb_head.status), + RADDR_1 (scr0), + /* + * Jump to dispatcher. 
+ */ + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< RESEL_NO_TAG >---------------------*/,{ + /* + * Copy the LCB header to a fixed place in + * the HCB using self-modifying SCRIPTS. + */ + SCR_COPY (4), + RADDR_1 (dsa), + PADDR_A (_sms_a145), + SCR_COPY (sizeof(struct sym_lcbh)), +}/*-------------------------< _SMS_A145 >------------------------*/,{ + 0, + HADDR_1 (lcb_head), + /* + * Load the DSA with the unique ITL task. + */ + SCR_COPY (4), + HADDR_1 (lcb_head.itl_task_sa), + RADDR_1 (dsa), + SCR_JUMP, + PADDR_A (resel_go), +}/*-------------------------< DATA_IN >--------------------------*/,{ +/* + * Because the size depends on the + * #define SYM_CONF_MAX_SG parameter, + * it is filled in at runtime. + * + * ##===========< i=0; i<SYM_CONF_MAX_SG >========= + * || SCR_CHMOV_TBL ^ SCR_DATA_IN, + * || offsetof (struct sym_dsb, data[ i]), + * ##========================================== + */ +0 +}/*-------------------------< DATA_IN2 >-------------------------*/,{ + SCR_CALL, + PADDR_A (datai_done), + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< DATA_OUT >-------------------------*/,{ +/* + * Because the size depends on the + * #define SYM_CONF_MAX_SG parameter, + * it is filled in at runtime. + * + * ##===========< i=0; i<SYM_CONF_MAX_SG >========= + * || SCR_CHMOV_TBL ^ SCR_DATA_OUT, + * || offsetof (struct sym_dsb, data[ i]), + * ##========================================== + */ +0 +}/*-------------------------< DATA_OUT2 >------------------------*/,{ + SCR_CALL, + PADDR_A (datao_done), + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< PM0_DATA >-------------------------*/,{ + /* + * Read our host flags to SFBR, so we will be able + * to check against the data direction we expect. + */ + SCR_FROM_REG (HF_REG), + 0, + /* + * Check against actual DATA PHASE. + */ + SCR_JUMP ^ IFFALSE (WHEN (SCR_DATA_IN)), + PADDR_A (pm0_data_out), + /* + * Actual phase is DATA IN. + * Check against expected direction. 
+ */ + SCR_JUMP ^ IFFALSE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM0 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM0), + 0, + /* + * Move the data to memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.pm0.sg), + SCR_JUMP, + PADDR_A (pm0_data_end), +}/*-------------------------< PM0_DATA_OUT >---------------------*/,{ + /* + * Actual phase is DATA OUT. + * Check against expected direction. + */ + SCR_JUMP ^ IFTRUE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM0 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM0), + 0, + /* + * Move the data from memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_OUT, + offsetof (struct sym_ccb, phys.pm0.sg), +}/*-------------------------< PM0_DATA_END >---------------------*/,{ + /* + * Clear the flag that told we were moving + * data from the PM0 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_AND, (~HF_IN_PM0)), + 0, + /* + * Return to the previous DATA script which + * is guaranteed by design (if no bug) to be + * the main DATA script for this transfer. + */ + SCR_COPY (4), + RADDR_1 (dsa), + RADDR_1 (scratcha), + SCR_REG_REG (scratcha, SCR_ADD, offsetof (struct sym_ccb,phys.pm0.ret)), + 0, +}/*-------------------------< PM_DATA_END >----------------------*/,{ + SCR_COPY (4), + RADDR_1 (scratcha), + PADDR_A (_sms_a150), + SCR_COPY (4), +}/*-------------------------< _SMS_A150 >------------------------*/,{ + 0, + RADDR_1 (temp), + SCR_RETURN, + 0, +}/*-------------------------< PM1_DATA >-------------------------*/,{ + /* + * Read our host flags to SFBR, so we will be able + * to check against the data direction we expect. + */ + SCR_FROM_REG (HF_REG), + 0, + /* + * Check against actual DATA PHASE. + */ + SCR_JUMP ^ IFFALSE (WHEN (SCR_DATA_IN)), + PADDR_A (pm1_data_out), + /* + * Actual phase is DATA IN. + * Check against expected direction. 
+ */ + SCR_JUMP ^ IFFALSE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM1 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM1), + 0, + /* + * Move the data to memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.pm1.sg), + SCR_JUMP, + PADDR_A (pm1_data_end), +}/*-------------------------< PM1_DATA_OUT >---------------------*/,{ + /* + * Actual phase is DATA OUT. + * Check against expected direction. + */ + SCR_JUMP ^ IFTRUE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM1 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM1), + 0, + /* + * Move the data from memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_OUT, + offsetof (struct sym_ccb, phys.pm1.sg), +}/*-------------------------< PM1_DATA_END >---------------------*/,{ + /* + * Clear the flag that told we were moving + * data from the PM1 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_AND, (~HF_IN_PM1)), + 0, + /* + * Return to the previous DATA script which + * is guaranteed by design (if no bug) to be + * the main DATA script for this transfer. + */ + SCR_COPY (4), + RADDR_1 (dsa), + RADDR_1 (scratcha), + SCR_REG_REG (scratcha, SCR_ADD, offsetof (struct sym_ccb,phys.pm1.ret)), + 0, + SCR_JUMP, + PADDR_A (pm_data_end), +}/*--------------------------<>----------------------------------*/ +}; + +static struct SYM_FWB_SCR SYM_FWB_SCR = { +/*-------------------------< NO_DATA >--------------------------*/ { + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< SEL_FOR_ABORT >--------------------*/,{ + /* + * We are jumped here by the C code, if we have + * some target to reset or some disconnected + * job to abort. Since error recovery is a serious + * busyness, we will really reset the SCSI BUS, if + * case of a SCSI interrupt occuring in this path. + */ + +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + /* + * Set initiator mode. 
+ */ + SCR_CLR (SCR_TRG), + 0, +#endif + /* + * And try to select this target. + */ + SCR_SEL_TBL_ATN ^ offsetof (struct sym_hcb, abrt_sel), + PADDR_A (reselect), + /* + * Wait for the selection to complete or + * the selection to time out. + */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_MSG_OUT)), + -8, + /* + * Call the C code. + */ + SCR_INT, + SIR_TARGET_SELECTED, + /* + * The C code should let us continue here. + * Send the 'kiss of death' message. + * We expect an immediate disconnect once + * the target has eaten the message. + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + SCR_MOVE_TBL ^ SCR_MSG_OUT, + offsetof (struct sym_hcb, abrt_tbl), + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + SCR_WAIT_DISC, + 0, + /* + * Tell the C code that we are done. + */ + SCR_INT, + SIR_ABORT_SENT, +}/*-------------------------< SEL_FOR_ABORT_1 >------------------*/,{ + /* + * Jump at scheduler. + */ + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< MSG_IN_ETC >-----------------------*/,{ + /* + * If it is an EXTENDED (variable size message) + * Handle it. + */ + SCR_JUMP ^ IFTRUE (DATA (M_EXTENDED)), + PADDR_B (msg_extended), + /* + * Let the C code handle any other + * 1 byte message. + */ + SCR_JUMP ^ IFTRUE (MASK (0x00, 0xf0)), + PADDR_B (msg_received), + SCR_JUMP ^ IFTRUE (MASK (0x10, 0xf0)), + PADDR_B (msg_received), + /* + * We donnot handle 2 bytes messages from SCRIPTS. + * So, let the C code deal with these ones too. 
+ */ + SCR_JUMP ^ IFFALSE (MASK (0x20, 0xf0)), + PADDR_B (msg_weird_seen), + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[1]), +}/*-------------------------< MSG_RECEIVED >---------------------*/,{ + SCR_COPY (4), /* DUMMY READ */ + HADDR_1 (scratch), + RADDR_1 (scratcha), + SCR_INT, + SIR_MSG_RECEIVED, +}/*-------------------------< MSG_WEIRD_SEEN >-------------------*/,{ + SCR_COPY (4), /* DUMMY READ */ + HADDR_1 (scratch), + RADDR_1 (scratcha), + SCR_INT, + SIR_MSG_WEIRD, +}/*-------------------------< MSG_EXTENDED >---------------------*/,{ + /* + * Clear ACK and get the next byte + * assumed to be the message length. + */ + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[1]), + /* + * Try to catch some unlikely situations as 0 length + * or too large the length. + */ + SCR_JUMP ^ IFTRUE (DATA (0)), + PADDR_B (msg_weird_seen), + SCR_TO_REG (scratcha), + 0, + SCR_REG_REG (sfbr, SCR_ADD, (256-8)), + 0, + SCR_JUMP ^ IFTRUE (CARRYSET), + PADDR_B (msg_weird_seen), + /* + * We donnot handle extended messages from SCRIPTS. + * Read the amount of data correponding to the + * message length and call the C code. + */ + SCR_COPY (1), + RADDR_1 (scratcha), + PADDR_B (_sms_b10), + SCR_CLR (SCR_ACK), + 0, +}/*-------------------------< _SMS_B10 >-------------------------*/,{ + SCR_MOVE_ABS (0) ^ SCR_MSG_IN, + HADDR_1 (msgin[2]), + SCR_JUMP, + PADDR_B (msg_received), +}/*-------------------------< MSG_BAD >--------------------------*/,{ + /* + * unimplemented message - reject it. + */ + SCR_INT, + SIR_REJECT_TO_SEND, + SCR_SET (SCR_ATN), + 0, + SCR_JUMP, + PADDR_A (clrack), +}/*-------------------------< MSG_WEIRD >------------------------*/,{ + /* + * weird message received + * ignore all MSG IN phases and reject it. 
+ */ + SCR_INT, + SIR_REJECT_TO_SEND, + SCR_SET (SCR_ATN), + 0, +}/*-------------------------< MSG_WEIRD1 >-----------------------*/,{ + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_IN)), + PADDR_A (dispatch), + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (scratch), + SCR_JUMP, + PADDR_B (msg_weird1), +}/*-------------------------< WDTR_RESP >------------------------*/,{ + /* + * let the target fetch our answer. + */ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_OUT)), + PADDR_B (nego_bad_phase), +}/*-------------------------< SEND_WDTR >------------------------*/,{ + /* + * Send the M_X_WIDE_REQ + */ + SCR_MOVE_ABS (4) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_JUMP, + PADDR_B (msg_out_done), +}/*-------------------------< SDTR_RESP >------------------------*/,{ + /* + * let the target fetch our answer. + */ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_OUT)), + PADDR_B (nego_bad_phase), +}/*-------------------------< SEND_SDTR >------------------------*/,{ + /* + * Send the M_X_SYNC_REQ + */ + SCR_MOVE_ABS (5) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_JUMP, + PADDR_B (msg_out_done), +}/*-------------------------< PPR_RESP >-------------------------*/,{ + /* + * let the target fetch our answer. + */ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_OUT)), + PADDR_B (nego_bad_phase), +}/*-------------------------< SEND_PPR >-------------------------*/,{ + /* + * Send the M_X_PPR_REQ + */ + SCR_MOVE_ABS (8) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_JUMP, + PADDR_B (msg_out_done), +}/*-------------------------< NEGO_BAD_PHASE >-------------------*/,{ + SCR_INT, + SIR_NEGO_PROTO, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< MSG_OUT >--------------------------*/,{ + /* + * The target requests a message. + * We donnot send messages that may + * require the device to go to bus free. 
+ */ + SCR_MOVE_ABS (1) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + /* + * ... wait for the next phase + * if it's a message out, send it again, ... + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_OUT)), + PADDR_B (msg_out), +}/*-------------------------< MSG_OUT_DONE >---------------------*/,{ + /* + * Let the C code be aware of the + * sent message and clear the message. + */ + SCR_INT, + SIR_MSG_OUT_DONE, + /* + * ... and process the next phase + */ + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATA_OVRUN >-----------------------*/,{ + /* + * Zero scratcha that will count the + * extras bytes. + */ + SCR_COPY (4), + PADDR_B (zero), + RADDR_1 (scratcha), +}/*-------------------------< DATA_OVRUN1 >----------------------*/,{ + /* + * The target may want to transfer too much data. + * + * If phase is DATA OUT write 1 byte and count it. + */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_DATA_OUT)), + 16, + SCR_CHMOV_ABS (1) ^ SCR_DATA_OUT, + HADDR_1 (scratch), + SCR_JUMP, + PADDR_B (data_ovrun2), + /* + * If WSR is set, clear this condition, and + * count this byte. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_JUMPR ^ IFFALSE (MASK (WSR, WSR)), + 16, + SCR_REG_REG (scntl2, SCR_OR, WSR), + 0, + SCR_JUMP, + PADDR_B (data_ovrun2), + /* + * Finally check against DATA IN phase. + * Signal data overrun to the C code + * and jump to dispatcher if not so. + * Read 1 byte otherwise and count it. + */ + SCR_JUMPR ^ IFTRUE (WHEN (SCR_DATA_IN)), + 16, + SCR_INT, + SIR_DATA_OVERRUN, + SCR_JUMP, + PADDR_A (dispatch), + SCR_CHMOV_ABS (1) ^ SCR_DATA_IN, + HADDR_1 (scratch), +}/*-------------------------< DATA_OVRUN2 >----------------------*/,{ + /* + * Count this byte. + * This will allow to return a negative + * residual to user. + */ + SCR_REG_REG (scratcha, SCR_ADD, 0x01), + 0, + SCR_REG_REG (scratcha1, SCR_ADDC, 0), + 0, + SCR_REG_REG (scratcha2, SCR_ADDC, 0), + 0, + /* + * .. and repeat as required. 
+ */ + SCR_JUMP, + PADDR_B (data_ovrun1), +}/*-------------------------< ABORT_RESEL >----------------------*/,{ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + /* + * send the abort/abortag/reset message + * we expect an immediate disconnect + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + SCR_WAIT_DISC, + 0, + SCR_INT, + SIR_RESEL_ABORTED, + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< RESEND_IDENT >---------------------*/,{ + /* + * The target stays in MSG OUT phase after having acked + * Identify [+ Tag [+ Extended message ]]. Targets shall + * behave this way on parity error. + * We must send it again all the messages. + */ + SCR_SET (SCR_ATN), /* Shall be asserted 2 deskew delays before the */ + 0, /* 1rst ACK = 90 ns. Hope the chip isn't too fast */ + SCR_JUMP, + PADDR_A (send_ident), +}/*-------------------------< IDENT_BREAK >----------------------*/,{ + SCR_CLR (SCR_ATN), + 0, + SCR_JUMP, + PADDR_A (select2), +}/*-------------------------< IDENT_BREAK_ATN >------------------*/,{ + SCR_SET (SCR_ATN), + 0, + SCR_JUMP, + PADDR_A (select2), +}/*-------------------------< SDATA_IN >-------------------------*/,{ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_dsb, sense), + SCR_CALL, + PADDR_A (datai_done), + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< RESEL_BAD_LUN >--------------------*/,{ + /* + * Message is an IDENTIFY, but lun is unknown. + * Signal problem to C code for logging the event. + * Send a M_ABORT to clear all pending tasks. + */ + SCR_INT, + SIR_RESEL_BAD_LUN, + SCR_JUMP, + PADDR_B (abort_resel), +}/*-------------------------< BAD_I_T_L >------------------------*/,{ + /* + * We donnot have a task for that I_T_L. + * Signal problem to C code for logging the event. + * Send a M_ABORT message. 
+ */ + SCR_INT, + SIR_RESEL_BAD_I_T_L, + SCR_JUMP, + PADDR_B (abort_resel), +}/*-------------------------< BAD_I_T_L_Q >----------------------*/,{ + /* + * We donnot have a task that matches the tag. + * Signal problem to C code for logging the event. + * Send a M_ABORTTAG message. + */ + SCR_INT, + SIR_RESEL_BAD_I_T_L_Q, + SCR_JUMP, + PADDR_B (abort_resel), +}/*-------------------------< BAD_STATUS >-----------------------*/,{ + /* + * Anything different from INTERMEDIATE + * CONDITION MET should be a bad SCSI status, + * given that GOOD status has already been tested. + * Call the C code. + */ + SCR_COPY (4), + PADDR_B (startpos), + RADDR_1 (scratcha), + SCR_INT ^ IFFALSE (DATA (S_COND_MET)), + SIR_BAD_SCSI_STATUS, + SCR_RETURN, + 0, +}/*-------------------------< WSR_MA_HELPER >--------------------*/,{ + /* + * Helper for the C code when WSR bit is set. + * Perform the move of the residual byte. + */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.wresid), + SCR_JUMP, + PADDR_A (dispatch), + +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN +}/*-------------------------< DATA_IO >--------------------------*/,{ + /* + * We jump here if the data direction was unknown at the + * time we had to queue the command to the scripts processor. + * Pointers had been set as follow in this situation: + * savep --> DATA_IO + * lastp --> start pointer when DATA_IN + * wlastp --> start pointer when DATA_OUT + * This script sets savep and lastp according to the + * direction chosen by the target. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_DATA_OUT)), + PADDR_B (data_io_out), +}/*-------------------------< DATA_IO_COM >----------------------*/,{ + /* + * Direction is DATA IN. + */ + SCR_COPY (4), + HADDR_1 (ccb_head.lastp), + HADDR_1 (ccb_head.savep), + /* + * Jump to the SCRIPTS according to actual direction. 
+ */ + SCR_COPY (4), + HADDR_1 (ccb_head.savep), + RADDR_1 (temp), + SCR_RETURN, + 0, +}/*-------------------------< DATA_IO_OUT >----------------------*/,{ + /* + * Direction is DATA OUT. + */ + SCR_REG_REG (HF_REG, SCR_AND, (~HF_DATA_IN)), + 0, + SCR_COPY (4), + HADDR_1 (ccb_head.wlastp), + HADDR_1 (ccb_head.lastp), + SCR_JUMP, + PADDR_B(data_io_com), +#endif /* SYM_OPT_HANDLE_DIR_UNKNOWN */ + +}/*-------------------------< ZERO >-----------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< SCRATCH >--------------------------*/,{ + SCR_DATA_ZERO, /* MUST BE BEFORE SCRATCH1 */ +}/*-------------------------< SCRATCH1 >-------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< PREV_DONE >------------------------*/,{ + SCR_DATA_ZERO, /* MUST BE BEFORE DONE_POS ! */ +}/*-------------------------< DONE_POS >-------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< NEXTJOB >--------------------------*/,{ + SCR_DATA_ZERO, /* MUST BE BEFORE STARTPOS ! */ +}/*-------------------------< STARTPOS >-------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< TARGTBL >--------------------------*/,{ + SCR_DATA_ZERO, +}/*--------------------------<>----------------------------------*/ +}; + +static struct SYM_FWZ_SCR SYM_FWZ_SCR = { + /*-------------------------< SNOOPTEST >------------------------*/{ + /* + * Read the variable. + */ + SCR_COPY (4), + HADDR_1 (scratch), + RADDR_1 (scratcha), + /* + * Write the variable. + */ + SCR_COPY (4), + RADDR_1 (temp), + HADDR_1 (scratch), + /* + * Read back the variable. + */ + SCR_COPY (4), + HADDR_1 (scratch), + RADDR_1 (temp), +}/*-------------------------< SNOOPEND >-------------------------*/,{ + /* + * And stop. + */ + SCR_INT, + 99, +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + /* + * We may use MEMORY MOVE instructions to load the on chip-RAM, + * if it happens that mapping PCI memory is not possible. 
+ * But writing the RAM from the CPU is the preferred method, + * since PCI 2.2 seems to disallow PCI self-mastering. + */ +}/*-------------------------< START_RAM >------------------------*/,{ + /* + * Load the script into on-chip RAM, + * and jump to start point. + */ + SCR_COPY (sizeof(struct SYM_FWA_SCR)), +}/*-------------------------< SCRIPTA0_BA >----------------------*/,{ + 0, + PADDR_A (start), + SCR_JUMP, + PADDR_A (init), +#endif /* SYM_OPT_NO_BUS_MEMORY_MAPPING */ +}/*--------------------------<>----------------------------------*/ +}; diff --git a/drivers/scsi/sym53c8xx_2/sym_fw2.h b/drivers/scsi/sym53c8xx_2/sym_fw2.h new file mode 100644 index 000000000000..92cd6dc920fc --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_fw2.h @@ -0,0 +1,1994 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Scripts for SYMBIOS-Processor + * + * We have to know the offsets of all labels before we reach + * them (for forward jumps). Therefore we declare a struct + * here. If you make changes inside the script, + * + * DONT FORGET TO CHANGE THE LENGTHS HERE! + */ + +/* + * Script fragments which are loaded into the on-chip RAM + * of 825A, 875, 876, 895, 895A, 896 and 1010 chips. + * Must not exceed 4K bytes. 
+ */ +struct SYM_FWA_SCR { + u32 start [ 14]; + u32 getjob_begin [ 4]; + u32 getjob_end [ 4]; +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + u32 select [ 6]; +#else + u32 select [ 4]; +#endif +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 + u32 is_dmap_dirty [ 4]; +#endif + u32 wf_sel_done [ 2]; + u32 sel_done [ 2]; + u32 send_ident [ 2]; +#ifdef SYM_CONF_IARB_SUPPORT + u32 select2 [ 8]; +#else + u32 select2 [ 2]; +#endif + u32 command [ 2]; + u32 dispatch [ 28]; + u32 sel_no_cmd [ 10]; + u32 init [ 6]; + u32 clrack [ 4]; + u32 datai_done [ 10]; + u32 datai_done_wsr [ 20]; + u32 datao_done [ 10]; + u32 datao_done_wss [ 6]; + u32 datai_phase [ 4]; + u32 datao_phase [ 6]; + u32 msg_in [ 2]; + u32 msg_in2 [ 10]; +#ifdef SYM_CONF_IARB_SUPPORT + u32 status [ 14]; +#else + u32 status [ 10]; +#endif + u32 complete [ 6]; + u32 complete2 [ 12]; + u32 done [ 14]; + u32 done_end [ 2]; + u32 complete_error [ 4]; + u32 save_dp [ 12]; + u32 restore_dp [ 8]; + u32 disconnect [ 12]; +#ifdef SYM_CONF_IARB_SUPPORT + u32 idle [ 4]; +#else + u32 idle [ 2]; +#endif +#ifdef SYM_CONF_IARB_SUPPORT + u32 ungetjob [ 6]; +#else + u32 ungetjob [ 4]; +#endif +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + u32 reselect [ 4]; +#else + u32 reselect [ 2]; +#endif + u32 reselected [ 22]; + u32 resel_scntl4 [ 20]; + u32 resel_lun0 [ 6]; +#if SYM_CONF_MAX_TASK*4 > 512 + u32 resel_tag [ 26]; +#elif SYM_CONF_MAX_TASK*4 > 256 + u32 resel_tag [ 20]; +#else + u32 resel_tag [ 16]; +#endif + u32 resel_dsa [ 2]; + u32 resel_dsa1 [ 4]; + u32 resel_no_tag [ 6]; + u32 data_in [SYM_CONF_MAX_SG * 2]; + u32 data_in2 [ 4]; + u32 data_out [SYM_CONF_MAX_SG * 2]; + u32 data_out2 [ 4]; + u32 pm0_data [ 12]; + u32 pm0_data_out [ 6]; + u32 pm0_data_end [ 6]; + u32 pm1_data [ 12]; + u32 pm1_data_out [ 6]; + u32 pm1_data_end [ 6]; +}; + +/* + * Script fragments which stay in main memory for all chips + * except for chips that support 8K on-chip RAM. 
+ */ +struct SYM_FWB_SCR { + u32 start64 [ 2]; + u32 no_data [ 2]; +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + u32 sel_for_abort [ 18]; +#else + u32 sel_for_abort [ 16]; +#endif + u32 sel_for_abort_1 [ 2]; + u32 msg_in_etc [ 12]; + u32 msg_received [ 4]; + u32 msg_weird_seen [ 4]; + u32 msg_extended [ 20]; + u32 msg_bad [ 6]; + u32 msg_weird [ 4]; + u32 msg_weird1 [ 8]; + + u32 wdtr_resp [ 6]; + u32 send_wdtr [ 4]; + u32 sdtr_resp [ 6]; + u32 send_sdtr [ 4]; + u32 ppr_resp [ 6]; + u32 send_ppr [ 4]; + u32 nego_bad_phase [ 4]; + u32 msg_out [ 4]; + u32 msg_out_done [ 4]; + u32 data_ovrun [ 2]; + u32 data_ovrun1 [ 22]; + u32 data_ovrun2 [ 8]; + u32 abort_resel [ 16]; + u32 resend_ident [ 4]; + u32 ident_break [ 4]; + u32 ident_break_atn [ 4]; + u32 sdata_in [ 6]; + u32 resel_bad_lun [ 4]; + u32 bad_i_t_l [ 4]; + u32 bad_i_t_l_q [ 4]; + u32 bad_status [ 6]; + u32 pm_handle [ 20]; + u32 pm_handle1 [ 4]; + u32 pm_save [ 4]; + u32 pm0_save [ 12]; + u32 pm_save_end [ 4]; + u32 pm1_save [ 14]; + + /* WSR handling */ + u32 pm_wsr_handle [ 38]; + u32 wsr_ma_helper [ 4]; + +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + /* Unknown direction handling */ + u32 data_io [ 2]; + u32 data_io_in [ 2]; + u32 data_io_com [ 6]; + u32 data_io_out [ 8]; +#endif + /* Data area */ + u32 zero [ 1]; + u32 scratch [ 1]; + u32 pm0_data_addr [ 1]; + u32 pm1_data_addr [ 1]; + u32 done_pos [ 1]; + u32 startpos [ 1]; + u32 targtbl [ 1]; +}; + +/* + * Script fragments used at initialisations. + * Only runs out of main memory. + */ +struct SYM_FWZ_SCR { + u32 snooptest [ 6]; + u32 snoopend [ 2]; +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + u32 start_ram [ 1]; + u32 scripta0_ba [ 4]; + u32 start_ram64 [ 3]; + u32 scripta0_ba64 [ 3]; + u32 scriptb0_ba64 [ 6]; + u32 ram_seg64 [ 1]; +#endif +}; + +static struct SYM_FWA_SCR SYM_FWA_SCR = { +/*--------------------------< START >----------------------------*/ { + /* + * Switch the LED on. + * Will be patched with a NO_OP if LED + * not needed or not desired. 
+ */ + SCR_REG_REG (gpreg, SCR_AND, 0xfe), + 0, + /* + * Clear SIGP. + */ + SCR_FROM_REG (ctest2), + 0, + /* + * Stop here if the C code wants to perform + * some error recovery procedure manually. + * (Indicate this by setting SEM in ISTAT) + */ + SCR_FROM_REG (istat), + 0, + /* + * Report to the C code the next position in + * the start queue the SCRIPTS will schedule. + * The C code must not change SCRATCHA. + */ + SCR_LOAD_ABS (scratcha, 4), + PADDR_B (startpos), + SCR_INT ^ IFTRUE (MASK (SEM, SEM)), + SIR_SCRIPT_STOPPED, + /* + * Start the next job. + * + * @DSA = start point for this job. + * SCRATCHA = address of this job in the start queue. + * + * We will restore startpos with SCRATCHA if we fails the + * arbitration or if it is the idle job. + * + * The below GETJOB_BEGIN to GETJOB_END section of SCRIPTS + * is a critical path. If it is partially executed, it then + * may happen that the job address is not yet in the DSA + * and the the next queue position points to the next JOB. + */ + SCR_LOAD_ABS (dsa, 4), + PADDR_B (startpos), + SCR_LOAD_REL (temp, 4), + 4, +}/*-------------------------< GETJOB_BEGIN >---------------------*/,{ + SCR_STORE_ABS (temp, 4), + PADDR_B (startpos), + SCR_LOAD_REL (dsa, 4), + 0, +}/*-------------------------< GETJOB_END >-----------------------*/,{ + SCR_LOAD_REL (temp, 4), + 0, + SCR_RETURN, + 0, +}/*-------------------------< SELECT >---------------------------*/,{ + /* + * DSA contains the address of a scheduled + * data structure. + * + * SCRATCHA contains the address of the start queue + * entry which points to the next job. + * + * Set Initiator mode. + * + * (Target mode is left as an exercise for the reader) + */ +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + SCR_CLR (SCR_TRG), + 0, +#endif + /* + * And try to select this target. + */ + SCR_SEL_TBL_ATN ^ offsetof (struct sym_dsb, select), + PADDR_A (ungetjob), + /* + * Now there are 4 possibilities: + * + * (1) The chip looses arbitration. 
+ * This is ok, because it will try again, + * when the bus becomes idle. + * (But beware of the timeout function!) + * + * (2) The chip is reselected. + * Then the script processor takes the jump + * to the RESELECT label. + * + * (3) The chip wins arbitration. + * Then it will execute SCRIPTS instruction until + * the next instruction that checks SCSI phase. + * Then will stop and wait for selection to be + * complete or selection time-out to occur. + * + * After having won arbitration, the SCRIPTS + * processor is able to execute instructions while + * the SCSI core is performing SCSI selection. + */ + /* + * Initialize the status registers + */ + SCR_LOAD_REL (scr0, 4), + offsetof (struct sym_ccb, phys.head.status), + /* + * We may need help from CPU if the DMA segment + * registers aren't up-to-date for this IO. + * Patched with NOOP for chips that donnot + * support DAC addressing. + */ +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 +}/*-------------------------< IS_DMAP_DIRTY >--------------------*/,{ + SCR_FROM_REG (HX_REG), + 0, + SCR_INT ^ IFTRUE (MASK (HX_DMAP_DIRTY, HX_DMAP_DIRTY)), + SIR_DMAP_DIRTY, +#endif +}/*-------------------------< WF_SEL_DONE >----------------------*/,{ + SCR_INT ^ IFFALSE (WHEN (SCR_MSG_OUT)), + SIR_SEL_ATN_NO_MSG_OUT, +}/*-------------------------< SEL_DONE >-------------------------*/,{ + /* + * C1010-33 errata work-around. + * Due to a race, the SCSI core may not have + * loaded SCNTL3 on SEL_TBL instruction. + * We reload it once phase is stable. + * Patched with a NOOP for other chips. + */ + SCR_LOAD_REL (scntl3, 1), + offsetof(struct sym_dsb, select.sel_scntl3), +}/*-------------------------< SEND_IDENT >-----------------------*/,{ + /* + * Selection complete. + * Send the IDENTIFY and possibly the TAG message + * and negotiation message if present. 
+ */ + SCR_MOVE_TBL ^ SCR_MSG_OUT, + offsetof (struct sym_dsb, smsg), +}/*-------------------------< SELECT2 >--------------------------*/,{ +#ifdef SYM_CONF_IARB_SUPPORT + /* + * Set IMMEDIATE ARBITRATION if we have been given + * a hint to do so. (Some job to do after this one). + */ + SCR_FROM_REG (HF_REG), + 0, + SCR_JUMPR ^ IFFALSE (MASK (HF_HINT_IARB, HF_HINT_IARB)), + 8, + SCR_REG_REG (scntl1, SCR_OR, IARB), + 0, +#endif + /* + * Anticipate the COMMAND phase. + * This is the PHASE we expect at this point. + */ + SCR_JUMP ^ IFFALSE (WHEN (SCR_COMMAND)), + PADDR_A (sel_no_cmd), +}/*-------------------------< COMMAND >--------------------------*/,{ + /* + * ... and send the command + */ + SCR_MOVE_TBL ^ SCR_COMMAND, + offsetof (struct sym_dsb, cmd), +}/*-------------------------< DISPATCH >-------------------------*/,{ + /* + * MSG_IN is the only phase that shall be + * entered at least once for each (re)selection. + * So we test it first. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (msg_in), + SCR_JUMP ^ IFTRUE (IF (SCR_DATA_OUT)), + PADDR_A (datao_phase), + SCR_JUMP ^ IFTRUE (IF (SCR_DATA_IN)), + PADDR_A (datai_phase), + SCR_JUMP ^ IFTRUE (IF (SCR_STATUS)), + PADDR_A (status), + SCR_JUMP ^ IFTRUE (IF (SCR_COMMAND)), + PADDR_A (command), + SCR_JUMP ^ IFTRUE (IF (SCR_MSG_OUT)), + PADDR_B (msg_out), + /* + * Discard as many illegal phases as + * required and tell the C code about. + */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_ILG_OUT)), + 16, + SCR_MOVE_ABS (1) ^ SCR_ILG_OUT, + HADDR_1 (scratch), + SCR_JUMPR ^ IFTRUE (WHEN (SCR_ILG_OUT)), + -16, + SCR_JUMPR ^ IFFALSE (WHEN (SCR_ILG_IN)), + 16, + SCR_MOVE_ABS (1) ^ SCR_ILG_IN, + HADDR_1 (scratch), + SCR_JUMPR ^ IFTRUE (WHEN (SCR_ILG_IN)), + -16, + SCR_INT, + SIR_BAD_PHASE, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< SEL_NO_CMD >-----------------------*/,{ + /* + * The target does not switch to command + * phase after IDENTIFY has been sent. 
+ * + * If it stays in MSG OUT phase send it + * the IDENTIFY again. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_OUT)), + PADDR_B (resend_ident), + /* + * If target does not switch to MSG IN phase + * and we sent a negotiation, assert the + * failure immediately. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (dispatch), + SCR_FROM_REG (HS_REG), + 0, + SCR_INT ^ IFTRUE (DATA (HS_NEGOTIATE)), + SIR_NEGO_FAILED, + /* + * Jump to dispatcher. + */ + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< INIT >-----------------------------*/,{ + /* + * Wait for the SCSI RESET signal to be + * inactive before restarting operations, + * since the chip may hang on SEL_ATN + * if SCSI RESET is active. + */ + SCR_FROM_REG (sstat0), + 0, + SCR_JUMPR ^ IFTRUE (MASK (IRST, IRST)), + -16, + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< CLRACK >---------------------------*/,{ + /* + * Terminate possible pending message phase. + */ + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAI_DONE >-----------------------*/,{ + /* + * Save current pointer to LASTP. + */ + SCR_STORE_REL (temp, 4), + offsetof (struct sym_ccb, phys.head.lastp), + /* + * If the SWIDE is not full, jump to dispatcher. + * We anticipate a STATUS phase. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_JUMP ^ IFTRUE (MASK (WSR, WSR)), + PADDR_A (datai_done_wsr), + SCR_JUMP ^ IFTRUE (WHEN (SCR_STATUS)), + PADDR_A (status), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAI_DONE_WSR >-------------------*/,{ + /* + * The SWIDE is full. + * Clear this condition. + */ + SCR_REG_REG (scntl2, SCR_OR, WSR), + 0, + /* + * We are expecting an IGNORE RESIDUE message + * from the device, otherwise we are in data + * overrun condition. Check against MSG_IN phase. 
+ */ + SCR_INT ^ IFFALSE (WHEN (SCR_MSG_IN)), + SIR_SWIDE_OVERRUN, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_IN)), + PADDR_A (dispatch), + /* + * We are in MSG_IN phase, + * Read the first byte of the message. + * If it is not an IGNORE RESIDUE message, + * signal overrun and jump to message + * processing. + */ + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[0]), + SCR_INT ^ IFFALSE (DATA (M_IGN_RESIDUE)), + SIR_SWIDE_OVERRUN, + SCR_JUMP ^ IFFALSE (DATA (M_IGN_RESIDUE)), + PADDR_A (msg_in2), + /* + * We got the message we expected. + * Read the 2nd byte, and jump to dispatcher. + */ + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[1]), + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAO_DONE >-----------------------*/,{ + /* + * Save current pointer to LASTP. + */ + SCR_STORE_REL (temp, 4), + offsetof (struct sym_ccb, phys.head.lastp), + /* + * If the SODL is not full jump to dispatcher. + * We anticipate a STATUS phase. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_JUMP ^ IFTRUE (MASK (WSS, WSS)), + PADDR_A (datao_done_wss), + SCR_JUMP ^ IFTRUE (WHEN (SCR_STATUS)), + PADDR_A (status), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAO_DONE_WSS >-------------------*/,{ + /* + * The SODL is full, clear this condition. + */ + SCR_REG_REG (scntl2, SCR_OR, WSS), + 0, + /* + * And signal a DATA UNDERRUN condition + * to the C code. + */ + SCR_INT, + SIR_SODL_UNDERRUN, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATAI_PHASE >----------------------*/,{ + /* + * Jump to current pointer. + */ + SCR_LOAD_REL (temp, 4), + offsetof (struct sym_ccb, phys.head.lastp), + SCR_RETURN, + 0, +}/*-------------------------< DATAO_PHASE >----------------------*/,{ + /* + * C1010-66 errata work-around. + * Extra clocks of data hold must be inserted + * in DATA OUT phase on 33 MHz PCI BUS. + * Patched with a NOOP for other chips. 
+ */ + SCR_REG_REG (scntl4, SCR_OR, (XCLKH_DT|XCLKH_ST)), + 0, + /* + * Jump to current pointer. + */ + SCR_LOAD_REL (temp, 4), + offsetof (struct sym_ccb, phys.head.lastp), + SCR_RETURN, + 0, +}/*-------------------------< MSG_IN >---------------------------*/,{ + /* + * Get the first byte of the message. + * + * The script processor doesn't negate the + * ACK signal after this transfer. + */ + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[0]), +}/*-------------------------< MSG_IN2 >--------------------------*/,{ + /* + * Check first against 1 byte messages + * that we handle from SCRIPTS. + */ + SCR_JUMP ^ IFTRUE (DATA (M_COMPLETE)), + PADDR_A (complete), + SCR_JUMP ^ IFTRUE (DATA (M_DISCONNECT)), + PADDR_A (disconnect), + SCR_JUMP ^ IFTRUE (DATA (M_SAVE_DP)), + PADDR_A (save_dp), + SCR_JUMP ^ IFTRUE (DATA (M_RESTORE_DP)), + PADDR_A (restore_dp), + /* + * We handle all other messages from the + * C code, so no need to waste on-chip RAM + * for those ones. + */ + SCR_JUMP, + PADDR_B (msg_in_etc), +}/*-------------------------< STATUS >---------------------------*/,{ + /* + * get the status + */ + SCR_MOVE_ABS (1) ^ SCR_STATUS, + HADDR_1 (scratch), +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If STATUS is not GOOD, clear IMMEDIATE ARBITRATION, + * since we may have to tamper the start queue from + * the C code. + */ + SCR_JUMPR ^ IFTRUE (DATA (S_GOOD)), + 8, + SCR_REG_REG (scntl1, SCR_AND, ~IARB), + 0, +#endif + /* + * save status to scsi_status. + * mark as complete. + */ + SCR_TO_REG (SS_REG), + 0, + SCR_LOAD_REG (HS_REG, HS_COMPLETE), + 0, + /* + * Anticipate the MESSAGE PHASE for + * the TASK COMPLETE message. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (msg_in), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< COMPLETE >-------------------------*/,{ + /* + * Complete message. + * + * When we terminate the cycle by clearing ACK, + * the target may disconnect immediately. 
+ * + * We don't want to be told of an "unexpected disconnect", + * so we disable this feature. + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + /* + * Terminate cycle ... + */ + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + /* + * ... and wait for the disconnect. + */ + SCR_WAIT_DISC, + 0, +}/*-------------------------< COMPLETE2 >------------------------*/,{ + /* + * Save host status. + */ + SCR_STORE_REL (scr0, 4), + offsetof (struct sym_ccb, phys.head.status), + /* + * Some bridges may reorder DMA writes to memory. + * We donnot want the CPU to deal with completions + * without all the posted write having been flushed + * to memory. This DUMMY READ should flush posted + * buffers prior to the CPU having to deal with + * completions. + */ + SCR_LOAD_REL (scr0, 4), /* DUMMY READ */ + offsetof (struct sym_ccb, phys.head.status), + + /* + * If command resulted in not GOOD status, + * call the C code if needed. + */ + SCR_FROM_REG (SS_REG), + 0, + SCR_CALL ^ IFFALSE (DATA (S_GOOD)), + PADDR_B (bad_status), + /* + * If we performed an auto-sense, call + * the C code to synchronyze task aborts + * with UNIT ATTENTION conditions. + */ + SCR_FROM_REG (HF_REG), + 0, + SCR_JUMP ^ IFFALSE (MASK (0 ,(HF_SENSE|HF_EXT_ERR))), + PADDR_A (complete_error), +}/*-------------------------< DONE >-----------------------------*/,{ + /* + * Copy the DSA to the DONE QUEUE and + * signal completion to the host. + * If we are interrupted between DONE + * and DONE_END, we must reset, otherwise + * the completed CCB may be lost. + */ + SCR_STORE_ABS (dsa, 4), + PADDR_B (scratch), + SCR_LOAD_ABS (dsa, 4), + PADDR_B (done_pos), + SCR_LOAD_ABS (scratcha, 4), + PADDR_B (scratch), + SCR_STORE_REL (scratcha, 4), + 0, + /* + * The instruction below reads the DONE QUEUE next + * free position from memory. + * In addition it ensures that all PCI posted writes + * are flushed and so the DSA value of the done + * CCB is visible by the CPU before INTFLY is raised. 
+ */ + SCR_LOAD_REL (scratcha, 4), + 4, + SCR_INT_FLY, + 0, + SCR_STORE_ABS (scratcha, 4), + PADDR_B (done_pos), +}/*-------------------------< DONE_END >-------------------------*/,{ + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< COMPLETE_ERROR >-------------------*/,{ + SCR_LOAD_ABS (scratcha, 4), + PADDR_B (startpos), + SCR_INT, + SIR_COMPLETE_ERROR, +}/*-------------------------< SAVE_DP >--------------------------*/,{ + /* + * Clear ACK immediately. + * No need to delay it. + */ + SCR_CLR (SCR_ACK), + 0, + /* + * Keep track we received a SAVE DP, so + * we will switch to the other PM context + * on the next PM since the DP may point + * to the current PM context. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_DP_SAVED), + 0, + /* + * SAVE_DP message: + * Copy LASTP to SAVEP. + */ + SCR_LOAD_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.lastp), + SCR_STORE_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.savep), + /* + * Anticipate the MESSAGE PHASE for + * the DISCONNECT message. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_IN)), + PADDR_A (msg_in), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< RESTORE_DP >-----------------------*/,{ + /* + * Clear ACK immediately. + * No need to delay it. + */ + SCR_CLR (SCR_ACK), + 0, + /* + * Copy SAVEP to LASTP. + */ + SCR_LOAD_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.savep), + SCR_STORE_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.lastp), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DISCONNECT >-----------------------*/,{ + /* + * DISCONNECTing ... + * + * disable the "unexpected disconnect" feature, + * and remove the ACK signal. + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + /* + * Wait for the disconnect. + */ + SCR_WAIT_DISC, + 0, + /* + * Status is: DISCONNECTED. + */ + SCR_LOAD_REG (HS_REG, HS_DISCONNECT), + 0, + /* + * Save host status. 
+ */ + SCR_STORE_REL (scr0, 4), + offsetof (struct sym_ccb, phys.head.status), + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< IDLE >-----------------------------*/,{ + /* + * Nothing to do? + * Switch the LED off and wait for reselect. + * Will be patched with a NO_OP if LED + * not needed or not desired. + */ + SCR_REG_REG (gpreg, SCR_OR, 0x01), + 0, +#ifdef SYM_CONF_IARB_SUPPORT + SCR_JUMPR, + 8, +#endif +}/*-------------------------< UNGETJOB >-------------------------*/,{ +#ifdef SYM_CONF_IARB_SUPPORT + /* + * Set IMMEDIATE ARBITRATION, for the next time. + * This will give us better chance to win arbitration + * for the job we just wanted to do. + */ + SCR_REG_REG (scntl1, SCR_OR, IARB), + 0, +#endif + /* + * We are not able to restart the SCRIPTS if we are + * interrupted and these instruction haven't been + * all executed. BTW, this is very unlikely to + * happen, but we check that from the C code. + */ + SCR_LOAD_REG (dsa, 0xff), + 0, + SCR_STORE_ABS (scratcha, 4), + PADDR_B (startpos), +}/*-------------------------< RESELECT >-------------------------*/,{ +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + /* + * Make sure we are in initiator mode. + */ + SCR_CLR (SCR_TRG), + 0, +#endif + /* + * Sleep waiting for a reselection. + */ + SCR_WAIT_RESEL, + PADDR_A(start), +}/*-------------------------< RESELECTED >-----------------------*/,{ + /* + * Switch the LED on. + * Will be patched with a NO_OP if LED + * not needed or not desired. + */ + SCR_REG_REG (gpreg, SCR_AND, 0xfe), + 0, + /* + * load the target id into the sdid + */ + SCR_REG_SFBR (ssid, SCR_AND, 0x8F), + 0, + SCR_TO_REG (sdid), + 0, + /* + * Load the target control block address + */ + SCR_LOAD_ABS (dsa, 4), + PADDR_B (targtbl), + SCR_SFBR_REG (dsa, SCR_SHL, 0), + 0, + SCR_REG_REG (dsa, SCR_SHL, 0), + 0, + SCR_REG_REG (dsa, SCR_AND, 0x3c), + 0, + SCR_LOAD_REL (dsa, 4), + 0, + /* + * We expect MESSAGE IN phase. + * If not, get help from the C code. 
+ */ + SCR_INT ^ IFFALSE (WHEN (SCR_MSG_IN)), + SIR_RESEL_NO_MSG_IN, + /* + * Load the legacy synchronous transfer registers. + */ + SCR_LOAD_REL (scntl3, 1), + offsetof(struct sym_tcb, head.wval), + SCR_LOAD_REL (sxfer, 1), + offsetof(struct sym_tcb, head.sval), +}/*-------------------------< RESEL_SCNTL4 >---------------------*/,{ + /* + * The C1010 uses a new synchronous timing scheme. + * Will be patched with a NO_OP if not a C1010. + */ + SCR_LOAD_REL (scntl4, 1), + offsetof(struct sym_tcb, head.uval), + /* + * Get the IDENTIFY message. + */ + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin), + /* + * If IDENTIFY LUN #0, use a faster path + * to find the LCB structure. + */ + SCR_JUMP ^ IFTRUE (MASK (0x80, 0xbf)), + PADDR_A (resel_lun0), + /* + * If message isn't an IDENTIFY, + * tell the C code about. + */ + SCR_INT ^ IFFALSE (MASK (0x80, 0x80)), + SIR_RESEL_NO_IDENTIFY, + /* + * It is an IDENTIFY message, + * Load the LUN control block address. + */ + SCR_LOAD_REL (dsa, 4), + offsetof(struct sym_tcb, head.luntbl_sa), + SCR_SFBR_REG (dsa, SCR_SHL, 0), + 0, + SCR_REG_REG (dsa, SCR_SHL, 0), + 0, + SCR_REG_REG (dsa, SCR_AND, 0xfc), + 0, + SCR_LOAD_REL (dsa, 4), + 0, + SCR_JUMPR, + 8, +}/*-------------------------< RESEL_LUN0 >-----------------------*/,{ + /* + * LUN 0 special case (but usual one :)) + */ + SCR_LOAD_REL (dsa, 4), + offsetof(struct sym_tcb, head.lun0_sa), + /* + * Jump indirectly to the reselect action for this LUN. + */ + SCR_LOAD_REL (temp, 4), + offsetof(struct sym_lcb, head.resel_sa), + SCR_RETURN, + 0, + /* In normal situations, we jump to RESEL_TAG or RESEL_NO_TAG */ +}/*-------------------------< RESEL_TAG >------------------------*/,{ + /* + * ACK the IDENTIFY previously received. + */ + SCR_CLR (SCR_ACK), + 0, + /* + * It shall be a tagged command. + * Read SIMPLE+TAG. + * The C code will deal with errors. + * Agressive optimization, is'nt it? 
:) + */ + SCR_MOVE_ABS (2) ^ SCR_MSG_IN, + HADDR_1 (msgin), + /* + * Load the pointer to the tagged task + * table for this LUN. + */ + SCR_LOAD_REL (dsa, 4), + offsetof(struct sym_lcb, head.itlq_tbl_sa), + /* + * The SIDL still contains the TAG value. + * Agressive optimization, isn't it? :):) + */ + SCR_REG_SFBR (sidl, SCR_SHL, 0), + 0, +#if SYM_CONF_MAX_TASK*4 > 512 + SCR_JUMPR ^ IFFALSE (CARRYSET), + 8, + SCR_REG_REG (dsa1, SCR_OR, 2), + 0, + SCR_REG_REG (sfbr, SCR_SHL, 0), + 0, + SCR_JUMPR ^ IFFALSE (CARRYSET), + 8, + SCR_REG_REG (dsa1, SCR_OR, 1), + 0, +#elif SYM_CONF_MAX_TASK*4 > 256 + SCR_JUMPR ^ IFFALSE (CARRYSET), + 8, + SCR_REG_REG (dsa1, SCR_OR, 1), + 0, +#endif + /* + * Retrieve the DSA of this task. + * JUMP indirectly to the restart point of the CCB. + */ + SCR_SFBR_REG (dsa, SCR_AND, 0xfc), + 0, + SCR_LOAD_REL (dsa, 4), + 0, + SCR_LOAD_REL (temp, 4), + offsetof(struct sym_ccb, phys.head.go.restart), + SCR_RETURN, + 0, + /* In normal situations we branch to RESEL_DSA */ +}/*-------------------------< RESEL_DSA >------------------------*/,{ + /* + * ACK the IDENTIFY or TAG previously received. + */ + SCR_CLR (SCR_ACK), + 0, +}/*-------------------------< RESEL_DSA1 >-----------------------*/,{ + /* + * Initialize the status registers + */ + SCR_LOAD_REL (scr0, 4), + offsetof (struct sym_ccb, phys.head.status), + /* + * Jump to dispatcher. + */ + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< RESEL_NO_TAG >---------------------*/,{ + /* + * Load the DSA with the unique ITL task. + */ + SCR_LOAD_REL (dsa, 4), + offsetof(struct sym_lcb, head.itl_task_sa), + /* + * JUMP indirectly to the restart point of the CCB. 
+ */ + SCR_LOAD_REL (temp, 4), + offsetof(struct sym_ccb, phys.head.go.restart), + SCR_RETURN, + 0, + /* In normal situations we branch to RESEL_DSA */ +}/*-------------------------< DATA_IN >--------------------------*/,{ +/* + * Because the size depends on the + * #define SYM_CONF_MAX_SG parameter, + * it is filled in at runtime. + * + * ##===========< i=0; i<SYM_CONF_MAX_SG >========= + * || SCR_CHMOV_TBL ^ SCR_DATA_IN, + * || offsetof (struct sym_dsb, data[ i]), + * ##========================================== + */ +0 +}/*-------------------------< DATA_IN2 >-------------------------*/,{ + SCR_CALL, + PADDR_A (datai_done), + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< DATA_OUT >-------------------------*/,{ +/* + * Because the size depends on the + * #define SYM_CONF_MAX_SG parameter, + * it is filled in at runtime. + * + * ##===========< i=0; i<SYM_CONF_MAX_SG >========= + * || SCR_CHMOV_TBL ^ SCR_DATA_OUT, + * || offsetof (struct sym_dsb, data[ i]), + * ##========================================== + */ +0 +}/*-------------------------< DATA_OUT2 >------------------------*/,{ + SCR_CALL, + PADDR_A (datao_done), + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< PM0_DATA >-------------------------*/,{ + /* + * Read our host flags to SFBR, so we will be able + * to check against the data direction we expect. + */ + SCR_FROM_REG (HF_REG), + 0, + /* + * Check against actual DATA PHASE. + */ + SCR_JUMP ^ IFFALSE (WHEN (SCR_DATA_IN)), + PADDR_A (pm0_data_out), + /* + * Actual phase is DATA IN. + * Check against expected direction. + */ + SCR_JUMP ^ IFFALSE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM0 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM0), + 0, + /* + * Move the data to memory. 
+ */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.pm0.sg), + SCR_JUMP, + PADDR_A (pm0_data_end), +}/*-------------------------< PM0_DATA_OUT >---------------------*/,{ + /* + * Actual phase is DATA OUT. + * Check against expected direction. + */ + SCR_JUMP ^ IFTRUE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM0 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM0), + 0, + /* + * Move the data from memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_OUT, + offsetof (struct sym_ccb, phys.pm0.sg), +}/*-------------------------< PM0_DATA_END >---------------------*/,{ + /* + * Clear the flag that told we were moving + * data from the PM0 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_AND, (~HF_IN_PM0)), + 0, + /* + * Return to the previous DATA script which + * is guaranteed by design (if no bug) to be + * the main DATA script for this transfer. + */ + SCR_LOAD_REL (temp, 4), + offsetof (struct sym_ccb, phys.pm0.ret), + SCR_RETURN, + 0, +}/*-------------------------< PM1_DATA >-------------------------*/,{ + /* + * Read our host flags to SFBR, so we will be able + * to check against the data direction we expect. + */ + SCR_FROM_REG (HF_REG), + 0, + /* + * Check against actual DATA PHASE. + */ + SCR_JUMP ^ IFFALSE (WHEN (SCR_DATA_IN)), + PADDR_A (pm1_data_out), + /* + * Actual phase is DATA IN. + * Check against expected direction. + */ + SCR_JUMP ^ IFFALSE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM1 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM1), + 0, + /* + * Move the data to memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.pm1.sg), + SCR_JUMP, + PADDR_A (pm1_data_end), +}/*-------------------------< PM1_DATA_OUT >---------------------*/,{ + /* + * Actual phase is DATA OUT. + * Check against expected direction. 
+ */ + SCR_JUMP ^ IFTRUE (MASK (HF_DATA_IN, HF_DATA_IN)), + PADDR_B (data_ovrun), + /* + * Keep track we are moving data from the + * PM1 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_OR, HF_IN_PM1), + 0, + /* + * Move the data from memory. + */ + SCR_CHMOV_TBL ^ SCR_DATA_OUT, + offsetof (struct sym_ccb, phys.pm1.sg), +}/*-------------------------< PM1_DATA_END >---------------------*/,{ + /* + * Clear the flag that told we were moving + * data from the PM1 DATA mini-script. + */ + SCR_REG_REG (HF_REG, SCR_AND, (~HF_IN_PM1)), + 0, + /* + * Return to the previous DATA script which + * is guaranteed by design (if no bug) to be + * the main DATA script for this transfer. + */ + SCR_LOAD_REL (temp, 4), + offsetof (struct sym_ccb, phys.pm1.ret), + SCR_RETURN, + 0, +}/*-------------------------<>-----------------------------------*/ +}; + +static struct SYM_FWB_SCR SYM_FWB_SCR = { +/*--------------------------< START64 >--------------------------*/ { + /* + * SCRIPT entry point for the 895A, 896 and 1010. + * For now, there is no specific stuff for those + * chips at this point, but this may come. + */ + SCR_JUMP, + PADDR_A (init), +}/*-------------------------< NO_DATA >--------------------------*/,{ + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< SEL_FOR_ABORT >--------------------*/,{ + /* + * We are jumped here by the C code, if we have + * some target to reset or some disconnected + * job to abort. Since error recovery is a serious + * busyness, we will really reset the SCSI BUS, if + * case of a SCSI interrupt occuring in this path. + */ +#ifdef SYM_CONF_TARGET_ROLE_SUPPORT + /* + * Set initiator mode. + */ + SCR_CLR (SCR_TRG), + 0, +#endif + /* + * And try to select this target. + */ + SCR_SEL_TBL_ATN ^ offsetof (struct sym_hcb, abrt_sel), + PADDR_A (reselect), + /* + * Wait for the selection to complete or + * the selection to time out. + */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_MSG_OUT)), + -8, + /* + * Call the C code. 
+ */ + SCR_INT, + SIR_TARGET_SELECTED, + /* + * The C code should let us continue here. + * Send the 'kiss of death' message. + * We expect an immediate disconnect once + * the target has eaten the message. + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + SCR_MOVE_TBL ^ SCR_MSG_OUT, + offsetof (struct sym_hcb, abrt_tbl), + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + SCR_WAIT_DISC, + 0, + /* + * Tell the C code that we are done. + */ + SCR_INT, + SIR_ABORT_SENT, +}/*-------------------------< SEL_FOR_ABORT_1 >------------------*/,{ + /* + * Jump at scheduler. + */ + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< MSG_IN_ETC >-----------------------*/,{ + /* + * If it is an EXTENDED (variable size message) + * Handle it. + */ + SCR_JUMP ^ IFTRUE (DATA (M_EXTENDED)), + PADDR_B (msg_extended), + /* + * Let the C code handle any other + * 1 byte message. + */ + SCR_JUMP ^ IFTRUE (MASK (0x00, 0xf0)), + PADDR_B (msg_received), + SCR_JUMP ^ IFTRUE (MASK (0x10, 0xf0)), + PADDR_B (msg_received), + /* + * We donnot handle 2 bytes messages from SCRIPTS. + * So, let the C code deal with these ones too. + */ + SCR_JUMP ^ IFFALSE (MASK (0x20, 0xf0)), + PADDR_B (msg_weird_seen), + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[1]), +}/*-------------------------< MSG_RECEIVED >---------------------*/,{ + SCR_LOAD_REL (scratcha, 4), /* DUMMY READ */ + 0, + SCR_INT, + SIR_MSG_RECEIVED, +}/*-------------------------< MSG_WEIRD_SEEN >-------------------*/,{ + SCR_LOAD_REL (scratcha, 4), /* DUMMY READ */ + 0, + SCR_INT, + SIR_MSG_WEIRD, +}/*-------------------------< MSG_EXTENDED >---------------------*/,{ + /* + * Clear ACK and get the next byte + * assumed to be the message length. + */ + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (msgin[1]), + /* + * Try to catch some unlikely situations as 0 length + * or too large the length. 
+ */ + SCR_JUMP ^ IFTRUE (DATA (0)), + PADDR_B (msg_weird_seen), + SCR_TO_REG (scratcha), + 0, + SCR_REG_REG (sfbr, SCR_ADD, (256-8)), + 0, + SCR_JUMP ^ IFTRUE (CARRYSET), + PADDR_B (msg_weird_seen), + /* + * We donnot handle extended messages from SCRIPTS. + * Read the amount of data correponding to the + * message length and call the C code. + */ + SCR_STORE_REL (scratcha, 1), + offsetof (struct sym_dsb, smsg_ext.size), + SCR_CLR (SCR_ACK), + 0, + SCR_MOVE_TBL ^ SCR_MSG_IN, + offsetof (struct sym_dsb, smsg_ext), + SCR_JUMP, + PADDR_B (msg_received), +}/*-------------------------< MSG_BAD >--------------------------*/,{ + /* + * unimplemented message - reject it. + */ + SCR_INT, + SIR_REJECT_TO_SEND, + SCR_SET (SCR_ATN), + 0, + SCR_JUMP, + PADDR_A (clrack), +}/*-------------------------< MSG_WEIRD >------------------------*/,{ + /* + * weird message received + * ignore all MSG IN phases and reject it. + */ + SCR_INT, + SIR_REJECT_TO_SEND, + SCR_SET (SCR_ATN), + 0, +}/*-------------------------< MSG_WEIRD1 >-----------------------*/,{ + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_IN)), + PADDR_A (dispatch), + SCR_MOVE_ABS (1) ^ SCR_MSG_IN, + HADDR_1 (scratch), + SCR_JUMP, + PADDR_B (msg_weird1), +}/*-------------------------< WDTR_RESP >------------------------*/,{ + /* + * let the target fetch our answer. + */ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_OUT)), + PADDR_B (nego_bad_phase), +}/*-------------------------< SEND_WDTR >------------------------*/,{ + /* + * Send the M_X_WIDE_REQ + */ + SCR_MOVE_ABS (4) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_JUMP, + PADDR_B (msg_out_done), +}/*-------------------------< SDTR_RESP >------------------------*/,{ + /* + * let the target fetch our answer. 
+ */ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_OUT)), + PADDR_B (nego_bad_phase), +}/*-------------------------< SEND_SDTR >------------------------*/,{ + /* + * Send the M_X_SYNC_REQ + */ + SCR_MOVE_ABS (5) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_JUMP, + PADDR_B (msg_out_done), +}/*-------------------------< PPR_RESP >-------------------------*/,{ + /* + * let the target fetch our answer. + */ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + SCR_JUMP ^ IFFALSE (WHEN (SCR_MSG_OUT)), + PADDR_B (nego_bad_phase), +}/*-------------------------< SEND_PPR >-------------------------*/,{ + /* + * Send the M_X_PPR_REQ + */ + SCR_MOVE_ABS (8) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_JUMP, + PADDR_B (msg_out_done), +}/*-------------------------< NEGO_BAD_PHASE >-------------------*/,{ + SCR_INT, + SIR_NEGO_PROTO, + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< MSG_OUT >--------------------------*/,{ + /* + * The target requests a message. + * We donnot send messages that may + * require the device to go to bus free. + */ + SCR_MOVE_ABS (1) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + /* + * ... wait for the next phase + * if it's a message out, send it again, ... + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_MSG_OUT)), + PADDR_B (msg_out), +}/*-------------------------< MSG_OUT_DONE >---------------------*/,{ + /* + * Let the C code be aware of the + * sent message and clear the message. + */ + SCR_INT, + SIR_MSG_OUT_DONE, + /* + * ... and process the next phase + */ + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< DATA_OVRUN >-----------------------*/,{ + /* + * Use scratcha to count the extra bytes. + */ + SCR_LOAD_ABS (scratcha, 4), + PADDR_B (zero), +}/*-------------------------< DATA_OVRUN1 >----------------------*/,{ + /* + * The target may want to transfer too much data. + * + * If phase is DATA OUT write 1 byte and count it. 
+ */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_DATA_OUT)), + 16, + SCR_CHMOV_ABS (1) ^ SCR_DATA_OUT, + HADDR_1 (scratch), + SCR_JUMP, + PADDR_B (data_ovrun2), + /* + * If WSR is set, clear this condition, and + * count this byte. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_JUMPR ^ IFFALSE (MASK (WSR, WSR)), + 16, + SCR_REG_REG (scntl2, SCR_OR, WSR), + 0, + SCR_JUMP, + PADDR_B (data_ovrun2), + /* + * Finally check against DATA IN phase. + * Signal data overrun to the C code + * and jump to dispatcher if not so. + * Read 1 byte otherwise and count it. + */ + SCR_JUMPR ^ IFTRUE (WHEN (SCR_DATA_IN)), + 16, + SCR_INT, + SIR_DATA_OVERRUN, + SCR_JUMP, + PADDR_A (dispatch), + SCR_CHMOV_ABS (1) ^ SCR_DATA_IN, + HADDR_1 (scratch), +}/*-------------------------< DATA_OVRUN2 >----------------------*/,{ + /* + * Count this byte. + * This will allow to return a negative + * residual to user. + */ + SCR_REG_REG (scratcha, SCR_ADD, 0x01), + 0, + SCR_REG_REG (scratcha1, SCR_ADDC, 0), + 0, + SCR_REG_REG (scratcha2, SCR_ADDC, 0), + 0, + /* + * .. and repeat as required. + */ + SCR_JUMP, + PADDR_B (data_ovrun1), +}/*-------------------------< ABORT_RESEL >----------------------*/,{ + SCR_SET (SCR_ATN), + 0, + SCR_CLR (SCR_ACK), + 0, + /* + * send the abort/abortag/reset message + * we expect an immediate disconnect + */ + SCR_REG_REG (scntl2, SCR_AND, 0x7f), + 0, + SCR_MOVE_ABS (1) ^ SCR_MSG_OUT, + HADDR_1 (msgout), + SCR_CLR (SCR_ACK|SCR_ATN), + 0, + SCR_WAIT_DISC, + 0, + SCR_INT, + SIR_RESEL_ABORTED, + SCR_JUMP, + PADDR_A (start), +}/*-------------------------< RESEND_IDENT >---------------------*/,{ + /* + * The target stays in MSG OUT phase after having acked + * Identify [+ Tag [+ Extended message ]]. Targets shall + * behave this way on parity error. + * We must send it again all the messages. + */ + SCR_SET (SCR_ATN), /* Shall be asserted 2 deskew delays before the */ + 0, /* 1rst ACK = 90 ns. 
Hope the chip isn't too fast */ + SCR_JUMP, + PADDR_A (send_ident), +}/*-------------------------< IDENT_BREAK >----------------------*/,{ + SCR_CLR (SCR_ATN), + 0, + SCR_JUMP, + PADDR_A (select2), +}/*-------------------------< IDENT_BREAK_ATN >------------------*/,{ + SCR_SET (SCR_ATN), + 0, + SCR_JUMP, + PADDR_A (select2), +}/*-------------------------< SDATA_IN >-------------------------*/,{ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_dsb, sense), + SCR_CALL, + PADDR_A (datai_done), + SCR_JUMP, + PADDR_B (data_ovrun), +}/*-------------------------< RESEL_BAD_LUN >--------------------*/,{ + /* + * Message is an IDENTIFY, but lun is unknown. + * Signal problem to C code for logging the event. + * Send a M_ABORT to clear all pending tasks. + */ + SCR_INT, + SIR_RESEL_BAD_LUN, + SCR_JUMP, + PADDR_B (abort_resel), +}/*-------------------------< BAD_I_T_L >------------------------*/,{ + /* + * We donnot have a task for that I_T_L. + * Signal problem to C code for logging the event. + * Send a M_ABORT message. + */ + SCR_INT, + SIR_RESEL_BAD_I_T_L, + SCR_JUMP, + PADDR_B (abort_resel), +}/*-------------------------< BAD_I_T_L_Q >----------------------*/,{ + /* + * We donnot have a task that matches the tag. + * Signal problem to C code for logging the event. + * Send a M_ABORTTAG message. + */ + SCR_INT, + SIR_RESEL_BAD_I_T_L_Q, + SCR_JUMP, + PADDR_B (abort_resel), +}/*-------------------------< BAD_STATUS >-----------------------*/,{ + /* + * Anything different from INTERMEDIATE + * CONDITION MET should be a bad SCSI status, + * given that GOOD status has already been tested. + * Call the C code. + */ + SCR_LOAD_ABS (scratcha, 4), + PADDR_B (startpos), + SCR_INT ^ IFFALSE (DATA (S_COND_MET)), + SIR_BAD_SCSI_STATUS, + SCR_RETURN, + 0, +}/*-------------------------< PM_HANDLE >------------------------*/,{ + /* + * Phase mismatch handling. + * + * Since we have to deal with 2 SCSI data pointers + * (current and saved), we need at least 2 contexts. 
+ * Each context (pm0 and pm1) has a saved area, a + * SAVE mini-script and a DATA phase mini-script. + */ + /* + * Get the PM handling flags. + */ + SCR_FROM_REG (HF_REG), + 0, + /* + * If no flags (1rst PM for example), avoid + * all the below heavy flags testing. + * This makes the normal case a bit faster. + */ + SCR_JUMP ^ IFTRUE (MASK (0, (HF_IN_PM0 | HF_IN_PM1 | HF_DP_SAVED))), + PADDR_B (pm_handle1), + /* + * If we received a SAVE DP, switch to the + * other PM context since the savep may point + * to the current PM context. + */ + SCR_JUMPR ^ IFFALSE (MASK (HF_DP_SAVED, HF_DP_SAVED)), + 8, + SCR_REG_REG (sfbr, SCR_XOR, HF_ACT_PM), + 0, + /* + * If we have been interrupt in a PM DATA mini-script, + * we take the return address from the corresponding + * saved area. + * This ensure the return address always points to the + * main DATA script for this transfer. + */ + SCR_JUMP ^ IFTRUE (MASK (0, (HF_IN_PM0 | HF_IN_PM1))), + PADDR_B (pm_handle1), + SCR_JUMPR ^ IFFALSE (MASK (HF_IN_PM0, HF_IN_PM0)), + 16, + SCR_LOAD_REL (ia, 4), + offsetof(struct sym_ccb, phys.pm0.ret), + SCR_JUMP, + PADDR_B (pm_save), + SCR_LOAD_REL (ia, 4), + offsetof(struct sym_ccb, phys.pm1.ret), + SCR_JUMP, + PADDR_B (pm_save), +}/*-------------------------< PM_HANDLE1 >-----------------------*/,{ + /* + * Normal case. + * Update the return address so that it + * will point after the interrupted MOVE. + */ + SCR_REG_REG (ia, SCR_ADD, 8), + 0, + SCR_REG_REG (ia1, SCR_ADDC, 0), + 0, +}/*-------------------------< PM_SAVE >--------------------------*/,{ + /* + * Clear all the flags that told us if we were + * interrupted in a PM DATA mini-script and/or + * we received a SAVE DP. + */ + SCR_SFBR_REG (HF_REG, SCR_AND, (~(HF_IN_PM0|HF_IN_PM1|HF_DP_SAVED))), + 0, + /* + * Choose the current PM context. 
+ */ + SCR_JUMP ^ IFTRUE (MASK (HF_ACT_PM, HF_ACT_PM)), + PADDR_B (pm1_save), +}/*-------------------------< PM0_SAVE >-------------------------*/,{ + SCR_STORE_REL (ia, 4), + offsetof(struct sym_ccb, phys.pm0.ret), + /* + * If WSR bit is set, either UA and RBC may + * have to be changed whether the device wants + * to ignore this residue or not. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_CALL ^ IFTRUE (MASK (WSR, WSR)), + PADDR_B (pm_wsr_handle), + /* + * Save the remaining byte count, the updated + * address and the return address. + */ + SCR_STORE_REL (rbc, 4), + offsetof(struct sym_ccb, phys.pm0.sg.size), + SCR_STORE_REL (ua, 4), + offsetof(struct sym_ccb, phys.pm0.sg.addr), + /* + * Set the current pointer at the PM0 DATA mini-script. + */ + SCR_LOAD_ABS (ia, 4), + PADDR_B (pm0_data_addr), +}/*-------------------------< PM_SAVE_END >----------------------*/,{ + SCR_STORE_REL (ia, 4), + offsetof(struct sym_ccb, phys.head.lastp), + SCR_JUMP, + PADDR_A (dispatch), +}/*-------------------------< PM1_SAVE >-------------------------*/,{ + SCR_STORE_REL (ia, 4), + offsetof(struct sym_ccb, phys.pm1.ret), + /* + * If WSR bit is set, either UA and RBC may + * have to be changed whether the device wants + * to ignore this residue or not. + */ + SCR_FROM_REG (scntl2), + 0, + SCR_CALL ^ IFTRUE (MASK (WSR, WSR)), + PADDR_B (pm_wsr_handle), + /* + * Save the remaining byte count, the updated + * address and the return address. + */ + SCR_STORE_REL (rbc, 4), + offsetof(struct sym_ccb, phys.pm1.sg.size), + SCR_STORE_REL (ua, 4), + offsetof(struct sym_ccb, phys.pm1.sg.addr), + /* + * Set the current pointer at the PM1 DATA mini-script. + */ + SCR_LOAD_ABS (ia, 4), + PADDR_B (pm1_data_addr), + SCR_JUMP, + PADDR_B (pm_save_end), +}/*-------------------------< PM_WSR_HANDLE >--------------------*/,{ + /* + * Phase mismatch handling from SCRIPT with WSR set. 
+ * Such a condition can occur if the chip wants to + * execute a CHMOV(size > 1) when the WSR bit is + * set and the target changes PHASE. + * + * We must move the residual byte to memory. + * + * UA contains bit 0..31 of the address to + * move the residual byte. + * Move it to the table indirect. + */ + SCR_STORE_REL (ua, 4), + offsetof (struct sym_ccb, phys.wresid.addr), + /* + * Increment UA (move address to next position). + */ + SCR_REG_REG (ua, SCR_ADD, 1), + 0, + SCR_REG_REG (ua1, SCR_ADDC, 0), + 0, + SCR_REG_REG (ua2, SCR_ADDC, 0), + 0, + SCR_REG_REG (ua3, SCR_ADDC, 0), + 0, + /* + * Compute SCRATCHA as: + * - size to transfer = 1 byte. + * - bit 24..31 = high address bit [32...39]. + */ + SCR_LOAD_ABS (scratcha, 4), + PADDR_B (zero), + SCR_REG_REG (scratcha, SCR_OR, 1), + 0, + SCR_FROM_REG (rbc3), + 0, + SCR_TO_REG (scratcha3), + 0, + /* + * Move this value to the table indirect. + */ + SCR_STORE_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.wresid.size), + /* + * Wait for a valid phase. + * While testing with bogus QUANTUM drives, the C1010 + * sometimes raised a spurious phase mismatch with + * WSR and the CHMOV(1) triggered another PM. + * Waiting explicitely for the PHASE seemed to avoid + * the nested phase mismatch. Btw, this didn't happen + * using my IBM drives. + */ + SCR_JUMPR ^ IFFALSE (WHEN (SCR_DATA_IN)), + 0, + /* + * Perform the move of the residual byte. + */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.wresid), + /* + * We can now handle the phase mismatch with UA fixed. + * RBC[0..23]=0 is a special case that does not require + * a PM context. The C code also checks against this. + */ + SCR_FROM_REG (rbc), + 0, + SCR_RETURN ^ IFFALSE (DATA (0)), + 0, + SCR_FROM_REG (rbc1), + 0, + SCR_RETURN ^ IFFALSE (DATA (0)), + 0, + SCR_FROM_REG (rbc2), + 0, + SCR_RETURN ^ IFFALSE (DATA (0)), + 0, + /* + * RBC[0..23]=0. + * Not only we donnot need a PM context, but this would + * lead to a bogus CHMOV(0). 
This condition means that + * the residual was the last byte to move from this CHMOV. + * So, we just have to move the current data script pointer + * (i.e. TEMP) to the SCRIPTS address following the + * interrupted CHMOV and jump to dispatcher. + * IA contains the data pointer to save. + */ + SCR_JUMP, + PADDR_B (pm_save_end), +}/*-------------------------< WSR_MA_HELPER >--------------------*/,{ + /* + * Helper for the C code when WSR bit is set. + * Perform the move of the residual byte. + */ + SCR_CHMOV_TBL ^ SCR_DATA_IN, + offsetof (struct sym_ccb, phys.wresid), + SCR_JUMP, + PADDR_A (dispatch), + +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN +}/*-------------------------< DATA_IO >--------------------------*/,{ + /* + * We jump here if the data direction was unknown at the + * time we had to queue the command to the scripts processor. + * Pointers had been set as follow in this situation: + * savep --> DATA_IO + * lastp --> start pointer when DATA_IN + * wlastp --> start pointer when DATA_OUT + * This script sets savep and lastp according to the + * direction chosen by the target. + */ + SCR_JUMP ^ IFTRUE (WHEN (SCR_DATA_OUT)), + PADDR_B (data_io_out), +}/*-------------------------< DATA_IO_IN >-----------------------*/,{ + /* + * Direction is DATA IN. + */ + SCR_LOAD_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.lastp), +}/*-------------------------< DATA_IO_COM >----------------------*/,{ + SCR_STORE_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.savep), + + /* + * Jump to the SCRIPTS according to actual direction. + */ + SCR_LOAD_REL (temp, 4), + offsetof (struct sym_ccb, phys.head.savep), + SCR_RETURN, + 0, +}/*-------------------------< DATA_IO_OUT >----------------------*/,{ + /* + * Direction is DATA OUT. 
+ */ + SCR_REG_REG (HF_REG, SCR_AND, (~HF_DATA_IN)), + 0, + SCR_LOAD_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.wlastp), + SCR_STORE_REL (scratcha, 4), + offsetof (struct sym_ccb, phys.head.lastp), + SCR_JUMP, + PADDR_B(data_io_com), +#endif /* SYM_OPT_HANDLE_DIR_UNKNOWN */ + +}/*-------------------------< ZERO >-----------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< SCRATCH >--------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< PM0_DATA_ADDR >--------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< PM1_DATA_ADDR >--------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< DONE_POS >-------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< STARTPOS >-------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------< TARGTBL >--------------------------*/,{ + SCR_DATA_ZERO, +}/*-------------------------<>-----------------------------------*/ +}; + +static struct SYM_FWZ_SCR SYM_FWZ_SCR = { + /*-------------------------< SNOOPTEST >------------------------*/{ + /* + * Read the variable from memory. + */ + SCR_LOAD_REL (scratcha, 4), + offsetof(struct sym_hcb, scratch), + /* + * Write the variable to memory. + */ + SCR_STORE_REL (temp, 4), + offsetof(struct sym_hcb, scratch), + /* + * Read back the variable from memory. + */ + SCR_LOAD_REL (temp, 4), + offsetof(struct sym_hcb, scratch), +}/*-------------------------< SNOOPEND >-------------------------*/,{ + /* + * And stop. + */ + SCR_INT, + 99, +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + /* + * We may use MEMORY MOVE instructions to load the on chip-RAM, + * if it happens that mapping PCI memory is not possible. + * But writing the RAM from the CPU is the preferred method, + * since PCI 2.2 seems to disallow PCI self-mastering. + */ +}/*-------------------------< START_RAM >------------------------*/,{ + /* + * Load the script into on-chip RAM, + * and jump to start point. 
+ */ + SCR_COPY (sizeof(struct SYM_FWA_SCR)), +}/*-------------------------< SCRIPTA0_BA >----------------------*/,{ + 0, + PADDR_A (start), + SCR_JUMP, + PADDR_A (init), +}/*-------------------------< START_RAM64 >----------------------*/,{ + /* + * Load the RAM and start for 64 bit PCI (895A,896). + * Both scripts (script and scripth) are loaded into + * the RAM which is 8K (4K for 825A/875/895). + * We also need to load some 32-63 bit segments + * address of the SCRIPTS processor. + * LOAD/STORE ABSOLUTE always refers to on-chip RAM + * in our implementation. The main memory is + * accessed using LOAD/STORE DSA RELATIVE. + */ + SCR_LOAD_REL (mmws, 4), + offsetof (struct sym_hcb, scr_ram_seg), + SCR_COPY (sizeof(struct SYM_FWA_SCR)), +}/*-------------------------< SCRIPTA0_BA64 >--------------------*/,{ + 0, + PADDR_A (start), + SCR_COPY (sizeof(struct SYM_FWB_SCR)), +}/*-------------------------< SCRIPTB0_BA64 >--------------------*/,{ + 0, + PADDR_B (start64), + SCR_LOAD_REL (mmrs, 4), + offsetof (struct sym_hcb, scr_ram_seg), + SCR_JUMP64, + PADDR_B (start64), +}/*-------------------------< RAM_SEG64 >------------------------*/,{ + 0, +#endif /* SYM_OPT_NO_BUS_MEMORY_MAPPING */ +}/*-------------------------<>-----------------------------------*/ +}; diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c new file mode 100644 index 000000000000..408db5e19ba7 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -0,0 +1,3012 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. 
+ * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#define SYM_GLUE_C + +#include <linux/module.h> +#include "sym_glue.h" + +#define NAME53C "sym53c" +#define NAME53C8XX "sym53c8xx" + +/* + * Simple Wrapper to kernel PCI bus interface. + */ + +typedef struct pci_dev *pcidev_t; +#define PCIDEV_NULL (0) +#define PciBusNumber(d) (d)->bus->number +#define PciDeviceFn(d) (d)->devfn +#define PciVendorId(d) (d)->vendor +#define PciDeviceId(d) (d)->device +#define PciIrqLine(d) (d)->irq + +static u_long __init +pci_get_base_cookie(struct pci_dev *pdev, int index) +{ + u_long base; + +#if LINUX_VERSION_CODE > LinuxVersionCode(2,3,12) + base = pdev->resource[index].start; +#else + base = pdev->base_address[index]; +#if BITS_PER_LONG > 32 + if ((base & 0x7) == 0x4) + base |= (((u_long)pdev->base_address[++index]) << 32); +#endif +#endif + return (base & ~0x7ul); +} + +static int __init +pci_get_base_address(struct pci_dev *pdev, int index, u_long *base) +{ + u32 tmp; +#define PCI_BAR_OFFSET(index) (PCI_BASE_ADDRESS_0 + (index<<2)) + + pci_read_config_dword(pdev, PCI_BAR_OFFSET(index), &tmp); + *base = tmp; + ++index; + if ((tmp & 0x7) == 0x4) { +#if BITS_PER_LONG > 32 + pci_read_config_dword(pdev, PCI_BAR_OFFSET(index), &tmp); + *base |= (((u_long)tmp) << 32); +#endif + ++index; + } + return index; +#undef PCI_BAR_OFFSET +} + +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0) +#define pci_enable_device(pdev) (0) +#endif + +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,4) +#define 
 scsi_set_pci_device(inst, pdev) do { ;} while (0)
+#endif
+
+/*
+ * Insert a delay in micro-seconds and milli-seconds.
+ */
+void sym_udelay(int us) { udelay(us); }
+void sym_mdelay(int ms) { mdelay(ms); }
+
+/*
+ * SMP threading.
+ *
+ * The whole SCSI sub-system under Linux is basically single-threaded.
+ * Everything, including low-level driver interrupt routine, happens
+ * with the `io_request_lock' held.
+ * The sym53c8xx-1.x driver series ran their interrupt code using a
+ * spin mutex per controller. This added complexity without improving
+ * scalability significantly. The sym-2 driver still uses a spinlock
+ * per controller for safety, but basically runs with the damned
+ * io_request_lock held.
+ */
+
+spinlock_t sym53c8xx_lock = SPIN_LOCK_UNLOCKED;
+
+#define SYM_LOCK_DRIVER(flags)    spin_lock_irqsave(&sym53c8xx_lock, flags)
+#define SYM_UNLOCK_DRIVER(flags)  spin_unlock_irqrestore(&sym53c8xx_lock,flags)
+
+#define SYM_INIT_LOCK_HCB(np)    spin_lock_init(&np->s.smp_lock);
+#define SYM_LOCK_HCB(np, flags)  spin_lock_irqsave(&np->s.smp_lock, flags)
+#define SYM_UNLOCK_HCB(np, flags) spin_unlock_irqrestore(&np->s.smp_lock, flags)
+
+#define SYM_LOCK_SCSI(np, flags) \
+  spin_lock_irqsave(&io_request_lock, flags)
+#define SYM_UNLOCK_SCSI(np, flags) \
+  spin_unlock_irqrestore(&io_request_lock, flags)
+
+/* Ugly, but will make things easier if this locking will ever disappear */
+#define SYM_LOCK_SCSI_NOSAVE(np)  spin_lock_irq(&io_request_lock)
+#define SYM_UNLOCK_SCSI_NORESTORE(np)  spin_unlock_irq(&io_request_lock)
+
+/*
+ * These simple macros limit expression involving
+ * kernel time values (jiffies) to some that have
+ * chance not to be too much incorrect. :-)
+ */
+#define ktime_get(o)  (jiffies + (u_long) o)
+#define ktime_exp(b)  ((long)(jiffies) - (long)(b) >= 0)
+#define ktime_dif(a, b)  ((long)(a) - (long)(b))
+#define ktime_add(a, o)  ((a) + (u_long)(o))
+#define ktime_sub(a, o)  ((a) - (u_long)(o))
+
+/*
+ * Wrappers to the generic memory allocator. 
+ */ +void *sym_calloc(int size, char *name) +{ + u_long flags; + void *m; + SYM_LOCK_DRIVER(flags); + m = sym_calloc_unlocked(size, name); + SYM_UNLOCK_DRIVER(flags); + return m; +} + +void sym_mfree(void *m, int size, char *name) +{ + u_long flags; + SYM_LOCK_DRIVER(flags); + sym_mfree_unlocked(m, size, name); + SYM_UNLOCK_DRIVER(flags); +} + +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING + +void *__sym_calloc_dma(m_pool_ident_t dev_dmat, int size, char *name) +{ + u_long flags; + void *m; + SYM_LOCK_DRIVER(flags); + m = __sym_calloc_dma_unlocked(dev_dmat, size, name); + SYM_UNLOCK_DRIVER(flags); + return m; +} + +void __sym_mfree_dma(m_pool_ident_t dev_dmat, void *m, int size, char *name) +{ + u_long flags; + SYM_LOCK_DRIVER(flags); + __sym_mfree_dma_unlocked(dev_dmat, m, size, name); + SYM_UNLOCK_DRIVER(flags); +} + +m_addr_t __vtobus(m_pool_ident_t dev_dmat, void *m) +{ + u_long flags; + m_addr_t b; + SYM_LOCK_DRIVER(flags); + b = __vtobus_unlocked(dev_dmat, m); + SYM_UNLOCK_DRIVER(flags); + return b; +} + +#endif /* SYM_LINUX_DYNAMIC_DMA_MAPPING */ + + +/* + * Map/unmap a PCI memory window. + */ +#ifndef SYM_OPT_NO_BUS_MEMORY_MAPPING +static u_long __init pci_map_mem(u_long base, u_long size) +{ + u_long page_base = ((u_long) base) & PAGE_MASK; + u_long page_offs = ((u_long) base) - page_base; + u_long page_remapped = (u_long) ioremap(page_base, page_offs+size); + + return page_remapped? (page_remapped + page_offs) : 0UL; +} + +static void __init pci_unmap_mem(u_long vaddr, u_long size) +{ + if (vaddr) + iounmap((void *) (vaddr & PAGE_MASK)); +} +#endif + +/* + * Used to retrieve the host structure when the + * driver is called from the proc FS. + */ +static struct Scsi_Host *first_host = NULL; + +/* + * /proc directory entry and proc_info. 
+ */ +#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,27) +static struct proc_dir_entry proc_scsi_sym53c8xx = { + PROC_SCSI_SYM53C8XX, 9, NAME53C8XX, + S_IFDIR | S_IRUGO | S_IXUGO, 2 +}; +#endif + +/* + * Transfer direction + * + * Until some linux kernel version near 2.3.40, low-level scsi + * drivers were not told about data transfer direction. + */ +#if LINUX_VERSION_CODE > LinuxVersionCode(2, 3, 40) + +#define scsi_data_direction(cmd) (cmd->sc_data_direction) + +#else + +static __inline__ int scsi_data_direction(Scsi_Cmnd *cmd) +{ + int direction; + + switch((int) cmd->cmnd[0]) { + case 0x08: /* READ(6) 08 */ + case 0x28: /* READ(10) 28 */ + case 0xA8: /* READ(12) A8 */ + direction = SCSI_DATA_READ; + break; + case 0x0A: /* WRITE(6) 0A */ + case 0x2A: /* WRITE(10) 2A */ + case 0xAA: /* WRITE(12) AA */ + direction = SCSI_DATA_WRITE; + break; + default: + direction = SCSI_DATA_UNKNOWN; + break; + } + + return direction; +} + +#endif + +/* + * Driver host data structure. + */ +struct host_data { + hcb_p ncb; +}; + +/* + * Some type that fit DMA addresses as seen from BUS. + */ +#ifndef SYM_LINUX_DYNAMIC_DMA_MAPPING +typedef u_long bus_addr_t; +#else +#if SYM_CONF_DMA_ADDRESSING_MODE > 0 +typedef dma64_addr_t bus_addr_t; +#else +typedef dma_addr_t bus_addr_t; +#endif +#endif + +/* + * Used by the eh thread to wait for command completion. + * It is allocated on the eh thread stack. + */ +struct sym_eh_wait { + struct semaphore sem; + struct timer_list timer; + void (*old_done)(Scsi_Cmnd *); + int to_do; + int timed_out; +}; + +/* + * Driver private area in the SCSI command structure. 
+ */ +struct sym_ucmd { /* Override the SCSI pointer structure */ + SYM_QUEHEAD link_cmdq; /* Must stay at offset ZERO */ +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING + bus_addr_t data_mapping; + u_char data_mapped; +#endif + struct sym_eh_wait *eh_wait; +}; + +typedef struct sym_ucmd *ucmd_p; + +#define SYM_UCMD_PTR(cmd) ((ucmd_p)(&(cmd)->SCp)) +#define SYM_SCMD_PTR(ucmd) sym_que_entry(ucmd, Scsi_Cmnd, SCp) +#define SYM_SOFTC_PTR(cmd) (((struct host_data *)cmd->host->hostdata)->ncb) + +/* + * Deal with DMA mapping/unmapping. + */ + +#ifndef SYM_LINUX_DYNAMIC_DMA_MAPPING + +/* Linux versions prior to pci bus iommu kernel interface */ + +#define __unmap_scsi_data(pdev, cmd) do {; } while (0) +#define __map_scsi_single_data(pdev, cmd) (__vtobus(pdev,(cmd)->request_buffer)) +#define __map_scsi_sg_data(pdev, cmd) ((cmd)->use_sg) +#define __sync_scsi_data(pdev, cmd) do {; } while (0) + +#define bus_sg_dma_address(sc) vtobus((sc)->address) +#define bus_sg_dma_len(sc) ((sc)->length) + +#else /* Linux version with pci bus iommu kernel interface */ + +#define bus_unmap_sg(pdev, sgptr, sgcnt, dir) \ + pci_unmap_sg(pdev, sgptr, sgcnt, dir) + +#define bus_unmap_single(pdev, mapping, bufptr, dir) \ + pci_unmap_single(pdev, mapping, bufptr, dir) + +#define bus_map_single(pdev, bufptr, bufsiz, dir) \ + pci_map_single(pdev, bufptr, bufsiz, dir) + +#define bus_map_sg(pdev, sgptr, sgcnt, dir) \ + pci_map_sg(pdev, sgptr, sgcnt, dir) + +#define bus_dma_sync_sg(pdev, sgptr, sgcnt, dir) \ + pci_dma_sync_sg(pdev, sgptr, sgcnt, dir) + +#define bus_dma_sync_single(pdev, mapping, bufsiz, dir) \ + pci_dma_sync_single(pdev, mapping, bufsiz, dir) + +#define bus_sg_dma_address(sc) sg_dma_address(sc) +#define bus_sg_dma_len(sc) sg_dma_len(sc) + +static void __unmap_scsi_data(pcidev_t pdev, Scsi_Cmnd *cmd) +{ + int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); + + switch(SYM_UCMD_PTR(cmd)->data_mapped) { + case 2: + bus_unmap_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); + break; + case 1: + 
bus_unmap_single(pdev, SYM_UCMD_PTR(cmd)->data_mapping, + cmd->request_bufflen, dma_dir); + break; + } + SYM_UCMD_PTR(cmd)->data_mapped = 0; +} + +static bus_addr_t __map_scsi_single_data(pcidev_t pdev, Scsi_Cmnd *cmd) +{ + bus_addr_t mapping; + int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); + + mapping = bus_map_single(pdev, cmd->request_buffer, + cmd->request_bufflen, dma_dir); + if (mapping) { + SYM_UCMD_PTR(cmd)->data_mapped = 1; + SYM_UCMD_PTR(cmd)->data_mapping = mapping; + } + + return mapping; +} + +static int __map_scsi_sg_data(pcidev_t pdev, Scsi_Cmnd *cmd) +{ + int use_sg; + int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); + + use_sg = bus_map_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); + if (use_sg > 0) { + SYM_UCMD_PTR(cmd)->data_mapped = 2; + SYM_UCMD_PTR(cmd)->data_mapping = use_sg; + } + + return use_sg; +} + +static void __sync_scsi_data(pcidev_t pdev, Scsi_Cmnd *cmd) +{ + int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); + + switch(SYM_UCMD_PTR(cmd)->data_mapped) { + case 2: + bus_dma_sync_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); + break; + case 1: + bus_dma_sync_single(pdev, SYM_UCMD_PTR(cmd)->data_mapping, + cmd->request_bufflen, dma_dir); + break; + } +} + +#endif /* SYM_LINUX_DYNAMIC_DMA_MAPPING */ + +#define unmap_scsi_data(np, cmd) \ + __unmap_scsi_data(np->s.device, cmd) +#define map_scsi_single_data(np, cmd) \ + __map_scsi_single_data(np->s.device, cmd) +#define map_scsi_sg_data(np, cmd) \ + __map_scsi_sg_data(np->s.device, cmd) +#define sync_scsi_data(np, cmd) \ + __sync_scsi_data(np->s.device, cmd) + +/* + * Complete a pending CAM CCB. + */ +void sym_xpt_done(hcb_p np, Scsi_Cmnd *ccb) +{ + sym_remque(&SYM_UCMD_PTR(ccb)->link_cmdq); + unmap_scsi_data(np, ccb); + ccb->scsi_done(ccb); +} + +void sym_xpt_done2(hcb_p np, Scsi_Cmnd *ccb, int cam_status) +{ + sym_set_cam_status(ccb, cam_status); + sym_xpt_done(np, ccb); +} + + +/* + * Print something that identifies the IO. 
+ */ +void sym_print_addr (ccb_p cp) +{ + Scsi_Cmnd *cmd = cp->cam_ccb; + if (cmd) + printf("%s:%d:%d:", sym_name(SYM_SOFTC_PTR(cmd)), + cmd->target,cmd->lun); +} + +/* + * Tell the SCSI layer about a BUS RESET. + */ +void sym_xpt_async_bus_reset(hcb_p np) +{ + printf_notice("%s: SCSI BUS has been reset.\n", sym_name(np)); + np->s.settle_time = ktime_get(sym_driver_setup.settle_delay * HZ); + np->s.settle_time_valid = 1; + if (sym_verbose >= 2) + printf_info("%s: command processing suspended for %d seconds\n", + sym_name(np), sym_driver_setup.settle_delay); +} + +/* + * Tell the SCSI layer about a BUS DEVICE RESET message sent. + */ +void sym_xpt_async_sent_bdr(hcb_p np, int target) +{ + printf_notice("%s: TARGET %d has been reset.\n", sym_name(np), target); +} + +/* + * Tell the SCSI layer about the new transfer parameters. + */ +void sym_xpt_async_nego_wide(hcb_p np, int target) +{ + if (sym_verbose < 3) + return; + sym_announce_transfer_rate(np, target); +} + +/* + * Choose the more appropriate CAM status if + * the IO encountered an extended error. + */ +static int sym_xerr_cam_status(int cam_status, int x_status) +{ + if (x_status) { + if (x_status & XE_PARITY_ERR) + cam_status = DID_PARITY; + else if (x_status &(XE_EXTRA_DATA|XE_SODL_UNRUN|XE_SWIDE_OVRUN)) + cam_status = DID_ERROR; + else if (x_status & XE_BAD_PHASE) + cam_status = DID_ERROR; + else + cam_status = DID_ERROR; + } + return cam_status; +} + +/* + * Build CAM result for a failed or auto-sensed IO. 
+ */ +void sym_set_cam_result_error(hcb_p np, ccb_p cp, int resid) +{ + Scsi_Cmnd *csio = cp->cam_ccb; + u_int cam_status, scsi_status, drv_status; + + drv_status = 0; + cam_status = DID_OK; + scsi_status = cp->ssss_status; + + if (cp->host_flags & HF_SENSE) { + scsi_status = cp->sv_scsi_status; + resid = cp->sv_resid; + if (sym_verbose && cp->sv_xerr_status) + sym_print_xerr(cp, cp->sv_xerr_status); + if (cp->host_status == HS_COMPLETE && + cp->ssss_status == S_GOOD && + cp->xerr_status == 0) { + cam_status = sym_xerr_cam_status(DID_OK, + cp->sv_xerr_status); + drv_status = DRIVER_SENSE; + /* + * Bounce back the sense data to user. + */ + bzero(&csio->sense_buffer, sizeof(csio->sense_buffer)); + bcopy(cp->sns_bbuf, csio->sense_buffer, + MIN(sizeof(csio->sense_buffer),SYM_SNS_BBUF_LEN)); +#if 0 + /* + * If the device reports a UNIT ATTENTION condition + * due to a RESET condition, we should consider all + * disconnect CCBs for this unit as aborted. + */ + if (1) { + u_char *p; + p = (u_char *) csio->sense_data; + if (p[0]==0x70 && p[2]==0x6 && p[12]==0x29) + sym_clear_tasks(np, DID_ABORT, + cp->target,cp->lun, -1); + } +#endif + } + else + cam_status = DID_ERROR; + } + else if (cp->host_status == HS_COMPLETE) /* Bad SCSI status */ + cam_status = DID_OK; + else if (cp->host_status == HS_SEL_TIMEOUT) /* Selection timeout */ + cam_status = DID_NO_CONNECT; + else if (cp->host_status == HS_UNEXPECTED) /* Unexpected BUS FREE*/ + cam_status = DID_ERROR; + else { /* Extended error */ + if (sym_verbose) { + PRINT_ADDR(cp); + printf ("COMMAND FAILED (%x %x %x).\n", + cp->host_status, cp->ssss_status, + cp->xerr_status); + } + /* + * Set the most appropriate value for CAM status. + */ + cam_status = sym_xerr_cam_status(DID_ERROR, cp->xerr_status); + } +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,99) + csio->resid = resid; +#endif + csio->result = (drv_status << 24) + (cam_status << 16) + scsi_status; +} + + +/* + * Called on successfull INQUIRY response. 
+ */ +void sym_sniff_inquiry(hcb_p np, Scsi_Cmnd *cmd, int resid) +{ + int retv; + + if (!cmd || cmd->use_sg) + return; + + sync_scsi_data(np, cmd); + retv = __sym_sniff_inquiry(np, cmd->target, cmd->lun, + (u_char *) cmd->request_buffer, + cmd->request_bufflen - resid); + if (retv < 0) + return; + else if (retv) + sym_update_trans_settings(np, &np->target[cmd->target]); +} + +/* + * Build the scatter/gather array for an I/O. + */ + +static int sym_scatter_no_sglist(hcb_p np, ccb_p cp, Scsi_Cmnd *cmd) +{ + struct sym_tblmove *data = &cp->phys.data[SYM_CONF_MAX_SG-1]; + int segment; + + cp->data_len = cmd->request_bufflen; + + if (cmd->request_bufflen) { + bus_addr_t baddr = map_scsi_single_data(np, cmd); + if (baddr) { + sym_build_sge(np, data, baddr, cmd->request_bufflen); + segment = 1; + } + else + segment = -2; + } + else + segment = 0; + + return segment; +} + +static int sym_scatter(hcb_p np, ccb_p cp, Scsi_Cmnd *cmd) +{ + int segment; + int use_sg = (int) cmd->use_sg; + + cp->data_len = 0; + + if (!use_sg) + segment = sym_scatter_no_sglist(np, cp, cmd); + else if (use_sg > SYM_CONF_MAX_SG) + segment = -1; + else if ((use_sg = map_scsi_sg_data(np, cmd)) > 0) { + struct scatterlist *scatter = (struct scatterlist *)cmd->buffer; + struct sym_tblmove *data; + + data = &cp->phys.data[SYM_CONF_MAX_SG - use_sg]; + + for (segment = 0; segment < use_sg; segment++) { + bus_addr_t baddr = bus_sg_dma_address(&scatter[segment]); + unsigned int len = bus_sg_dma_len(&scatter[segment]); + + sym_build_sge(np, &data[segment], baddr, len); + cp->data_len += len; + } + } + else + segment = -2; + + return segment; +} + +/* + * Queue a SCSI command. + */ +static int sym_queue_command(hcb_p np, Scsi_Cmnd *ccb) +{ +/* Scsi_Device *device = ccb->device; */ + tcb_p tp; + lcb_p lp; + ccb_p cp; + int order; + + /* + * Minimal checkings, so that we will not + * go outside our tables. 
 + */
+  if (ccb->target == np->myaddr ||
+      ccb->target >= SYM_CONF_MAX_TARGET ||
+      ccb->lun    >= SYM_CONF_MAX_LUN) {
+    sym_xpt_done2(np, ccb, CAM_DEV_NOT_THERE);
+    return 0;
+  }
+
+  /*
+   * Retrieve the target descriptor.
+   */
+  tp = &np->target[ccb->target];
+
+  /*
+   * Complete the 1st INQUIRY command with error
+   * condition if the device is flagged NOSCAN
+   * at BOOT in the NVRAM. This may speed up
+   * the boot and maintain coherency with BIOS
+   * device numbering. Clearing the flag allows
+   * user to rescan skipped devices later.
+   * We also return error for devices not flagged
+   * for SCAN LUNS in the NVRAM since some mono-lun
+   * devices behave badly when asked for some non
+   * zero LUN. Btw, this is an absolute hack.:-)
+   */
+  if (ccb->cmnd[0] == 0x12 || ccb->cmnd[0] == 0x0) {
+    if ((tp->usrflags & SYM_SCAN_BOOT_DISABLED) ||
+        ((tp->usrflags & SYM_SCAN_LUNS_DISABLED) &&
+         ccb->lun != 0)) {
+      tp->usrflags &= ~SYM_SCAN_BOOT_DISABLED;
+      sym_xpt_done2(np, ccb, CAM_DEV_NOT_THERE);
+      return 0;
+    }
+  }
+
+  /*
+   * Select tagged/untagged.
+   */
+  lp = sym_lp(np, tp, ccb->lun);
+  order = (lp && lp->s.reqtags) ? M_SIMPLE_TAG : 0;
+
+  /*
+   * Queue the SCSI IO.
+   */
+  cp = sym_get_ccb(np, ccb->target, ccb->lun, order);
+  if (!cp)
+    return 1;  /* Means resource shortage */
+  (void) sym_queue_scsiio(np, ccb, cp);
+  return 0;
+}
+
+/*
+ * Setup buffers and pointers that address the CDB.
+ */
+static int __inline sym_setup_cdb(hcb_p np, Scsi_Cmnd *ccb, ccb_p cp)
+{
+  u32 cmd_ba;
+  int cmd_len;
+
+  /*
+   * CDB is 16 bytes max.
+   */
+  if (ccb->cmd_len > sizeof(cp->cdb_buf)) {
+    sym_set_cam_status(cp->cam_ccb, CAM_REQ_INVALID);
+    return -1;
+  }
+
+  bcopy(ccb->cmnd, cp->cdb_buf, ccb->cmd_len);
+  cmd_ba  = CCB_BA (cp, cdb_buf[0]);
+  cmd_len = ccb->cmd_len;
+
+  cp->phys.cmd.addr = cpu_to_scr(cmd_ba);
+  cp->phys.cmd.size = cpu_to_scr(cmd_len);
+
+  return 0;
+}
+
+/*
+ * Setup pointers that address the data and start the I/O. 
+ */ +int sym_setup_data_and_start(hcb_p np, Scsi_Cmnd *csio, ccb_p cp) +{ + int dir; + tcb_p tp = &np->target[cp->target]; + lcb_p lp = sym_lp(np, tp, cp->lun); + + /* + * Build the CDB. + */ + if (sym_setup_cdb(np, csio, cp)) + goto out_abort; + + /* + * No direction means no data. + */ + dir = scsi_data_direction(csio); + if (dir != SCSI_DATA_NONE) { + cp->segments = sym_scatter (np, cp, csio); + if (cp->segments < 0) { + if (cp->segments == -2) + sym_set_cam_status(csio, CAM_RESRC_UNAVAIL); + else + sym_set_cam_status(csio, CAM_REQ_TOO_BIG); + goto out_abort; + } + } + else { + cp->data_len = 0; + cp->segments = 0; + } + + /* + * Set data pointers. + */ + sym_setup_data_pointers(np, cp, dir); + + /* + * When `#ifed 1', the code below makes the driver + * panic on the first attempt to write to a SCSI device. + * It is the first test we want to do after a driver + * change that does not seem obviously safe. :) + */ +#if 0 + switch (cp->cdb_buf[0]) { + case 0x0A: case 0x2A: case 0xAA: + panic("XXXXXXXXXXXXX WRITE NOT YET ALLOWED XXXXXXXXXXXXXX\n"); + MDELAY(10000); + break; + default: + break; + } +#endif + + /* + * activate this job. + */ + if (lp) + sym_start_next_ccbs(np, lp, 2); + else + sym_put_start_queue(np, cp); + return 0; + +out_abort: + sym_free_ccb(np, cp); + sym_xpt_done(np, csio); + return 0; +} + + +/* + * timer daemon. + * + * Misused to keep the driver running when + * interrupts are not configured correctly. + */ +static void sym_timer (hcb_p np) +{ + u_long thistime = ktime_get(0); + +#if LINUX_VERSION_CODE < LinuxVersionCode(2, 4, 0) + /* + * If release process in progress, let's go + * Set the release stage from 1 to 2 to synchronize + * with the release process. + */ + + if (np->s.release_stage) { + if (np->s.release_stage == 1) + np->s.release_stage = 2; + return; + } +#endif + + /* + * Restart the timer. 
 + */
+#ifdef SYM_CONF_PCIQ_BROKEN_INTR
+  np->s.timer.expires = ktime_get((HZ+99)/100);
+#else
+  np->s.timer.expires = ktime_get(SYM_CONF_TIMER_INTERVAL);
+#endif
+  add_timer(&np->s.timer);
+
+  /*
+   * If we are resetting the ncr, wait for settle_time before
+   * clearing it. Then command processing will be resumed.
+   */
+  if (np->s.settle_time_valid) {
+    if (ktime_dif(np->s.settle_time, thistime) <= 0){
+      if (sym_verbose >= 2 )
+        printk("%s: command processing resumed\n",
+               sym_name(np));
+      np->s.settle_time_valid = 0;
+    }
+    return;
+  }
+
+  /*
+   * Nothing to do for now, but that may come.
+   */
+  if (np->s.lasttime + 4*HZ < thistime) {
+    np->s.lasttime = thistime;
+  }
+
+#ifdef SYM_CONF_PCIQ_MAY_MISS_COMPLETIONS
+  /*
+   * Some way-broken PCI bridges may lead to
+   * completions being lost when the clearing
+   * of the INTFLY flag by the CPU occurs
+   * concurrently with the chip raising this flag.
+   * If this ever happens, lost completions will
+   * be reaped here.
+   */
+  sym_wakeup_done(np);
+#endif
+
+#ifdef SYM_CONF_PCIQ_BROKEN_INTR
+  if (INB(nc_istat) & (INTF|SIP|DIP)) {
+
+    /*
+    ** Process pending interrupts.
+    */
+    if (DEBUG_FLAGS & DEBUG_TINY) printk ("{");
+    sym_interrupt(np);
+    if (DEBUG_FLAGS & DEBUG_TINY) printk ("}");
+  }
+#endif /* SYM_CONF_PCIQ_BROKEN_INTR */
+}
+
+
+/*
+ * PCI BUS error handler.
+ */
+void sym_log_bus_error(hcb_p np)
+{
+  u_short pci_sts;
+  pci_read_config_word(np->s.device, PCI_STATUS, &pci_sts);
+  if (pci_sts & 0xf900) {
+    pci_write_config_word(np->s.device, PCI_STATUS,
+                          pci_sts);
+    printf("%s: PCI STATUS = 0x%04x\n",
+           sym_name(np), pci_sts & 0xf900);
+  }
+}
+
+
+/*
+ * Requeue awaiting commands. 
+ */
+static void sym_requeue_awaiting_cmds(hcb_p np)
+{
+	Scsi_Cmnd *cmd;
+	/*
+	 * Note: `ucp' is assigned by sym_remque_head() before first use.
+	 * It was previously initialized from the uninitialized `cmd'
+	 * (SYM_UCMD_PTR(cmd)), which is undefined behavior.
+	 */
+	ucmd_p ucp;
+	SYM_QUEHEAD tmp_cmdq;
+	int sts;
+
+	/*
+	 * Steal the whole wait queue into a private list, then try to
+	 * hand each command to the core driver.  A command that is
+	 * refused goes back to the head of the wait queue so ordering
+	 * is preserved for the next attempt.
+	 */
+	sym_que_move(&np->s.wait_cmdq, &tmp_cmdq);
+
+	while ((ucp = (ucmd_p) sym_remque_head(&tmp_cmdq)) != 0) {
+		sym_insque_tail(&ucp->link_cmdq, &np->s.busy_cmdq);
+		cmd = SYM_SCMD_PTR(ucp);
+		sts = sym_queue_command(np, cmd);
+		if (sts) {
+			sym_remque(&ucp->link_cmdq);
+			sym_insque_head(&ucp->link_cmdq, &np->s.wait_cmdq);
+		}
+	}
+}
+
+/*
+ * Linux entry point of the queuecommand() function
+ */
+int sym53c8xx_queue_command (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
+{
+	hcb_p np = SYM_SOFTC_PTR(cmd);
+	ucmd_p ucp = SYM_UCMD_PTR(cmd);
+	u_long flags;
+	int sts = 0;
+
+	cmd->scsi_done = done;
+	cmd->host_scribble = NULL;
+	memset(ucp, 0, sizeof(*ucp));
+
+	SYM_LOCK_HCB(np, flags);
+
+	/*
+	 * Shorten our settle_time if needed for
+	 * this command not to time out.
+	 */
+	if (np->s.settle_time_valid && cmd->timeout_per_command) {
+		u_long tlimit = ktime_get(cmd->timeout_per_command);
+		tlimit = ktime_sub(tlimit, SYM_CONF_TIMER_INTERVAL*2);
+		if (ktime_dif(np->s.settle_time, tlimit) > 0) {
+			np->s.settle_time = tlimit;
+		}
+	}
+
+	/*
+	 * While settling after a bus reset, or while older commands
+	 * are still waiting, new commands must queue behind them.
+	 */
+	if (np->s.settle_time_valid || !sym_que_empty(&np->s.wait_cmdq)) {
+		sym_insque_tail(&ucp->link_cmdq, &np->s.wait_cmdq);
+		goto out;
+	}
+
+	sym_insque_tail(&ucp->link_cmdq, &np->s.busy_cmdq);
+	sts = sym_queue_command(np, cmd);
+	if (sts) {
+		sym_remque(&ucp->link_cmdq);
+		sym_insque_tail(&ucp->link_cmdq, &np->s.wait_cmdq);
+	}
+out:
+	SYM_UNLOCK_HCB(np, flags);
+
+	return 0;
+}
+
+/*
+ * Linux entry point of the interrupt handler.
+ */
+static void sym53c8xx_intr(int irq, void *dev_id, struct pt_regs * regs)
+{
+	unsigned long flags;
+	unsigned long flags1;
+	hcb_p np = (hcb_p) dev_id;
+
+	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
+
+	/* Lock order: SCSI lock first, then the HCB lock. */
+	SYM_LOCK_SCSI(np, flags1);
+	SYM_LOCK_HCB(np, flags);
+
+	sym_interrupt(np);
+
+	/*
+	 * Completions may have freed resources; retry any commands
+	 * parked on the wait queue unless a bus settle is in progress.
+	 */
+	if (!sym_que_empty(&np->s.wait_cmdq) && !np->s.settle_time_valid)
+		sym_requeue_awaiting_cmds(np);
+
+	SYM_UNLOCK_HCB(np, flags);
+	SYM_UNLOCK_SCSI(np, flags1);
+
+	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("]\n");
+}
+
+/*
+ * Linux entry point of the timer handler
+ */
+static void sym53c8xx_timer(unsigned long npref)
+{
+	hcb_p np = (hcb_p) npref;
+	unsigned long flags;
+	unsigned long flags1;
+
+	/* Same lock order as the interrupt handler: SCSI, then HCB. */
+	SYM_LOCK_SCSI(np, flags1);
+	SYM_LOCK_HCB(np, flags);
+
+	sym_timer(np);
+
+	if (!sym_que_empty(&np->s.wait_cmdq) && !np->s.settle_time_valid)
+		sym_requeue_awaiting_cmds(np);
+
+	SYM_UNLOCK_HCB(np, flags);
+	SYM_UNLOCK_SCSI(np, flags1);
+}
+
+
+/*
+ * What the eh thread wants us to perform.
+ */
+#define SYM_EH_ABORT		0
+#define SYM_EH_DEVICE_RESET	1
+#define SYM_EH_BUS_RESET	2
+#define SYM_EH_HOST_RESET	3
+
+/*
+ * What we will do regarding the involved SCSI command.
+ */
+#define SYM_EH_DO_IGNORE	0
+#define SYM_EH_DO_COMPLETE	1
+#define SYM_EH_DO_WAIT		2
+
+/*
+ * Our general completion handler.
+ *
+ * Called either as the temporary scsi_done() of a command the eh
+ * thread waits for (timed_out == 0) or from the eh safety timer
+ * (timed_out == 1).  Restores the original scsi_done() and wakes
+ * the sleeping eh thread when appropriate.
+ */
+static void __sym_eh_done(Scsi_Cmnd *cmd, int timed_out)
+{
+	struct sym_eh_wait *ep = SYM_UCMD_PTR(cmd)->eh_wait;
+	if (!ep)
+		return;
+
+	/* Try to avoid a race here (not 100% safe) */
+	if (!timed_out) {
+		ep->timed_out = 0;
+		/*
+		 * If del_timer() returns 0 the timeout handler is
+		 * already running (or has run); let it finish the job.
+		 */
+		if (ep->to_do == SYM_EH_DO_WAIT && !del_timer(&ep->timer))
+			return;
+	}
+
+	/* Revert everything */
+	SYM_UCMD_PTR(cmd)->eh_wait = 0;
+	cmd->scsi_done = ep->old_done;
+
+	/* Wake up the eh thread if it wants to sleep */
+	if (ep->to_do == SYM_EH_DO_WAIT)
+		up(&ep->sem);
+}
+
+/*
+ * scsi_done() alias when error recovery is in progress.
+ */ +static void sym_eh_done(Scsi_Cmnd *cmd) { __sym_eh_done(cmd, 0); } + +/* + * Some timeout handler to avoid waiting too long. + */ +static void sym_eh_timeout(u_long p) { __sym_eh_done((Scsi_Cmnd *)p, 1); } + +/* + * Generic method for our eh processing. + * The 'op' argument tells what we have to do. + */ +static int sym_eh_handler(int op, char *opname, Scsi_Cmnd *cmd) +{ + hcb_p np = SYM_SOFTC_PTR(cmd); + unsigned long flags; + SYM_QUEHEAD *qp; + int to_do = SYM_EH_DO_IGNORE; + int sts = -1; + struct sym_eh_wait eh, *ep = &eh; + char devname[20]; + + sprintf(devname, "%s:%d:%d", sym_name(np), cmd->target, cmd->lun); + + printf_warning("%s: %s operation started.\n", devname, opname); + + SYM_LOCK_HCB(np, flags); + +#if 0 + /* This one should be the result of some race, thus to ignore */ + if (cmd->serial_number != cmd->serial_number_at_timeout) + goto prepare; +#endif + + /* This one is not queued to the core driver -> to complete here */ + FOR_EACH_QUEUED_ELEMENT(&np->s.wait_cmdq, qp) { + if (SYM_SCMD_PTR(qp) == cmd) { + to_do = SYM_EH_DO_COMPLETE; + goto prepare; + } + } + + /* This one is queued in some place -> to wait for completion */ + FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { + ccb_p cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + if (cp->cam_ccb == cmd) { + to_do = SYM_EH_DO_WAIT; + goto prepare; + } + } + +prepare: + /* Prepare stuff to either ignore, complete or wait for completion */ + switch(to_do) { + default: + case SYM_EH_DO_IGNORE: + goto finish; + break; + case SYM_EH_DO_WAIT: +#if LINUX_VERSION_CODE > LinuxVersionCode(2,3,0) + init_MUTEX_LOCKED(&ep->sem); +#else + ep->sem = MUTEX_LOCKED; +#endif + /* fall through */ + case SYM_EH_DO_COMPLETE: + ep->old_done = cmd->scsi_done; + cmd->scsi_done = sym_eh_done; + SYM_UCMD_PTR(cmd)->eh_wait = ep; + } + + /* Try to proceed the operation we have been asked for */ + sts = -1; + switch(op) { + case SYM_EH_ABORT: + sts = sym_abort_scsiio(np, cmd, 1); + break; + case SYM_EH_DEVICE_RESET: + sts 
= sym_reset_scsi_target(np, cmd->target); + break; + case SYM_EH_BUS_RESET: + sym_reset_scsi_bus(np, 1); + sts = 0; + break; + case SYM_EH_HOST_RESET: + sym_reset_scsi_bus(np, 0); + sym_start_up (np, 1); + sts = 0; + break; + default: + break; + } + + /* On error, restore everything and cross fingers :) */ + if (sts) { + SYM_UCMD_PTR(cmd)->eh_wait = 0; + cmd->scsi_done = ep->old_done; + to_do = SYM_EH_DO_IGNORE; + } + +finish: + ep->to_do = to_do; + /* Complete the command with locks held as required by the driver */ + if (to_do == SYM_EH_DO_COMPLETE) + sym_xpt_done2(np, cmd, CAM_REQ_ABORTED); + + SYM_UNLOCK_HCB(np, flags); + + /* Wait for completion with locks released, as required by kernel */ + if (to_do == SYM_EH_DO_WAIT) { + init_timer(&ep->timer); + ep->timer.expires = jiffies + (5*HZ); + ep->timer.function = sym_eh_timeout; + ep->timer.data = (u_long)cmd; + ep->timed_out = 1; /* Be pessimistic for once :) */ + add_timer(&ep->timer); + SYM_UNLOCK_SCSI_NORESTORE(np); + down(&ep->sem); + SYM_LOCK_SCSI_NOSAVE(np); + if (ep->timed_out) + sts = -2; + } + printf_warning("%s: %s operation %s.\n", devname, opname, + sts==0?"complete":sts==-2?"timed-out":"failed"); + return sts? SCSI_FAILED : SCSI_SUCCESS; +} + + +/* + * Error handlers called from the eh thread (one thread per HBA). + */ +int sym53c8xx_eh_abort_handler(Scsi_Cmnd *cmd) +{ + return sym_eh_handler(SYM_EH_ABORT, "ABORT", cmd); +} + +int sym53c8xx_eh_device_reset_handler(Scsi_Cmnd *cmd) +{ + return sym_eh_handler(SYM_EH_DEVICE_RESET, "DEVICE RESET", cmd); +} + +int sym53c8xx_eh_bus_reset_handler(Scsi_Cmnd *cmd) +{ + return sym_eh_handler(SYM_EH_BUS_RESET, "BUS RESET", cmd); +} + +int sym53c8xx_eh_host_reset_handler(Scsi_Cmnd *cmd) +{ + return sym_eh_handler(SYM_EH_HOST_RESET, "HOST RESET", cmd); +} + +/* + * Tune device queuing depth, according to various limits. 
+ */
+static void
+sym_tune_dev_queuing(hcb_p np, int target, int lun, u_short reqtags)
+{
+	tcb_p tp = &np->target[target];
+	lcb_p lp = sym_lp(np, tp, lun);
+	u_short oldtags;
+
+	/* No LCB yet for this LUN -> nothing to tune. */
+	if (!lp)
+		return;
+
+	oldtags = lp->s.reqtags;
+
+	/* Never exceed the depth the SCSI layer configured for us. */
+	if (reqtags > lp->s.scdev_depth)
+		reqtags = lp->s.scdev_depth;
+
+	/* reqtags == 0 means untagged; still allow 2 started commands. */
+	lp->started_limit = reqtags ? reqtags : 2;
+	lp->started_max = 1;
+	lp->s.reqtags = reqtags;
+
+	/* Only log when the effective depth actually changed. */
+	if (reqtags != oldtags) {
+		printf_info("%s:%d:%d: "
+		"tagged command queuing %s, command queue depth %d.\n",
+		sym_name(np), target, lun,
+		lp->s.reqtags ? "enabled" : "disabled",
+		lp->started_limit);
+	}
+}
+
+#ifdef SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT
+/*
+ * Linux select queue depths function
+ */
+#define DEF_DEPTH	(sym_driver_setup.max_tag)
+#define ALL_TARGETS	-2
+#define NO_TARGET	-1
+#define ALL_LUNS	-2
+#define NO_LUN		-1
+
+/*
+ * Parse the boot-time "tags" control string for a per-device queue
+ * depth.  The string is a sequence of letter+number tokens:
+ *   '/'  advance to the next host (h),
+ *   't'N select target N (any other target -> NO_TARGET),
+ *   'u'N select lun N (any other lun -> NO_LUN),
+ *   'q'N if host/target/lun currently match, return depth N,
+ *   '-'  reset target/lun selection to "all".
+ * Falls back to DEF_DEPTH when no 'q' token matches.
+ */
+static int device_queue_depth(hcb_p np, int target, int lun)
+{
+	int c, h, t, u, v;
+	char *p = sym_driver_setup.tag_ctrl;
+	char *ep;
+
+	h = -1;
+	t = NO_TARGET;
+	u = NO_LUN;
+	while ((c = *p++) != 0) {
+		v = simple_strtoul(p, &ep, 0);
+		switch(c) {
+		case '/':
+			++h;
+			t = ALL_TARGETS;
+			u = ALL_LUNS;
+			break;
+		case 't':
+			if (t != target)
+				t = (target == v) ? v : NO_TARGET;
+			u = ALL_LUNS;
+			break;
+		case 'u':
+			if (u != lun)
+				u = (lun == v) ? v : NO_LUN;
+			break;
+		case 'q':
+			if (h == np->s.unit &&
+				(t == ALL_TARGETS || t == target) &&
+				(u == ALL_LUNS || u == lun))
+				return v;
+			break;
+		case '-':
+			t = ALL_TARGETS;
+			u = ALL_LUNS;
+			break;
+		default:
+			break;
+		}
+		p = ep;
+	}
+	return DEF_DEPTH;
+}
+#else
+#define device_queue_depth(np, t, l)	(sym_driver_setup.max_tag)
+#endif /* SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT */
+
+/*
+ * Linux entry point for device queue sizing.
+ */ +static void +sym53c8xx_select_queue_depths(struct Scsi_Host *host, + struct scsi_device *devlist) +{ + struct scsi_device *device; + + for (device = devlist; device; device = device->next) { + hcb_p np; + tcb_p tp; + lcb_p lp; + int reqtags; + + if (device->host != host) + continue; + + np = ((struct host_data *) host->hostdata)->ncb; + tp = &np->target[device->id]; + + /* + * Get user settings for transfer parameters. + */ + tp->inq_byte7_valid = (INQ7_SYNC|INQ7_WIDE16); + sym_update_trans_settings(np, tp); + + /* + * Allocate the LCB if not yet. + * If it fail, we may well be in the sh*t. :) + */ + lp = sym_alloc_lcb(np, device->id, device->lun); + if (!lp) { + device->queue_depth = 1; + continue; + } + + /* + * Get user flags. + */ + lp->curr_flags = lp->user_flags; + + /* + * Select queue depth from driver setup. + * Donnot use more than configured by user. + * Use at least 2. + * Donnot use more than our maximum. + */ + reqtags = device_queue_depth(np, device->id, device->lun); + if (reqtags > tp->usrtags) + reqtags = tp->usrtags; + if (!device->tagged_supported) + reqtags = 0; +#if 1 /* Avoid to locally queue commands for no good reasons */ + if (reqtags > SYM_CONF_MAX_TAG) + reqtags = SYM_CONF_MAX_TAG; + device->queue_depth = reqtags ? reqtags : 2; +#else + device->queue_depth = reqtags ? SYM_CONF_MAX_TAG : 2; +#endif + lp->s.scdev_depth = device->queue_depth; + sym_tune_dev_queuing(np, device->id, device->lun, reqtags); + } +} + +/* + * Linux entry point for info() function + */ +const char *sym53c8xx_info (struct Scsi_Host *host) +{ + return sym_driver_name(); +} + + +#ifdef SYM_LINUX_PROC_INFO_SUPPORT +/* + * Proc file system stuff + * + * A read operation returns adapter information. + * A write operation is a control command. + * The string is parsed in the driver code and the command is passed + * to the sym_usercmd() function. 
+ */ + +#ifdef SYM_LINUX_USER_COMMAND_SUPPORT + +struct sym_usrcmd { + u_long target; + u_long lun; + u_long data; + u_long cmd; +}; + +#define UC_SETSYNC 10 +#define UC_SETTAGS 11 +#define UC_SETDEBUG 12 +#define UC_SETWIDE 14 +#define UC_SETFLAG 15 +#define UC_SETVERBOSE 17 +#define UC_RESETDEV 18 +#define UC_CLEARDEV 19 + +static void sym_exec_user_command (hcb_p np, struct sym_usrcmd *uc) +{ + tcb_p tp; + int t, l; + + switch (uc->cmd) { + case 0: return; + +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT + case UC_SETDEBUG: + sym_debug_flags = uc->data; + break; +#endif + case UC_SETVERBOSE: + np->verbose = uc->data; + break; + default: + /* + * We assume that other commands apply to targets. + * This should always be the case and avoid the below + * 4 lines to be repeated 6 times. + */ + for (t = 0; t < SYM_CONF_MAX_TARGET; t++) { + if (!((uc->target >> t) & 1)) + continue; + tp = &np->target[t]; + + switch (uc->cmd) { + + case UC_SETSYNC: + if (!uc->data || uc->data >= 255) { + tp->tinfo.goal.options = 0; + tp->tinfo.goal.offset = 0; + break; + } + if (uc->data <= 9 && np->minsync_dt) { + if (uc->data < np->minsync_dt) + uc->data = np->minsync_dt; + tp->tinfo.goal.options = PPR_OPT_DT; + tp->tinfo.goal.width = 1; + tp->tinfo.goal.period = uc->data; + tp->tinfo.goal.offset = np->maxoffs_dt; + } + else { + if (uc->data < np->minsync) + uc->data = np->minsync; + tp->tinfo.goal.options = 0; + tp->tinfo.goal.period = uc->data; + tp->tinfo.goal.offset = np->maxoffs; + } + break; + case UC_SETWIDE: + tp->tinfo.goal.width = uc->data ? 
1 : 0; + break; + case UC_SETTAGS: + for (l = 0; l < SYM_CONF_MAX_LUN; l++) + sym_tune_dev_queuing(np, t,l, uc->data); + break; + case UC_RESETDEV: + tp->to_reset = 1; + np->istat_sem = SEM; + OUTB (nc_istat, SIGP|SEM); + break; + case UC_CLEARDEV: + for (l = 0; l < SYM_CONF_MAX_LUN; l++) { + lcb_p lp = sym_lp(np, tp, l); + if (lp) lp->to_clear = 1; + } + np->istat_sem = SEM; + OUTB (nc_istat, SIGP|SEM); + break; + case UC_SETFLAG: + tp->usrflags = uc->data; + break; + } + } + break; + } +} + +#define is_digit(c) ((c) >= '0' && (c) <= '9') +#define digit_to_bin(c) ((c) - '0') +#define is_space(c) ((c) == ' ' || (c) == '\t') + +static int skip_spaces(char *ptr, int len) +{ + int cnt, c; + + for (cnt = len; cnt > 0 && (c = *ptr++) && is_space(c); cnt--); + + return (len - cnt); +} + +static int get_int_arg(char *ptr, int len, u_long *pv) +{ + int cnt, c; + u_long v; + + for (v = 0, cnt = len; cnt > 0 && (c = *ptr++) && is_digit(c); cnt--) { + v = (v * 10) + digit_to_bin(c); + } + + if (pv) + *pv = v; + + return (len - cnt); +} + +static int is_keyword(char *ptr, int len, char *verb) +{ + int verb_len = strlen(verb); + + if (len >= strlen(verb) && !memcmp(verb, ptr, verb_len)) + return verb_len; + else + return 0; + +} + +#define SKIP_SPACES(min_spaces) \ + if ((arg_len = skip_spaces(ptr, len)) < (min_spaces)) \ + return -EINVAL; \ + ptr += arg_len; len -= arg_len; + +#define GET_INT_ARG(v) \ + if (!(arg_len = get_int_arg(ptr, len, &(v)))) \ + return -EINVAL; \ + ptr += arg_len; len -= arg_len; + + +/* + * Parse a control command + */ + +static int sym_user_command(hcb_p np, char *buffer, int length) +{ + char *ptr = buffer; + int len = length; + struct sym_usrcmd cmd, *uc = &cmd; + int arg_len; + u_long target; + + bzero(uc, sizeof(*uc)); + + if (len > 0 && ptr[len-1] == '\n') + --len; + + if ((arg_len = is_keyword(ptr, len, "setsync")) != 0) + uc->cmd = UC_SETSYNC; + else if ((arg_len = is_keyword(ptr, len, "settags")) != 0) + uc->cmd = UC_SETTAGS; + else if 
((arg_len = is_keyword(ptr, len, "setverbose")) != 0) + uc->cmd = UC_SETVERBOSE; + else if ((arg_len = is_keyword(ptr, len, "setwide")) != 0) + uc->cmd = UC_SETWIDE; +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT + else if ((arg_len = is_keyword(ptr, len, "setdebug")) != 0) + uc->cmd = UC_SETDEBUG; +#endif + else if ((arg_len = is_keyword(ptr, len, "setflag")) != 0) + uc->cmd = UC_SETFLAG; + else if ((arg_len = is_keyword(ptr, len, "resetdev")) != 0) + uc->cmd = UC_RESETDEV; + else if ((arg_len = is_keyword(ptr, len, "cleardev")) != 0) + uc->cmd = UC_CLEARDEV; + else + arg_len = 0; + +#ifdef DEBUG_PROC_INFO +printk("sym_user_command: arg_len=%d, cmd=%ld\n", arg_len, uc->cmd); +#endif + + if (!arg_len) + return -EINVAL; + ptr += arg_len; len -= arg_len; + + switch(uc->cmd) { + case UC_SETSYNC: + case UC_SETTAGS: + case UC_SETWIDE: + case UC_SETFLAG: + case UC_RESETDEV: + case UC_CLEARDEV: + SKIP_SPACES(1); + if ((arg_len = is_keyword(ptr, len, "all")) != 0) { + ptr += arg_len; len -= arg_len; + uc->target = ~0; + } else { + GET_INT_ARG(target); + uc->target = (1<<target); +#ifdef DEBUG_PROC_INFO +printk("sym_user_command: target=%ld\n", target); +#endif + } + break; + } + + switch(uc->cmd) { + case UC_SETVERBOSE: + case UC_SETSYNC: + case UC_SETTAGS: + case UC_SETWIDE: + SKIP_SPACES(1); + GET_INT_ARG(uc->data); +#ifdef DEBUG_PROC_INFO +printk("sym_user_command: data=%ld\n", uc->data); +#endif + break; +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT + case UC_SETDEBUG: + while (len > 0) { + SKIP_SPACES(1); + if ((arg_len = is_keyword(ptr, len, "alloc"))) + uc->data |= DEBUG_ALLOC; + else if ((arg_len = is_keyword(ptr, len, "phase"))) + uc->data |= DEBUG_PHASE; + else if ((arg_len = is_keyword(ptr, len, "queue"))) + uc->data |= DEBUG_QUEUE; + else if ((arg_len = is_keyword(ptr, len, "result"))) + uc->data |= DEBUG_RESULT; + else if ((arg_len = is_keyword(ptr, len, "scatter"))) + uc->data |= DEBUG_SCATTER; + else if ((arg_len = is_keyword(ptr, len, "script"))) + uc->data |= 
DEBUG_SCRIPT; + else if ((arg_len = is_keyword(ptr, len, "tiny"))) + uc->data |= DEBUG_TINY; + else if ((arg_len = is_keyword(ptr, len, "timing"))) + uc->data |= DEBUG_TIMING; + else if ((arg_len = is_keyword(ptr, len, "nego"))) + uc->data |= DEBUG_NEGO; + else if ((arg_len = is_keyword(ptr, len, "tags"))) + uc->data |= DEBUG_TAGS; + else if ((arg_len = is_keyword(ptr, len, "pointer"))) + uc->data |= DEBUG_POINTER; + else + return -EINVAL; + ptr += arg_len; len -= arg_len; + } +#ifdef DEBUG_PROC_INFO +printk("sym_user_command: data=%ld\n", uc->data); +#endif + break; +#endif /* SYM_LINUX_DEBUG_CONTROL_SUPPORT */ + case UC_SETFLAG: + while (len > 0) { + SKIP_SPACES(1); + if ((arg_len = is_keyword(ptr, len, "no_disc"))) + uc->data &= ~SYM_DISC_ENABLED; + else + return -EINVAL; + ptr += arg_len; len -= arg_len; + } + break; + default: + break; + } + + if (len) + return -EINVAL; + else { + long flags; + + SYM_LOCK_HCB(np, flags); + sym_exec_user_command (np, uc); + SYM_UNLOCK_HCB(np, flags); + } + return length; +} + +#endif /* SYM_LINUX_USER_COMMAND_SUPPORT */ + + +#ifdef SYM_LINUX_USER_INFO_SUPPORT +/* + * Informations through the proc file system. + */ +struct info_str { + char *buffer; + int length; + int offset; + int pos; +}; + +static void copy_mem_info(struct info_str *info, char *data, int len) +{ + if (info->pos + len > info->length) + len = info->length - info->pos; + + if (info->pos + len < info->offset) { + info->pos += len; + return; + } + if (info->pos < info->offset) { + data += (info->offset - info->pos); + len -= (info->offset - info->pos); + } + + if (len > 0) { + memcpy(info->buffer + info->pos, data, len); + info->pos += len; + } +} + +static int copy_info(struct info_str *info, char *fmt, ...) +{ + va_list args; + char buf[81]; + int len; + + va_start(args, fmt); + len = vsprintf(buf, fmt, args); + va_end(args); + + copy_mem_info(info, buf, len); + return len; +} + +/* + * Copy formatted information into the input buffer. 
+ */
+static int sym_host_info(hcb_p np, char *ptr, off_t offset, int len)
+{
+	struct info_str info;
+
+	info.buffer	= ptr;
+	info.length	= len;
+	info.offset	= offset;
+	info.pos	= 0;
+
+	/* Chip identification. */
+	copy_info(&info, "Chip " NAME53C "%s, device id 0x%x, "
+			"revision id 0x%x\n",
+			np->s.chip_name, np->device_id, np->revision_id);
+	/* PCI location and IRQ (sparc prints IRQs symbolically). */
+	copy_info(&info, "On PCI bus %d, device %d, function %d, "
+#ifdef __sparc__
+		"IRQ %s\n",
+#else
+		"IRQ %d\n",
+#endif
+		np->s.bus, (np->s.device_fn & 0xf8) >> 3, np->s.device_fn & 7,
+#ifdef __sparc__
+		__irq_itoa(np->s.irq));
+#else
+		(int) np->s.irq);
+#endif
+	/* Negotiated transfer capabilities. */
+	copy_info(&info, "Min. period factor %d, %s SCSI BUS%s\n",
+			(int) (np->minsync_dt ? np->minsync_dt : np->minsync),
+			np->maxwide ? "Wide" : "Narrow",
+			np->minsync_dt ? ", DT capable" : "");
+
+	copy_info(&info, "Max. started commands %d, "
+			"max. commands per LUN %d\n",
+			SYM_CONF_MAX_START, SYM_CONF_MAX_TAG);
+
+	/* Number of bytes made available past the requested offset. */
+	return info.pos > info.offset? info.pos - info.offset : 0;
+}
+#endif /* SYM_LINUX_USER_INFO_SUPPORT */
+
+/*
+ * Entry point of the scsi proc fs of the driver.
+ * - func = 0 means read  (returns adapter information)
+ * - func = 1 means write (not yet merged from sym53c8xx)
+ */
+static int sym53c8xx_proc_info(char *buffer, char **start, off_t offset,
+			int length, int hostno, int func)
+{
+	struct Scsi_Host *host;
+	struct host_data *host_data;
+	hcb_p np = 0;
+	int retv;
+
+	/* Find the host instance (and thus our HCB) matching hostno. */
+	for (host = first_host; host; host = host->next) {
+		if (host->hostt != first_host->hostt)
+			continue;
+		if (host->host_no == hostno) {
+			host_data = (struct host_data *) host->hostdata;
+			np = host_data->ncb;
+			break;
+		}
+	}
+
+	if (!np)
+		return -EINVAL;
+
+	if (func) {
+#ifdef	SYM_LINUX_USER_COMMAND_SUPPORT
+		retv = sym_user_command(np, buffer, length);
+#else
+		retv = -EINVAL;
+#endif
+	}
+	else {
+		if (start)
+			*start = buffer;
+#ifdef SYM_LINUX_USER_INFO_SUPPORT
+		retv = sym_host_info(np, buffer, offset, length);
+#else
+		retv = -EINVAL;
+#endif
+	}
+
+	return retv;
+}
+#endif /* SYM_LINUX_PROC_INFO_SUPPORT */
+
+/*
+ * Free controller resources.
+ */
+static void sym_free_resources(hcb_p np)
+{
+	/*
+	 * Free O/S specific resources.
+	 */
+	if (np->s.irq)
+		free_irq(np->s.irq, np);
+	if (np->s.io_port)
+		release_region(np->s.io_port, np->s.io_ws);
+#ifndef SYM_OPT_NO_BUS_MEMORY_MAPPING
+	if (np->s.mmio_va)
+		pci_unmap_mem(np->s.mmio_va, np->s.io_ws);
+	if (np->s.ram_va)
+		pci_unmap_mem(np->s.ram_va, np->ram_ws);
+#endif
+	/*
+	 * Free O/S independent resources.
+	 */
+	sym_hcb_free(np);
+
+	/* The HCB itself was DMA-allocated; release it last. */
+	sym_mfree_dma(np, sizeof(*np), "HCB");
+}
+
+/*
+ * Ask/tell the system about DMA addressing.
+ */ +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING +static int sym_setup_bus_dma_mask(hcb_p np) +{ +#if LINUX_VERSION_CODE < LinuxVersionCode(2,4,3) + if (!pci_dma_supported(np->s.device, 0xffffffffUL)) + goto out_err32; +#else +#if SYM_CONF_DMA_ADDRESSING_MODE == 0 + if (pci_set_dma_mask(np->s.device, 0xffffffffUL)) + goto out_err32; +#else +#if SYM_CONF_DMA_ADDRESSING_MODE == 1 +#define PciDmaMask 0xffffffffff +#elif SYM_CONF_DMA_ADDRESSING_MODE == 2 +#define PciDmaMask 0xffffffffffffffff +#endif + if (np->features & FE_DAC) { + if (!pci_set_dma_mask(np->s.device, PciDmaMask)) { + np->use_dac = 1; + printf_info("%s: using 64 bit DMA addressing\n", + sym_name(np)); + } + else { + if (!pci_set_dma_mask(np->s.device, 0xffffffffUL)) + goto out_err32; + } + } +#undef PciDmaMask +#endif +#endif + return 0; + +out_err32: + printf_warning("%s: 32 BIT DMA ADDRESSING NOT SUPPORTED\n", + sym_name(np)); + return -1; +} +#endif /* SYM_LINUX_DYNAMIC_DMA_MAPPING */ + +/* + * Host attach and initialisations. + * + * Allocate host data and ncb structure. + * Request IO region and remap MMIO region. + * Do chip initialization. + * If all is OK, install interrupt handling and + * start the timer daemon. + */ +static int __init +sym_attach (Scsi_Host_Template *tpnt, int unit, sym_device *dev) +{ + struct host_data *host_data; + hcb_p np = 0; + struct Scsi_Host *instance = 0; + u_long flags = 0; + sym_nvram *nvram = dev->nvram; + struct sym_fw *fw; + + printk(KERN_INFO + "sym%d: <%s> rev 0x%x on pci bus %d device %d function %d " +#ifdef __sparc__ + "irq %s\n", +#else + "irq %d\n", +#endif + unit, dev->chip.name, dev->chip.revision_id, + dev->s.bus, (dev->s.device_fn & 0xf8) >> 3, + dev->s.device_fn & 7, +#ifdef __sparc__ + __irq_itoa(dev->s.irq)); +#else + dev->s.irq); +#endif + + /* + * Get the firmware for this chip. 
+ */ + fw = sym_find_firmware(&dev->chip); + if (!fw) + goto attach_failed; + + /* + * Allocate host_data structure + */ + if (!(instance = scsi_register(tpnt, sizeof(*host_data)))) + goto attach_failed; + host_data = (struct host_data *) instance->hostdata; + + /* + * Allocate immediately the host control block, + * since we are only expecting to succeed. :) + * We keep track in the HCB of all the resources that + * are to be released on error. + */ +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING + np = __sym_calloc_dma(dev->pdev, sizeof(*np), "HCB"); + if (np) { + np->s.device = dev->pdev; + np->bus_dmat = dev->pdev; /* Result in 1 DMA pool per HBA */ + } + else + goto attach_failed; +#else + np = sym_calloc_dma(sizeof(*np), "HCB"); + if (!np) + goto attach_failed; +#endif + host_data->ncb = np; + + SYM_INIT_LOCK_HCB(np); + + /* + * Copy some useful infos to the HCB. + */ + np->hcb_ba = vtobus(np); + np->verbose = sym_driver_setup.verbose; + np->s.device = dev->pdev; + np->s.unit = unit; + np->device_id = dev->chip.device_id; + np->revision_id = dev->chip.revision_id; + np->s.bus = dev->s.bus; + np->s.device_fn = dev->s.device_fn; + np->features = dev->chip.features; + np->clock_divn = dev->chip.nr_divisor; + np->maxoffs = dev->chip.offset_max; + np->maxburst = dev->chip.burst_max; + np->myaddr = dev->host_id; + + /* + * Edit its name. + */ + strncpy(np->s.chip_name, dev->chip.name, sizeof(np->s.chip_name)-1); + sprintf(np->s.inst_name, "sym%d", np->s.unit); + + /* + * Ask/tell the system about DMA addressing. + */ +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING + if (sym_setup_bus_dma_mask(np)) + goto attach_failed; +#endif + + /* + * Try to map the controller chip to + * virtual and physical memory. + */ + np->mmio_ba = (u32)dev->s.base; + np->s.io_ws = (np->features & FE_IO256)? 
256 : 128; + +#ifndef SYM_CONF_IOMAPPED + np->s.mmio_va = pci_map_mem(dev->s.base_c, np->s.io_ws); + if (!np->s.mmio_va) { + printf_err("%s: can't map PCI MMIO region\n", sym_name(np)); + goto attach_failed; + } + else if (sym_verbose > 1) + printf_info("%s: using memory mapped IO\n", sym_name(np)); +#endif /* !defined SYM_CONF_IOMAPPED */ + + /* + * Try to map the controller chip into iospace. + */ + if (dev->s.io_port) { + request_region(dev->s.io_port, np->s.io_ws, NAME53C8XX); + np->s.io_port = dev->s.io_port; + } + + /* + * Map on-chip RAM if present and supported. + */ + if (!(np->features & FE_RAM)) + dev->s.base_2 = 0; + if (dev->s.base_2) { + np->ram_ba = (u32)dev->s.base_2; + if (np->features & FE_RAM8K) + np->ram_ws = 8192; + else + np->ram_ws = 4096; +#ifndef SYM_OPT_NO_BUS_MEMORY_MAPPING + np->s.ram_va = pci_map_mem(dev->s.base_2_c, np->ram_ws); + if (!np->s.ram_va) { + printf_err("%s: can't map PCI MEMORY region\n", + sym_name(np)); + goto attach_failed; + } +#endif + } + + /* + * Perform O/S independant stuff. + */ + if (sym_hcb_attach(np, fw, nvram)) + goto attach_failed; + + + /* + * Install the interrupt handler. + * If we synchonize the C code with SCRIPTS on interrupt, + * we donnot want to share the INTR line at all. + */ + if (request_irq(dev->s.irq, sym53c8xx_intr, SA_SHIRQ, + NAME53C8XX, np)) { + printf_err("%s: request irq %d failure\n", + sym_name(np), dev->s.irq); + goto attach_failed; + } + np->s.irq = dev->s.irq; + + /* + * After SCSI devices have been opened, we cannot + * reset the bus safely, so we do it here. + */ + SYM_LOCK_HCB(np, flags); + if (sym_reset_scsi_bus(np, 0)) { + printf_err("%s: FATAL ERROR: CHECK SCSI BUS - CABLES, " + "TERMINATION, DEVICE POWER etc.!\n", sym_name(np)); + SYM_UNLOCK_HCB(np, flags); + goto attach_failed; + } + + /* + * Initialize some queue headers. + */ + sym_que_init(&np->s.wait_cmdq); + sym_que_init(&np->s.busy_cmdq); + + /* + * Start the SCRIPTS. 
+ */ + sym_start_up (np, 1); + + /* + * Start the timer daemon + */ + init_timer(&np->s.timer); + np->s.timer.data = (unsigned long) np; + np->s.timer.function = sym53c8xx_timer; + np->s.lasttime=0; + sym_timer (np); + + /* + * Done. + */ + if (!first_host) + first_host = instance; + + /* + * Fill Linux host instance structure + * and return success. + */ + instance->max_channel = 0; + instance->this_id = np->myaddr; + instance->max_id = np->maxwide ? 16 : 8; + instance->max_lun = SYM_CONF_MAX_LUN; +#ifndef SYM_CONF_IOMAPPED +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,29) + instance->base = (unsigned long) np->s.mmio_va; +#else + instance->base = (char *) np->s.mmio_va; +#endif +#endif + instance->irq = np->s.irq; + instance->unique_id = np->s.io_port; + instance->io_port = np->s.io_port; + instance->n_io_port = np->s.io_ws; + instance->dma_channel = 0; + instance->cmd_per_lun = SYM_CONF_MAX_TAG; + instance->can_queue = (SYM_CONF_MAX_START-2); + instance->sg_tablesize = SYM_CONF_MAX_SG; +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) + instance->max_cmd_len = 16; +#endif + instance->select_queue_depths = sym53c8xx_select_queue_depths; + + SYM_UNLOCK_HCB(np, flags); + + scsi_set_pci_device(instance, dev->pdev); + + /* + * Now let the generic SCSI driver + * look for the SCSI devices on the bus .. + */ + return 0; + +attach_failed: + if (!instance) return -1; + printf_info("%s: giving up ...\n", sym_name(np)); + if (np) + sym_free_resources(np); + scsi_unregister(instance); + + return -1; + } + + +/* + * Detect and try to read SYMBIOS and TEKRAM NVRAM. 
+ */ +#if SYM_CONF_NVRAM_SUPPORT +static void __init sym_get_nvram(sym_device *devp, sym_nvram *nvp) +{ + if (!nvp) + return; + + devp->nvram = nvp; + devp->device_id = devp->chip.device_id; + nvp->type = 0; + + /* + * Get access to chip IO registers + */ +#ifdef SYM_CONF_IOMAPPED + request_region(devp->s.io_port, 128, NAME53C8XX); +#else + devp->s.mmio_va = pci_map_mem(devp->s.base_c, 128); + if (!devp->s.mmio_va) + return; +#endif + + /* + * Try to read SYMBIOS|TEKRAM nvram. + */ + (void) sym_read_nvram(devp, nvp); + + /* + * Release access to chip IO registers + */ +#ifdef SYM_CONF_IOMAPPED + release_region(devp->s.io_port, 128); +#else + pci_unmap_mem((u_long) devp->s.mmio_va, 128ul); +#endif +} +#endif /* SYM_CONF_NVRAM_SUPPORT */ + +/* + * Driver setup from the boot command line + */ +#ifdef SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT + +static struct sym_driver_setup + sym_driver_safe_setup __initdata = SYM_LINUX_DRIVER_SAFE_SETUP; +#ifdef MODULE +char *sym53c8xx = 0; /* command line passed by insmod */ +MODULE_PARM(sym53c8xx, "s"); +#endif + +static void __init sym53c8xx_print_driver_setup(void) +{ + printf_info (NAME53C8XX ": setup=" + "mpar:%d,spar:%d,tags:%d,sync:%d,burst:%d," + "led:%d,wide:%d,diff:%d,irqm:%d, buschk:%d\n", + sym_driver_setup.pci_parity, + sym_driver_setup.scsi_parity, + sym_driver_setup.max_tag, + sym_driver_setup.min_sync, + sym_driver_setup.burst_order, + sym_driver_setup.scsi_led, + sym_driver_setup.max_wide, + sym_driver_setup.scsi_diff, + sym_driver_setup.irq_mode, + sym_driver_setup.scsi_bus_check); + printf_info (NAME53C8XX ": setup=" + "hostid:%d,offs:%d,luns:%d,pcifix:%d,revprob:%d," + "verb:%d,debug:0x%x,setlle_delay:%d\n", + sym_driver_setup.host_id, + sym_driver_setup.max_offs, + sym_driver_setup.max_lun, + sym_driver_setup.pci_fix_up, + sym_driver_setup.reverse_probe, + sym_driver_setup.verbose, + sym_driver_setup.debug, + sym_driver_setup.settle_delay); +#ifdef DEBUG_2_0_X +MDELAY(5000); +#endif +}; + +#define OPT_PCI_PARITY 1 
+#define OPT_SCSI_PARITY 2 +#define OPT_MAX_TAG 3 +#define OPT_MIN_SYNC 4 +#define OPT_BURST_ORDER 5 +#define OPT_SCSI_LED 6 +#define OPT_MAX_WIDE 7 +#define OPT_SCSI_DIFF 8 +#define OPT_IRQ_MODE 9 +#define OPT_SCSI_BUS_CHECK 10 +#define OPT_HOST_ID 11 +#define OPT_MAX_OFFS 12 +#define OPT_MAX_LUN 13 +#define OPT_PCI_FIX_UP 14 + +#define OPT_REVERSE_PROBE 15 +#define OPT_VERBOSE 16 +#define OPT_DEBUG 17 +#define OPT_SETTLE_DELAY 18 +#define OPT_USE_NVRAM 19 +#define OPT_EXCLUDE 20 +#define OPT_SAFE_SETUP 21 + +static char setup_token[] __initdata = + "mpar:" "spar:" + "tags:" "sync:" + "burst:" "led:" + "wide:" "diff:" + "irqm:" "buschk:" + "hostid:" "offset:" + "luns:" "pcifix:" + "revprob:" "verb:" + "debug:" "settle:" + "nvram:" "excl:" + "safe:" + ; + +#ifdef MODULE +#define ARG_SEP ' ' +#else +#define ARG_SEP ',' +#endif + +static int __init get_setup_token(char *p) +{ + char *cur = setup_token; + char *pc; + int i = 0; + + while (cur != NULL && (pc = strchr(cur, ':')) != NULL) { + ++pc; + ++i; + if (!strncmp(p, cur, pc - cur)) + return i; + cur = pc; + } + return 0; +} +#endif /* SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT */ + +int __init sym53c8xx_setup(char *str) +{ +#ifdef SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT + char *cur = str; + char *pc, *pv; + unsigned long val; + int i, c; + int xi = 0; + + while (cur != NULL && (pc = strchr(cur, ':')) != NULL) { + char *pe; + + val = 0; + pv = pc; + c = *++pv; + + if (c == 'n') + val = 0; + else if (c == 'y') + val = 1; + else + val = (int) simple_strtoul(pv, &pe, 0); + + switch (get_setup_token(cur)) { + case OPT_MAX_TAG: + sym_driver_setup.max_tag = val; + if (!(pe && *pe == '/')) + break; + i = 0; + while (*pe && *pe != ARG_SEP && + i < sizeof(sym_driver_setup.tag_ctrl)-1) { + sym_driver_setup.tag_ctrl[i++] = *pe++; + } + sym_driver_setup.tag_ctrl[i] = '\0'; + break; + case OPT_SAFE_SETUP: + memcpy(&sym_driver_setup, &sym_driver_safe_setup, + sizeof(sym_driver_setup)); + break; + case OPT_EXCLUDE: + if (xi < 8) + 
sym_driver_setup.excludes[xi++] = val; + break; + +#define __SIMPLE_OPTION(NAME, name) \ + case OPT_ ## NAME : \ + sym_driver_setup.name = val;\ + break; + + __SIMPLE_OPTION(PCI_PARITY, pci_parity) + __SIMPLE_OPTION(SCSI_PARITY, scsi_parity) + __SIMPLE_OPTION(MIN_SYNC, min_sync) + __SIMPLE_OPTION(BURST_ORDER, burst_order) + __SIMPLE_OPTION(SCSI_LED, scsi_led) + __SIMPLE_OPTION(MAX_WIDE, max_wide) + __SIMPLE_OPTION(SCSI_DIFF, scsi_diff) + __SIMPLE_OPTION(IRQ_MODE, irq_mode) + __SIMPLE_OPTION(SCSI_BUS_CHECK, scsi_bus_check) + __SIMPLE_OPTION(HOST_ID, host_id) + __SIMPLE_OPTION(MAX_OFFS, max_offs) + __SIMPLE_OPTION(MAX_LUN, max_lun) + __SIMPLE_OPTION(PCI_FIX_UP, pci_fix_up) + __SIMPLE_OPTION(REVERSE_PROBE, reverse_probe) + __SIMPLE_OPTION(VERBOSE, verbose) + __SIMPLE_OPTION(DEBUG, debug) + __SIMPLE_OPTION(SETTLE_DELAY, settle_delay) + __SIMPLE_OPTION(USE_NVRAM, use_nvram) + +#undef __SIMPLE_OPTION + + default: + printk("sym53c8xx_setup: unexpected boot option '%.*s' ignored\n", (int)(pc-cur+1), cur); + break; + } + + if ((cur = strchr(cur, ARG_SEP)) != NULL) + ++cur; + } +#endif /* SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT */ + return 1; +} + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,13) +#ifndef MODULE +__setup("sym53c8xx=", sym53c8xx_setup); +#endif +#endif + +#ifdef SYM_CONF_PQS_PDS_SUPPORT +/* + * Detect all NCR PQS/PDS boards and keep track of their bus nr. + * + * The NCR PQS or PDS card is constructed as a DEC bridge + * behind which sit a proprietary NCR memory controller and + * four or two 53c875s as separate devices. In its usual mode + * of operation, the 875s are slaved to the memory controller + * for all transfers. We can tell if an 875 is part of a + * PQS/PDS or not since if it is, it will be on the same bus + * as the memory controller. To operate with the Linux + * driver, the memory controller is disabled and the 875s + * freed to function independently. 
The only wrinkle is that + * the preset SCSI ID (which may be zero) must be read in from + * a special configuration space register of the 875 + */ +#ifndef SYM_CONF_MAX_PQS_BUS +#define SYM_CONF_MAX_PQS_BUS 16 +#endif +static int pqs_bus[SYM_CONF_MAX_PQS_BUS] __initdata = { 0 }; + +static void __init sym_detect_pqs_pds(void) +{ + short index; + pcidev_t dev = PCIDEV_NULL; + + for(index=0; index < SYM_CONF_MAX_PQS_BUS; index++) { + u_char tmp; + + dev = pci_find_device(0x101a, 0x0009, dev); + if (dev == PCIDEV_NULL) { + pqs_bus[index] = -1; + break; + } + printf_info(NAME53C8XX ": NCR PQS/PDS memory controller detected on bus %d\n", PciBusNumber(dev)); + pci_read_config_byte(dev, 0x44, &tmp); + /* bit 1: allow individual 875 configuration */ + tmp |= 0x2; + pci_write_config_byte(dev, 0x44, tmp); + pci_read_config_byte(dev, 0x45, &tmp); + /* bit 2: drive individual 875 interrupts to the bus */ + tmp |= 0x4; + pci_write_config_byte(dev, 0x45, tmp); + + pqs_bus[index] = PciBusNumber(dev); + } +} +#endif /* SYM_CONF_PQS_PDS_SUPPORT */ + +/* + * Read and check the PCI configuration for any detected NCR + * boards and save data for attaching after all boards have + * been detected. + */ +static int __init +sym53c8xx_pci_init(Scsi_Host_Template *tpnt, pcidev_t pdev, sym_device *device) +{ + u_short vendor_id, device_id, command, status_reg; + u_char cache_line_size; + u_char suggested_cache_line_size = 0; + u_char pci_fix_up = SYM_SETUP_PCI_FIX_UP; + u_char revision; + u_int irq; + u_long base, base_2, io_port; + u_long base_c, base_2_c; + int i; + sym_chip *chip; + + /* Choose some short name for this device */ + sprintf(device->s.inst_name, "sym.%d.%d.%d", + PciBusNumber(pdev), + (int) (PciDeviceFn(pdev) & 0xf8) >> 3, + (int) (PciDeviceFn(pdev) & 7)); + + /* + * Read needed minimal info from the PCI config space. 
+ */ + vendor_id = PciVendorId(pdev); + device_id = PciDeviceId(pdev); + irq = PciIrqLine(pdev); + + i = pci_get_base_address(pdev, 0, &io_port); + io_port = pci_get_base_cookie(pdev, 0); + + base_c = pci_get_base_cookie(pdev, i); + i = pci_get_base_address(pdev, i, &base); + + base_2_c = pci_get_base_cookie(pdev, i); + (void) pci_get_base_address(pdev, i, &base_2); + + io_port &= PCI_BASE_ADDRESS_IO_MASK; + base &= PCI_BASE_ADDRESS_MEM_MASK; + base_2 &= PCI_BASE_ADDRESS_MEM_MASK; + + pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision); + + /* + * If user excluded this chip, donnot initialize it. + */ + if (io_port) { + for (i = 0 ; i < 8 ; i++) { + if (sym_driver_setup.excludes[i] == io_port) + return -1; + } + } + + /* + * Leave here if another driver attached the chip. + */ + if (io_port && check_region (io_port, 128)) { + printf_info("%s: IO region 0x%lx[0..127] is in use\n", + sym_name(device), (long) io_port); + return -1; + } + + /* + * Check if the chip is supported. + */ + chip = sym_lookup_pci_chip_table(device_id, revision); + if (!chip) { + printf_info("%s: device not supported\n", sym_name(device)); + return -1; + } + + /* + * Check if the chip has been assigned resources we need. + */ +#ifdef SYM_CONF_IOMAPPED + if (!io_port) { + printf_info("%s: IO base address disabled.\n", + sym_name(device)); + return -1; + } +#else + if (!base) { + printf_info("%s: MMIO base address disabled.\n", + sym_name(device)); + return -1; + } +#endif + + /* + * Ignore Symbios chips controlled by various RAID controllers. + * These controllers set value 0x52414944 at RAM end - 16. 
+ */ +#if defined(__i386__) && !defined(SYM_OPT_NO_BUS_MEMORY_MAPPING) + if (base_2_c) { + unsigned int ram_size, ram_val; + u_long ram_ptr; + + if (chip->features & FE_RAM8K) + ram_size = 8192; + else + ram_size = 4096; + + ram_ptr = pci_map_mem(base_2_c, ram_size); + if (ram_ptr) { + ram_val = readl_raw(ram_ptr + ram_size - 16); + pci_unmap_mem(ram_ptr, ram_size); + if (ram_val == 0x52414944) { + printf_info("%s: not initializing, " + "driven by RAID controller.\n", + sym_name(device)); + return -1; + } + } + } +#endif /* i386 and PCI MEMORY accessible */ + + /* + * Copy the chip description to our device structure, + * so we can make it match the actual device and options. + */ + bcopy(chip, &device->chip, sizeof(device->chip)); + device->chip.revision_id = revision; + + /* + * Read additionnal info from the configuration space. + */ + pci_read_config_word(pdev, PCI_COMMAND, &command); + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size); + + /* + * Enable missing capabilities in the PCI COMMAND register. + */ +#ifdef SYM_CONF_IOMAPPED +#define PCI_COMMAND_BITS_TO_ENABLE (PCI_COMMAND_IO | \ + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_PARITY) +#else +#define PCI_COMMAND_BITS_TO_ENABLE \ + (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_PARITY) +#endif + if ((command & PCI_COMMAND_BITS_TO_ENABLE) + != PCI_COMMAND_BITS_TO_ENABLE) { + printf_info("%s: setting%s%s%s%s...\n", sym_name(device), + (command & PCI_COMMAND_IO) ? "" : " PCI_COMMAND_IO", + (command & PCI_COMMAND_MEMORY) ? "" : " PCI_COMMAND_MEMORY", + (command & PCI_COMMAND_MASTER) ? "" : " PCI_COMMAND_MASTER", + (command & PCI_COMMAND_PARITY) ? "" : " PCI_COMMAND_PARITY"); + command |= PCI_COMMAND_BITS_TO_ENABLE; + pci_write_config_word(pdev, PCI_COMMAND, command); + } +#undef PCI_COMMAND_BITS_TO_ENABLE + + /* + * If cache line size is not configured, suggest + * a value for well known CPUs. 
+ */ +#if defined(__i386__) && !defined(MODULE) + if (!cache_line_size && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch(boot_cpu_data.x86) { + case 4: suggested_cache_line_size = 4; break; + case 6: if (boot_cpu_data.x86_model > 8) break; + case 5: suggested_cache_line_size = 8; break; + } + } +#endif /* __i386__ */ + + /* + * Some features are required to be enabled in order to + * work around some chip problems. :) ;) + * (ITEM 12 of a DEL about the 896 I haven't yet). + * We must ensure the chip will use WRITE AND INVALIDATE. + * The revision number limit is for now arbitrary. + */ + if (device_id == PCI_DEVICE_ID_NCR_53C896 && revision < 0x4) { + chip->features |= (FE_WRIE | FE_CLSE); + pci_fix_up |= 3; /* Force appropriate PCI fix-up */ + } + +#ifdef SYM_CONF_PCI_FIX_UP + /* + * Try to fix up PCI config according to wished features. + */ + if ((pci_fix_up & 1) && (chip->features & FE_CLSE) && + !cache_line_size && suggested_cache_line_size) { + cache_line_size = suggested_cache_line_size; + pci_write_config_byte(pdev, + PCI_CACHE_LINE_SIZE, cache_line_size); + printf_info("%s: PCI_CACHE_LINE_SIZE set to %d.\n", + sym_name(device), cache_line_size); + } + + if ((pci_fix_up & 2) && cache_line_size && + (chip->features & FE_WRIE) && !(command & PCI_COMMAND_INVALIDATE)) { + printf_info("%s: setting PCI_COMMAND_INVALIDATE.\n", + sym_name(device)); + command |= PCI_COMMAND_INVALIDATE; + pci_write_config_word(pdev, PCI_COMMAND, command); + } +#endif /* SYM_CONF_PCI_FIX_UP */ + + /* + * Work around for errant bit in 895A. The 66Mhz + * capable bit is set erroneously. Clear this bit. + * (Item 1 DEL 533) + * + * Make sure Config space and Features agree. + * + * Recall: writes are not normal to status register - + * write a 1 to clear and a 0 to leave unchanged. + * Can only reset bits. 
+ */ + pci_read_config_word(pdev, PCI_STATUS, &status_reg); + if (chip->features & FE_66MHZ) { + if (!(status_reg & PCI_STATUS_66MHZ)) + chip->features &= ~FE_66MHZ; + } + else { + if (status_reg & PCI_STATUS_66MHZ) { + status_reg = PCI_STATUS_66MHZ; + pci_write_config_word(pdev, PCI_STATUS, status_reg); + pci_read_config_word(pdev, PCI_STATUS, &status_reg); + } + } + + /* + * Initialise device structure with items required by sym_attach. + */ + device->pdev = pdev; + device->s.bus = PciBusNumber(pdev); + device->s.device_fn = PciDeviceFn(pdev); + device->s.base = base; + device->s.base_2 = base_2; + device->s.base_c = base_c; + device->s.base_2_c = base_2_c; + device->s.io_port = io_port; + device->s.irq = irq; + device->attach_done = 0; + + return 0; +} + +/* + * List of supported NCR chip ids + */ +static u_short sym_chip_ids[] __initdata = { + PCI_ID_SYM53C810, + PCI_ID_SYM53C815, + PCI_ID_SYM53C825, + PCI_ID_SYM53C860, + PCI_ID_SYM53C875, + PCI_ID_SYM53C875_2, + PCI_ID_SYM53C885, + PCI_ID_SYM53C875A, + PCI_ID_SYM53C895, + PCI_ID_SYM53C896, + PCI_ID_SYM53C895A, + PCI_ID_LSI53C1510D, + PCI_ID_LSI53C1010, + PCI_ID_LSI53C1010_2 +}; + +/* + * Detect all 53c8xx hosts and then attach them. + * + * If we are using NVRAM, once all hosts are detected, we need to + * check any NVRAM for boot order in case detect and boot order + * differ and attach them using the order in the NVRAM. + * + * If no NVRAM is found or data appears invalid attach boards in + * the the order they are detected. + */ +int __init sym53c8xx_detect(Scsi_Host_Template *tpnt) +{ + pcidev_t pcidev; + int i, j, chips, hosts, count; + int attach_count = 0; + sym_device *devtbl, *devp; + sym_nvram nvram; +#if SYM_CONF_NVRAM_SUPPORT + sym_nvram nvram0, *nvp; +#endif + + /* + * PCI is required. + */ + if (!pci_present()) + return 0; + + /* + * Initialize driver general stuff. 
+ */ +#ifdef SYM_LINUX_PROC_INFO_SUPPORT +#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,27) + tpnt->proc_dir = &proc_scsi_sym53c8xx; +#else + tpnt->proc_name = NAME53C8XX; +#endif + tpnt->proc_info = sym53c8xx_proc_info; +#endif + +#ifdef SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT +#ifdef MODULE +if (sym53c8xx) + sym53c8xx_setup(sym53c8xx); +#endif +#ifdef SYM_LINUX_DEBUG_CONTROL_SUPPORT + sym_debug_flags = sym_driver_setup.debug; +#endif + if (boot_verbose >= 2) + sym53c8xx_print_driver_setup(); +#endif /* SYM_LINUX_BOOT_COMMAND_LINE_SUPPORT */ + + /* + * Allocate the device table since we donnot want to + * overflow the kernel stack. + * 1 x 4K PAGE is enough for more than 40 devices for i386. + */ + devtbl = sym_calloc(PAGE_SIZE, "DEVTBL"); + if (!devtbl) + return 0; + + /* + * Detect all NCR PQS/PDS memory controllers. + */ +#ifdef SYM_CONF_PQS_PDS_SUPPORT + sym_detect_pqs_pds(); +#endif + + /* + * Detect all 53c8xx hosts. + * Save the first Symbios NVRAM content if any + * for the boot order. + */ + chips = sizeof(sym_chip_ids) / sizeof(sym_chip_ids[0]); + hosts = PAGE_SIZE / sizeof(*devtbl); +#if SYM_CONF_NVRAM_SUPPORT + nvp = (sym_driver_setup.use_nvram & 0x1) ? &nvram0 : 0; +#endif + j = 0; + count = 0; + pcidev = PCIDEV_NULL; + while (1) { + char *msg = ""; + if (count >= hosts) + break; + if (j >= chips) + break; + i = sym_driver_setup.reverse_probe ? 
chips - 1 - j : j; + pcidev = pci_find_device(PCI_VENDOR_ID_NCR, sym_chip_ids[i], + pcidev); + if (pcidev == PCIDEV_NULL) { + ++j; + continue; + } + /* This one is guaranteed by AC to do nothing :-) */ + if (pci_enable_device(pcidev)) + continue; + /* Some HW as the HP LH4 may report twice PCI devices */ + for (i = 0; i < count ; i++) { + if (devtbl[i].s.bus == PciBusNumber(pcidev) && + devtbl[i].s.device_fn == PciDeviceFn(pcidev)) + break; + } + if (i != count) /* Ignore this device if we already have it */ + continue; + devp = &devtbl[count]; + devp->host_id = SYM_SETUP_HOST_ID; + devp->attach_done = 0; + if (sym53c8xx_pci_init(tpnt, pcidev, devp)) { + continue; + } + ++count; +#if SYM_CONF_NVRAM_SUPPORT + if (nvp) { + sym_get_nvram(devp, nvp); + switch(nvp->type) { + case SYM_SYMBIOS_NVRAM: + /* + * Switch to the other nvram buffer, so that + * nvram0 will contain the first Symbios + * format NVRAM content with boot order. + */ + nvp = &nvram; + msg = "with Symbios NVRAM"; + break; + case SYM_TEKRAM_NVRAM: + msg = "with Tekram NVRAM"; + break; + } + } +#endif +#ifdef SYM_CONF_PQS_PDS_SUPPORT + /* + * Match the BUS number for PQS/PDS devices. + * Read the SCSI ID from a special register mapped + * into the configuration space of the individual + * 875s. This register is set up by the PQS bios + */ + for(i = 0; i < SYM_CONF_MAX_PQS_BUS && pqs_bus[i] != -1; i++) { + u_char tmp; + if (pqs_bus[i] == PciBusNumber(pcidev)) { + pci_read_config_byte(pcidev, 0x84, &tmp); + devp->pqs_pds = 1; + devp->host_id = tmp; + break; + } + } + if (devp->pqs_pds) + msg = "(NCR PQS/PDS)"; +#endif + if (boot_verbose) + printf_info("%s: 53c%s detected %s\n", + sym_name(devp), devp->chip.name, msg); + } + + /* + * If we have found a SYMBIOS NVRAM, use first the NVRAM boot + * sequence as device boot order. + * check devices in the boot record against devices detected. + * attach devices if we find a match. boot table records that + * do not match any detected devices will be ignored. 
+ * devices that do not match any boot table will not be attached + * here but will attempt to be attached during the device table + * rescan. + */ +#if SYM_CONF_NVRAM_SUPPORT + if (!nvp || nvram0.type != SYM_SYMBIOS_NVRAM) + goto next; + for (i = 0; i < 4; i++) { + Symbios_host *h = &nvram0.data.Symbios.host[i]; + for (j = 0 ; j < count ; j++) { + devp = &devtbl[j]; + if (h->device_fn != devp->s.device_fn || + h->bus_nr != devp->s.bus || + h->device_id != devp->chip.device_id) + continue; + if (devp->attach_done) + continue; + if (h->flags & SYMBIOS_INIT_SCAN_AT_BOOT) { + sym_get_nvram(devp, nvp); + if (!sym_attach (tpnt, attach_count, devp)) + attach_count++; + } + else if (!(sym_driver_setup.use_nvram & 0x80)) + printf_info( + "%s: 53c%s state OFF thus not attached\n", + sym_name(devp), devp->chip.name); + else + continue; + + devp->attach_done = 1; + break; + } + } +next: +#endif + + /* + * Rescan device list to make sure all boards attached. + * Devices without boot records will not be attached yet + * so try to attach them here. + */ + for (i= 0; i < count; i++) { + devp = &devtbl[i]; + if (!devp->attach_done) { + devp->nvram = &nvram; + nvram.type = 0; +#if SYM_CONF_NVRAM_SUPPORT + sym_get_nvram(devp, nvp); +#endif + if (!sym_attach (tpnt, attach_count, devp)) + attach_count++; + } + } + + sym_mfree(devtbl, PAGE_SIZE, "DEVTBL"); + + return attach_count; +} + + + +#ifdef MODULE +/* + * Linux release module stuff. + * + * Called before unloading the module. + * Detach the host. + * We have to free resources and halt the NCR chip. + * + */ +static int sym_detach(hcb_p np) +{ + printk("%s: detaching ...\n", sym_name(np)); + + /* + * Try to delete the timer. + * In the unlikely situation where this failed, + * try to synchronize with the timer handler. 
+ */ +#if LINUX_VERSION_CODE < LinuxVersionCode(2, 4, 0) + np->s.release_stage = 1; + if (!del_timer(&np->s.timer)) { + int i = 1000; + int k = 1; + while (1) { + u_long flags; + SYM_LOCK_HCB(np, flags); + k = np->s.release_stage; + SYM_UNLOCK_HCB(np, flags); + if (k == 2 || !--i) + break; + MDELAY(5); + } + if (!i) + printk("%s: failed to kill timer!\n", sym_name(np)); + } + np->s.release_stage = 2; +#else + (void)del_timer_sync(&np->s.timer); +#endif + + /* + * Reset NCR chip. + * We should use sym_soft_reset(), but we donnot want to do + * so, since we may not be safe if interrupts occur. + */ + printk("%s: resetting chip\n", sym_name(np)); + OUTB (nc_istat, SRST); + UDELAY (10); + OUTB (nc_istat, 0); + + /* + * Free host resources + */ + sym_free_resources(np); + + return 1; +} + +int sym53c8xx_release(struct Scsi_Host *host) +{ + sym_detach(((struct host_data *) host->hostdata)->ncb); + + return 0; +} +#endif /* MODULE */ + +/* + * For bigots to keep silent. :) + */ +#ifdef MODULE_LICENSE +MODULE_LICENSE("Dual BSD/GPL"); +#endif + +/* + * Driver host template. + */ +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) +static +#endif +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0) || defined(MODULE) +Scsi_Host_Template driver_template = SYM53C8XX; +#include "../scsi_module.c" +#endif diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h new file mode 100644 index 000000000000..8de496992087 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_glue.h @@ -0,0 +1,676 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. 
+ * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM_GLUE_H +#define SYM_GLUE_H + +#if 0 +#define SYM_CONF_DMA_ADDRESSING_MODE 2 +#endif + +#define LinuxVersionCode(v, p, s) (((v)<<16)+((p)<<8)+(s)) +#include <linux/version.h> +#if LINUX_VERSION_CODE < LinuxVersionCode(2, 2, 0) +#error "This driver requires a kernel version not lower than 2.2.0" +#endif + +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/system.h> +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,17) +#include <linux/spinlock.h> +#else +#include <asm/spinlock.h> +#endif +#include <linux/delay.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/malloc.h> +#include <linux/mm.h> +#include <linux/ioport.h> +#include <linux/time.h> +#include <linux/timer.h> +#include <linux/stat.h> + +#include <linux/blk.h> + +#ifdef __sparc__ +# include <asm/irq.h> +#endif +#include <linux/init.h> + +#ifndef __init +#define __init +#endif +#ifndef __initdata +#define __initdata +#endif + +#include "../scsi.h" +#include "../hosts.h" +#include "../constants.h" +#include "../sd.h" + +#include <linux/types.h> + +/* + * Define BITS_PER_LONG for earlier linux versions. 
+ */ +#ifndef BITS_PER_LONG +#if (~0UL) == 0xffffffffUL +#define BITS_PER_LONG 32 +#else +#define BITS_PER_LONG 64 +#endif +#endif + +typedef u_long vm_offset_t; + +#ifndef bcopy +#define bcopy(s, d, n) memcpy((d), (s), (n)) +#endif + +#ifndef bzero +#define bzero(d, n) memset((d), 0, (n)) +#endif + +#ifndef bcmp +#define bcmp(a, b, n) memcmp((a), (b), (n)) +#endif + +/* + * General driver includes. + */ +#include "sym53c8xx.h" +#include "sym_misc.h" +#include "sym_conf.h" +#include "sym_defs.h" + +/* + * Configuration addendum for Linux. + */ +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,47) +#define SYM_LINUX_DYNAMIC_DMA_MAPPING +#endif + +#define SYM_CONF_TIMER_INTERVAL ((HZ+1)/2) + +#define SYM_OPT_HANDLE_DIR_UNKNOWN +#define SYM_OPT_HANDLE_DEVICE_QUEUEING +#define SYM_OPT_NVRAM_PRE_READ +#define SYM_OPT_SNIFF_INQUIRY +#define SYM_OPT_LIMIT_COMMAND_REORDERING +#define SYM_OPT_ANNOUNCE_TRANSFER_RATE + +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING +#define SYM_OPT_BUS_DMA_ABSTRACTION +#endif + +/* + * Print a message with severity. + */ +#define printf_emerg(args...) printk(KERN_EMERG args) +#define printf_alert(args...) printk(KERN_ALERT args) +#define printf_crit(args...) printk(KERN_CRIT args) +#define printf_err(args...) printk(KERN_ERR args) +#define printf_warning(args...) printk(KERN_WARNING args) +#define printf_notice(args...) printk(KERN_NOTICE args) +#define printf_info(args...) printk(KERN_INFO args) +#define printf_debug(args...) printk(KERN_DEBUG args) +#define printf(args...) printk(args) + +/* + * Insert a delay in micro-seconds and milli-seconds. + */ +void sym_udelay(int us); +void sym_mdelay(int ms); + +/* + * Let the compiler know about driver data structure names. 
+ */ +typedef struct sym_tcb *tcb_p; +typedef struct sym_lcb *lcb_p; +typedef struct sym_ccb *ccb_p; +typedef struct sym_hcb *hcb_p; +typedef struct sym_stcb *stcb_p; +typedef struct sym_slcb *slcb_p; +typedef struct sym_sccb *sccb_p; +typedef struct sym_shcb *shcb_p; + +/* + * Define a reference to the O/S dependant IO request. + */ +typedef Scsi_Cmnd *cam_ccb_p; /* Generic */ +typedef Scsi_Cmnd *cam_scsiio_p;/* SCSI I/O */ + + +/* + * IO functions definition for big/little endian CPU support. + * For now, PCI chips are only supported in little endian addressing mode, + */ + +#ifdef __BIG_ENDIAN + +#define inw_l2b inw +#define inl_l2b inl +#define outw_b2l outw +#define outl_b2l outl +#define readw_l2b readw +#define readl_l2b readl +#define writew_b2l writew +#define writel_b2l writel + +#else /* little endian */ + +#if defined(__i386__) /* i386 implements full FLAT memory/MMIO model */ +#define inw_raw inw +#define inl_raw inl +#define outw_raw outw +#define outl_raw outl +#define readb_raw(a) (*(volatile unsigned char *) (a)) +#define readw_raw(a) (*(volatile unsigned short *) (a)) +#define readl_raw(a) (*(volatile unsigned int *) (a)) +#define writeb_raw(b,a) ((*(volatile unsigned char *) (a)) = (b)) +#define writew_raw(b,a) ((*(volatile unsigned short *) (a)) = (b)) +#define writel_raw(b,a) ((*(volatile unsigned int *) (a)) = (b)) + +#else /* Other little-endian */ +#define inw_raw inw +#define inl_raw inl +#define outw_raw outw +#define outl_raw outl +#define readw_raw readw +#define readl_raw readl +#define writew_raw writew +#define writel_raw writel + +#endif +#endif + +#ifdef SYM_CONF_CHIP_BIG_ENDIAN +#error "Chips in BIG ENDIAN addressing mode are not (yet) supported" +#endif + + +/* + * If the chip uses big endian addressing mode over the + * PCI, actual io register addresses for byte and word + * accesses must be changed according to lane routing. 
+ * Btw, sym_offb() and sym_offw() macros only apply to + * constants and so donnot generate bloated code. + */ + +#if defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define sym_offb(o) (((o)&~3)+((~((o)&3))&3)) +#define sym_offw(o) (((o)&~3)+((~((o)&3))&2)) + +#else + +#define sym_offb(o) (o) +#define sym_offw(o) (o) + +#endif + +/* + * If the CPU and the chip use same endian-ness adressing, + * no byte reordering is needed for script patching. + * Macro cpu_to_scr() is to be used for script patching. + * Macro scr_to_cpu() is to be used for getting a DWORD + * from the script. + */ + +#if defined(__BIG_ENDIAN) && !defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define cpu_to_scr(dw) cpu_to_le32(dw) +#define scr_to_cpu(dw) le32_to_cpu(dw) + +#elif defined(__LITTLE_ENDIAN) && defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define cpu_to_scr(dw) cpu_to_be32(dw) +#define scr_to_cpu(dw) be32_to_cpu(dw) + +#else + +#define cpu_to_scr(dw) (dw) +#define scr_to_cpu(dw) (dw) + +#endif + +/* + * Access to the controller chip. + * + * If SYM_CONF_IOMAPPED is defined, the driver will use + * normal IOs instead of the MEMORY MAPPED IO method + * recommended by PCI specifications. + * If all PCI bridges, host brigdes and architectures + * would have been correctly designed for PCI, this + * option would be useless. + * + * If the CPU and the chip use same endian-ness adressing, + * no byte reordering is needed for accessing chip io + * registers. Functions suffixed by '_raw' are assumed + * to access the chip over the PCI without doing byte + * reordering. Functions suffixed by '_l2b' are + * assumed to perform little-endian to big-endian byte + * reordering, those suffixed by '_b2l' blah, blah, + * blah, ... 
+ */ + +#if defined(SYM_CONF_IOMAPPED) + +/* + * IO mapped only input / ouput + */ + +#define INB_OFF(o) inb (np->s.io_port + sym_offb(o)) +#define OUTB_OFF(o, val) outb ((val), np->s.io_port + sym_offb(o)) + +#if defined(__BIG_ENDIAN) && !defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define INW_OFF(o) inw_l2b (np->s.io_port + sym_offw(o)) +#define INL_OFF(o) inl_l2b (np->s.io_port + (o)) + +#define OUTW_OFF(o, val) outw_b2l ((val), np->s.io_port + sym_offw(o)) +#define OUTL_OFF(o, val) outl_b2l ((val), np->s.io_port + (o)) + +#elif defined(__LITTLE_ENDIAN) && defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define INW_OFF(o) inw_b2l (np->s.io_port + sym_offw(o)) +#define INL_OFF(o) inl_b2l (np->s.io_port + (o)) + +#define OUTW_OFF(o, val) outw_l2b ((val), np->s.io_port + sym_offw(o)) +#define OUTL_OFF(o, val) outl_l2b ((val), np->s.io_port + (o)) + +#else + +#define INW_OFF(o) inw_raw (np->s.io_port + sym_offw(o)) +#define INL_OFF(o) inl_raw (np->s.io_port + (o)) + +#define OUTW_OFF(o, val) outw_raw ((val), np->s.io_port + sym_offw(o)) +#define OUTL_OFF(o, val) outl_raw ((val), np->s.io_port + (o)) + +#endif /* ENDIANs */ + +#else /* defined SYM_CONF_IOMAPPED */ + +/* + * MEMORY mapped IO input / output + */ + +#define INB_OFF(o) readb((char *)np->s.mmio_va + sym_offb(o)) +#define OUTB_OFF(o, val) writeb((val), (char *)np->s.mmio_va + sym_offb(o)) + +#if defined(__BIG_ENDIAN) && !defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define INW_OFF(o) readw_l2b((char *)np->s.mmio_va + sym_offw(o)) +#define INL_OFF(o) readl_l2b((char *)np->s.mmio_va + (o)) + +#define OUTW_OFF(o, val) writew_b2l((val), (char *)np->s.mmio_va + sym_offw(o)) +#define OUTL_OFF(o, val) writel_b2l((val), (char *)np->s.mmio_va + (o)) + +#elif defined(__LITTLE_ENDIAN) && defined(SYM_CONF_CHIP_BIG_ENDIAN) + +#define INW_OFF(o) readw_b2l((char *)np->s.mmio_va + sym_offw(o)) +#define INL_OFF(o) readl_b2l((char *)np->s.mmio_va + (o)) + +#define OUTW_OFF(o, val) writew_l2b((val), (char *)np->s.mmio_va + sym_offw(o)) +#define 
OUTL_OFF(o, val) writel_l2b((val), (char *)np->s.mmio_va + (o)) + +#else + +#define INW_OFF(o) readw_raw((char *)np->s.mmio_va + sym_offw(o)) +#define INL_OFF(o) readl_raw((char *)np->s.mmio_va + (o)) + +#define OUTW_OFF(o, val) writew_raw((val), (char *)np->s.mmio_va + sym_offw(o)) +#define OUTL_OFF(o, val) writel_raw((val), (char *)np->s.mmio_va + (o)) + +#endif + +#endif /* defined SYM_CONF_IOMAPPED */ + +#define OUTRAM_OFF(o, a, l) memcpy_toio(np->s.ram_va + (o), (a), (l)) + +/* + * Remap some status field values. + */ +#define CAM_REQ_CMP DID_OK +#define CAM_SEL_TIMEOUT DID_NO_CONNECT +#define CAM_CMD_TIMEOUT DID_TIME_OUT +#define CAM_REQ_ABORTED DID_ABORT +#define CAM_UNCOR_PARITY DID_PARITY +#define CAM_SCSI_BUS_RESET DID_RESET +#define CAM_REQUEUE_REQ DID_SOFT_ERROR +#define CAM_UNEXP_BUSFREE DID_ERROR +#define CAM_SCSI_BUSY DID_BUS_BUSY + +#define CAM_DEV_NOT_THERE DID_NO_CONNECT +#define CAM_REQ_INVALID DID_ERROR +#define CAM_REQ_TOO_BIG DID_ERROR + +#define CAM_RESRC_UNAVAIL DID_ERROR + +/* + * Remap SCSI data direction values. + */ +#ifndef SCSI_DATA_UNKNOWN +#define SCSI_DATA_UNKNOWN 0 +#define SCSI_DATA_WRITE 1 +#define SCSI_DATA_READ 2 +#define SCSI_DATA_NONE 3 +#endif +#define CAM_DIR_NONE SCSI_DATA_NONE +#define CAM_DIR_IN SCSI_DATA_READ +#define CAM_DIR_OUT SCSI_DATA_WRITE +#define CAM_DIR_UNKNOWN SCSI_DATA_UNKNOWN + +/* + * These ones are used as return code from + * error recovery handlers under Linux. + */ +#define SCSI_SUCCESS SUCCESS +#define SCSI_FAILED FAILED + +/* + * System specific target data structure. + * None for now, under Linux. + */ +/* #define SYM_HAVE_STCB */ + +/* + * System specific lun data structure. + */ +#define SYM_HAVE_SLCB +struct sym_slcb { + u_short reqtags; /* Number of tags requested by user */ + u_short scdev_depth; /* Queue depth set in select_queue_depth() */ +}; + +/* + * System specific command data structure. + * Not needed under Linux. + */ +/* struct sym_sccb */ + +/* + * System specific host data structure. 
+ */ +struct sym_shcb { + /* + * Chip and controller indentification. + */ + int unit; + char inst_name[16]; + char chip_name[8]; + struct pci_dev *device; + + u_char bus; /* PCI BUS number */ + u_char device_fn; /* PCI BUS device and function */ + + spinlock_t smp_lock; /* Lock for SMP threading */ + + vm_offset_t mmio_va; /* MMIO kernel virtual address */ + vm_offset_t ram_va; /* RAM kernel virtual address */ + u32 io_port; /* IO port address */ + u_short io_ws; /* IO window size */ + int irq; /* IRQ number */ + + SYM_QUEHEAD wait_cmdq; /* Awaiting SCSI commands */ + SYM_QUEHEAD busy_cmdq; /* Enqueued SCSI commands */ + + struct timer_list timer; /* Timer handler link header */ + u_long lasttime; + u_long settle_time; /* Resetting the SCSI BUS */ + u_char settle_time_valid; +#if LINUX_VERSION_CODE < LinuxVersionCode(2, 4, 0) + u_char release_stage; /* Synchronisation on release */ +#endif +}; + +/* + * Return the name of the controller. + */ +#define sym_name(np) (np)->s.inst_name + +/* + * Data structure used as input for the NVRAM reading. + * Must resolve the IO macros and sym_name(), when + * used as sub-field 's' of another structure. + */ +typedef struct { + int bus; + u_char device_fn; + u_long base; + u_long base_2; + u_long base_c; + u_long base_2_c; + int irq; +/* port and address fields to fit INB, OUTB macros */ + u_long io_port; + vm_offset_t mmio_va; + char inst_name[16]; +} sym_slot; + +typedef struct sym_nvram sym_nvram; +typedef struct sym_pci_chip sym_chip; + +typedef struct { + struct pci_dev *pdev; + sym_slot s; + sym_chip chip; + sym_nvram *nvram; + u_short device_id; + u_char host_id; +#ifdef SYM_CONF_PQS_PDS_SUPPORT + u_char pqs_pds; +#endif + int attach_done; +} sym_device; + +typedef sym_device *sdev_p; + +/* + * The driver definitions (sym_hipd.h) must know about a + * couple of things related to the memory allocator. 
+ */ +typedef u_long m_addr_t; /* Enough bits to represent any address */ +#define SYM_MEM_PAGE_ORDER 0 /* 1 PAGE maximum */ +#define SYM_MEM_CLUSTER_SHIFT (PAGE_SHIFT+SYM_MEM_PAGE_ORDER) +#ifdef MODULE +#define SYM_MEM_FREE_UNUSED /* Free unused pages immediately */ +#endif +#ifdef SYM_LINUX_DYNAMIC_DMA_MAPPING +typedef struct pci_dev *m_pool_ident_t; +#endif + +/* + * Include driver soft definitions. + */ +#include "sym_fw.h" +#include "sym_hipd.h" + +/* + * Memory allocator related stuff. + */ + +#define SYM_MEM_GFP_FLAGS GFP_ATOMIC +#define SYM_MEM_WARN 1 /* Warn on failed operations */ + +#define sym_get_mem_cluster() \ + __get_free_pages(SYM_MEM_GFP_FLAGS, SYM_MEM_PAGE_ORDER) +#define sym_free_mem_cluster(p) \ + free_pages(p, SYM_MEM_PAGE_ORDER) + +void *sym_calloc(int size, char *name); +void sym_mfree(void *m, int size, char *name); + +#ifndef SYM_LINUX_DYNAMIC_DMA_MAPPING +/* + * Simple case. + * All the memory assummed DMAable and O/S providing virtual + * to bus physical address translation. + */ +#define __sym_calloc_dma(pool_id, size, name) sym_calloc(size, name) +#define __sym_mfree_dma(pool_id, m, size, name) sym_mfree(m, size, name) +#define __vtobus(b, p) virt_to_bus(p) + +#else /* SYM_LINUX_DYNAMIC_DMA_MAPPING */ +/* + * Complex case. + * We have to provide the driver memory allocator with methods for + * it to maintain virtual to bus physical address translations. 
+ */ + +#define sym_m_pool_match(mp_id1, mp_id2) (mp_id1 == mp_id2) + +static __inline m_addr_t sym_m_get_dma_mem_cluster(m_pool_p mp, m_vtob_p vbp) +{ + void *vaddr = 0; + dma_addr_t baddr = 0; + + vaddr = pci_alloc_consistent(mp->dev_dmat,SYM_MEM_CLUSTER_SIZE, &baddr); + if (vaddr) { + vbp->vaddr = (m_addr_t) vaddr; + vbp->baddr = (m_addr_t) baddr; + } + return (m_addr_t) vaddr; +} + +static __inline void sym_m_free_dma_mem_cluster(m_pool_p mp, m_vtob_p vbp) +{ + pci_free_consistent(mp->dev_dmat, SYM_MEM_CLUSTER_SIZE, + (void *)vbp->vaddr, (dma_addr_t)vbp->baddr); +} + +#define sym_m_create_dma_mem_tag(mp) (0) + +#define sym_m_delete_dma_mem_tag(mp) do { ; } while (0) + +void *__sym_calloc_dma(m_pool_ident_t dev_dmat, int size, char *name); +void __sym_mfree_dma(m_pool_ident_t dev_dmat, void *m, int size, char *name); +m_addr_t __vtobus(m_pool_ident_t dev_dmat, void *m); + +#endif /* SYM_LINUX_DYNAMIC_DMA_MAPPING */ + +/* + * Set the status field of a CAM CCB. + */ +static __inline void +sym_set_cam_status(Scsi_Cmnd *ccb, int status) +{ + ccb->result &= ~(0xff << 16); + ccb->result |= (status << 16); +} + +/* + * Get the status field of a CAM CCB. + */ +static __inline int +sym_get_cam_status(Scsi_Cmnd *ccb) +{ + return ((ccb->result >> 16) & 0xff); +} + +/* + * The dma mapping is mostly handled by the + * SCSI layer and the driver glue under Linux. + */ +#define sym_data_dmamap_create(np, cp) (0) +#define sym_data_dmamap_destroy(np, cp) do { ; } while (0) +#define sym_data_dmamap_unload(np, cp) do { ; } while (0) +#define sym_data_dmamap_presync(np, cp) do { ; } while (0) +#define sym_data_dmamap_postsync(np, cp) do { ; } while (0) + +/* + * Async handler for negotiations. 
+ */ +void sym_xpt_async_nego_wide(hcb_p np, int target); +#define sym_xpt_async_nego_sync(np, target) \ + sym_announce_transfer_rate(np, target) +#define sym_xpt_async_nego_ppr(np, target) \ + sym_announce_transfer_rate(np, target) + +/* + * Build CAM result for a successful IO and for a failed IO. + */ +static __inline void sym_set_cam_result_ok(hcb_p np, ccb_p cp, int resid) +{ + Scsi_Cmnd *cmd = cp->cam_ccb; + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,3,99) + cmd->resid = resid; +#endif + cmd->result = (((DID_OK) << 16) + ((cp->ssss_status) & 0x7f)); +} +void sym_set_cam_result_error(hcb_p np, ccb_p cp, int resid); + +/* + * Other O/S specific methods. + */ +#define sym_cam_target_id(ccb) (ccb)->target +#define sym_cam_target_lun(ccb) (ccb)->lun +#define sym_freeze_cam_ccb(ccb) do { ; } while (0) +void sym_xpt_done(hcb_p np, cam_ccb_p ccb); +void sym_xpt_done2(hcb_p np, cam_ccb_p ccb, int cam_status); +void sym_print_addr (ccb_p cp); +void sym_xpt_async_bus_reset(hcb_p np); +void sym_xpt_async_sent_bdr(hcb_p np, int target); +int sym_setup_data_and_start (hcb_p np, cam_scsiio_p csio, ccb_p cp); +void sym_log_bus_error(hcb_p np); +#ifdef SYM_OPT_SNIFF_INQUIRY +void sym_sniff_inquiry(hcb_p np, Scsi_Cmnd *cmd, int resid); +#endif + +#endif /* SYM_GLUE_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c new file mode 100644 index 000000000000..2464f4c27cbc --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -0,0 +1,6007 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. 
+ * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define SYM_DRIVER_NAME "sym-2.1.16a" + +#ifdef __FreeBSD__ +#include <dev/sym/sym_glue.h> +#else +#include "sym_glue.h" +#endif + +#if 0 +#define SYM_DEBUG_GENERIC_SUPPORT +#endif + +/* + * Needed function prototypes. + */ +static void sym_int_ma (hcb_p np); +static void sym_int_sir (hcb_p np); +static ccb_p sym_alloc_ccb(hcb_p np); +static ccb_p sym_ccb_from_dsa(hcb_p np, u32 dsa); +static void sym_alloc_lcb_tags (hcb_p np, u_char tn, u_char ln); +static void sym_complete_error (hcb_p np, ccb_p cp); +static void sym_complete_ok (hcb_p np, ccb_p cp); +static int sym_compute_residual(hcb_p np, ccb_p cp); + +/* + * Returns the name of this driver. + */ +char *sym_driver_name(void) +{ + return SYM_DRIVER_NAME; +} +/* + * Print a buffer in hexadecimal format. + */ +static void sym_printb_hex (u_char *p, int n) +{ + while (n-- > 0) + printf (" %x", *p++); +} + +/* + * Same with a label at beginning and .\n at end. + */ +static void sym_printl_hex (char *label, u_char *p, int n) +{ + printf ("%s", label); + sym_printb_hex (p, n); + printf (".\n"); +} + +/* + * Print something which allows to retrieve the controler type, + * unit, target, lun concerned by a kernel message. 
+ */ +static void sym_print_target (hcb_p np, int target) +{ + printf ("%s:%d:", sym_name(np), target); +} + +static void sym_print_lun(hcb_p np, int target, int lun) +{ + printf ("%s:%d:%d:", sym_name(np), target, lun); +} + +/* + * Print out the content of a SCSI message. + */ +static int sym_show_msg (u_char * msg) +{ + u_char i; + printf ("%x",*msg); + if (*msg==M_EXTENDED) { + for (i=1;i<8;i++) { + if (i-1>msg[1]) break; + printf ("-%x",msg[i]); + }; + return (i+1); + } else if ((*msg & 0xf0) == 0x20) { + printf ("-%x",msg[1]); + return (2); + }; + return (1); +} + +static void sym_print_msg (ccb_p cp, char *label, u_char *msg) +{ + PRINT_ADDR(cp); + if (label) + printf ("%s: ", label); + + (void) sym_show_msg (msg); + printf (".\n"); +} + +static void sym_print_nego_msg (hcb_p np, int target, char *label, u_char *msg) +{ + PRINT_TARGET(np, target); + if (label) + printf ("%s: ", label); + + (void) sym_show_msg (msg); + printf (".\n"); +} + +/* + * Print something that tells about extended errors. + */ +void sym_print_xerr(ccb_p cp, int x_status) +{ + if (x_status & XE_PARITY_ERR) { + PRINT_ADDR(cp); + printf ("unrecovered SCSI parity error.\n"); + } + if (x_status & XE_EXTRA_DATA) { + PRINT_ADDR(cp); + printf ("extraneous data discarded.\n"); + } + if (x_status & XE_BAD_PHASE) { + PRINT_ADDR(cp); + printf ("illegal scsi phase (4/5).\n"); + } + if (x_status & XE_SODL_UNRUN) { + PRINT_ADDR(cp); + printf ("ODD transfer in DATA OUT phase.\n"); + } + if (x_status & XE_SWIDE_OVRUN) { + PRINT_ADDR(cp); + printf ("ODD transfer in DATA IN phase.\n"); + } +} + +/* + * Return a string for SCSI BUS mode. + */ +static char *sym_scsi_bus_mode(int mode) +{ + switch(mode) { + case SMODE_HVD: return "HVD"; + case SMODE_SE: return "SE"; + case SMODE_LVD: return "LVD"; + } + return "??"; +} + +/* + * Soft reset the chip. + * + * Raising SRST when the chip is running may cause + * problems on dual function chips (see below). 
+ * On the other hand, LVD devices need some delay + * to settle and report actual BUS mode in STEST4. + */ +static void sym_chip_reset (hcb_p np) +{ + OUTB (nc_istat, SRST); + UDELAY (10); + OUTB (nc_istat, 0); + UDELAY(2000); /* For BUS MODE to settle */ +} + +/* + * Really soft reset the chip.:) + * + * Some 896 and 876 chip revisions may hang-up if we set + * the SRST (soft reset) bit at the wrong time when SCRIPTS + * are running. + * So, we need to abort the current operation prior to + * soft resetting the chip. + */ +static void sym_soft_reset (hcb_p np) +{ + u_char istat; + int i; + + if (!(np->features & FE_ISTAT1) || !(INB (nc_istat1) & SCRUN)) + goto do_chip_reset; + + OUTB (nc_istat, CABRT); + for (i = 100000 ; i ; --i) { + istat = INB (nc_istat); + if (istat & SIP) { + INW (nc_sist); + } + else if (istat & DIP) { + if (INB (nc_dstat) & ABRT); + break; + } + UDELAY(5); + } + OUTB (nc_istat, 0); + if (!i) + printf("%s: unable to abort current chip operation, " + "ISTAT=0x%02x.\n", sym_name(np), istat); +do_chip_reset: + sym_chip_reset (np); +} + +/* + * Start reset process. + * + * The interrupt handler will reinitialize the chip. + */ +static void sym_start_reset(hcb_p np) +{ + (void) sym_reset_scsi_bus(np, 1); +} + +int sym_reset_scsi_bus(hcb_p np, int enab_int) +{ + u32 term; + int retv = 0; + + sym_soft_reset(np); /* Soft reset the chip */ + if (enab_int) + OUTW (nc_sien, RST); + /* + * Enable Tolerant, reset IRQD if present and + * properly set IRQ mode, prior to resetting the bus. + */ + OUTB (nc_stest3, TE); + OUTB (nc_dcntl, (np->rv_dcntl & IRQM)); + OUTB (nc_scntl1, CRST); + UDELAY (200); + + if (!SYM_SETUP_SCSI_BUS_CHECK) + goto out; + /* + * Check for no terminators or SCSI bus shorts to ground. + * Read SCSI data bus, data parity bits and control signals. + * We are expecting RESET to be TRUE and other signals to be + * FALSE. 
+ */ + term = INB(nc_sstat0); + term = ((term & 2) << 7) + ((term & 1) << 17); /* rst sdp0 */ + term |= ((INB(nc_sstat2) & 0x01) << 26) | /* sdp1 */ + ((INW(nc_sbdl) & 0xff) << 9) | /* d7-0 */ + ((INW(nc_sbdl) & 0xff00) << 10) | /* d15-8 */ + INB(nc_sbcl); /* req ack bsy sel atn msg cd io */ + + if (!(np->features & FE_WIDE)) + term &= 0x3ffff; + + if (term != (2<<7)) { + printf("%s: suspicious SCSI data while resetting the BUS.\n", + sym_name(np)); + printf("%s: %sdp0,d7-0,rst,req,ack,bsy,sel,atn,msg,c/d,i/o = " + "0x%lx, expecting 0x%lx\n", + sym_name(np), + (np->features & FE_WIDE) ? "dp1,d15-8," : "", + (u_long)term, (u_long)(2<<7)); + if (SYM_SETUP_SCSI_BUS_CHECK == 1) + retv = 1; + } +out: + OUTB (nc_scntl1, 0); + /* MDELAY(100); */ + return retv; +} + +/* + * Select SCSI clock frequency + */ +static void sym_selectclock(hcb_p np, u_char scntl3) +{ + /* + * If multiplier not present or not selected, leave here. + */ + if (np->multiplier <= 1) { + OUTB(nc_scntl3, scntl3); + return; + } + + if (sym_verbose >= 2) + printf ("%s: enabling clock multiplier\n", sym_name(np)); + + OUTB(nc_stest1, DBLEN); /* Enable clock multiplier */ + /* + * Wait for the LCKFRQ bit to be set if supported by the chip. + * Otherwise wait 50 micro-seconds (at least). + */ + if (np->features & FE_LCKFRQ) { + int i = 20; + while (!(INB(nc_stest4) & LCKFRQ) && --i > 0) + UDELAY (20); + if (!i) + printf("%s: the chip cannot lock the frequency\n", + sym_name(np)); + } else + UDELAY ((50+10)); + OUTB(nc_stest3, HSC); /* Halt the scsi clock */ + OUTB(nc_scntl3, scntl3); + OUTB(nc_stest1, (DBLEN|DBLSEL));/* Select clock multiplier */ + OUTB(nc_stest3, 0x00); /* Restart scsi clock */ +} + + +/* + * Determine the chip's clock frequency. + * + * This is essential for the negotiation of the synchronous + * transfer rate. + * + * Note: we have to return the correct value. + * THERE IS NO SAFE DEFAULT VALUE. + * + * Most NCR/SYMBIOS boards are delivered with a 40 Mhz clock. 
+ * 53C860 and 53C875 rev. 1 support fast20 transfers but + * do not have a clock doubler and so are provided with a + * 80 MHz clock. All other fast20 boards incorporate a doubler + * and so should be delivered with a 40 MHz clock. + * The recent fast40 chips (895/896/895A/1010) use a 40 Mhz base + * clock and provide a clock quadrupler (160 Mhz). + */ + +/* + * calculate SCSI clock frequency (in KHz) + */ +static unsigned getfreq (hcb_p np, int gen) +{ + unsigned int ms = 0; + unsigned int f; + + /* + * Measure GEN timer delay in order + * to calculate SCSI clock frequency + * + * This code will never execute too + * many loop iterations (if DELAY is + * reasonably correct). It could get + * too low a delay (too high a freq.) + * if the CPU is slow executing the + * loop for some reason (an NMI, for + * example). For this reason we will + * if multiple measurements are to be + * performed trust the higher delay + * (lower frequency returned). + */ + OUTW (nc_sien , 0); /* mask all scsi interrupts */ + (void) INW (nc_sist); /* clear pending scsi interrupt */ + OUTB (nc_dien , 0); /* mask all dma interrupts */ + (void) INW (nc_sist); /* another one, just to be sure :) */ + /* + * The C1010-33 core does not report GEN in SIST, + * if this interrupt is masked in SIEN. + * I don't know yet if the C1010-66 behaves the same way. + */ + if (np->features & FE_C10) { + OUTW (nc_sien, GEN); + OUTB (nc_istat1, SIRQD); + } + OUTB (nc_scntl3, 4); /* set pre-scaler to divide by 3 */ + OUTB (nc_stime1, 0); /* disable general purpose timer */ + OUTB (nc_stime1, gen); /* set to nominal delay of 1<<gen * 125us */ + while (!(INW(nc_sist) & GEN) && ms++ < 100000) + UDELAY (1000/4);/* count in 1/4 of ms */ + OUTB (nc_stime1, 0); /* disable general purpose timer */ + /* + * Undo C1010-33 specific settings. 
+ */ + if (np->features & FE_C10) { + OUTW (nc_sien, 0); + OUTB (nc_istat1, 0); + } + /* + * set prescaler to divide by whatever 0 means + * 0 ought to choose divide by 2, but appears + * to set divide by 3.5 mode in my 53c810 ... + */ + OUTB (nc_scntl3, 0); + + /* + * adjust for prescaler, and convert into KHz + */ + f = ms ? ((1 << gen) * (4340*4)) / ms : 0; + + /* + * The C1010-33 result is biased by a factor + * of 2/3 compared to earlier chips. + */ + if (np->features & FE_C10) + f = (f * 2) / 3; + + if (sym_verbose >= 2) + printf ("%s: Delay (GEN=%d): %u msec, %u KHz\n", + sym_name(np), gen, ms/4, f); + + return f; +} + +static unsigned sym_getfreq (hcb_p np) +{ + u_int f1, f2; + int gen = 8; + + (void) getfreq (np, gen); /* throw away first result */ + f1 = getfreq (np, gen); + f2 = getfreq (np, gen); + if (f1 > f2) f1 = f2; /* trust lower result */ + return f1; +} + +/* + * Get/probe chip SCSI clock frequency + */ +static void sym_getclock (hcb_p np, int mult) +{ + unsigned char scntl3 = np->sv_scntl3; + unsigned char stest1 = np->sv_stest1; + unsigned f1; + + np->multiplier = 1; + f1 = 40000; + /* + * True with 875/895/896/895A with clock multiplier selected + */ + if (mult > 1 && (stest1 & (DBLEN+DBLSEL)) == DBLEN+DBLSEL) { + if (sym_verbose >= 2) + printf ("%s: clock multiplier found\n", sym_name(np)); + np->multiplier = mult; + } + + /* + * If multiplier not found or scntl3 not 7,5,3, + * reset chip and get frequency from general purpose timer. + * Otherwise trust scntl3 BIOS setting. 
+ */ + if (np->multiplier != mult || (scntl3 & 7) < 3 || !(scntl3 & 1)) { + OUTB (nc_stest1, 0); /* make sure doubler is OFF */ + f1 = sym_getfreq (np); + + if (sym_verbose) + printf ("%s: chip clock is %uKHz\n", sym_name(np), f1); + + if (f1 < 45000) f1 = 40000; + else if (f1 < 55000) f1 = 50000; + else f1 = 80000; + + if (f1 < 80000 && mult > 1) { + if (sym_verbose >= 2) + printf ("%s: clock multiplier assumed\n", + sym_name(np)); + np->multiplier = mult; + } + } else { + if ((scntl3 & 7) == 3) f1 = 40000; + else if ((scntl3 & 7) == 5) f1 = 80000; + else f1 = 160000; + + f1 /= np->multiplier; + } + + /* + * Compute controller synchronous parameters. + */ + f1 *= np->multiplier; + np->clock_khz = f1; +} + +/* + * Get/probe PCI clock frequency + */ +static int sym_getpciclock (hcb_p np) +{ + int f = 0; + + /* + * For now, we only need to know about the actual + * PCI BUS clock frequency for C1010-66 chips. + */ +#if 1 + if (np->features & FE_66MHZ) { +#else + if (1) { +#endif + OUTB (nc_stest1, SCLK); /* Use the PCI clock as SCSI clock */ + f = (int) sym_getfreq (np); + OUTB (nc_stest1, 0); + } + np->pciclk_khz = f; + + return f; +} + +/* + * SYMBIOS chip clock divisor table. + * + * Divisors are multiplied by 10,000,000 in order to make + * calculations more simple. + */ +#define _5M 5000000 +static u32 div_10M[] = {2*_5M, 3*_5M, 4*_5M, 6*_5M, 8*_5M, 12*_5M, 16*_5M}; + +/* + * Get clock factor and sync divisor for a given + * synchronous factor period. 
+ */ +static int +sym_getsync(hcb_p np, u_char dt, u_char sfac, u_char *divp, u_char *fakp) +{ + u32 clk = np->clock_khz; /* SCSI clock frequency in kHz */ + int div = np->clock_divn; /* Number of divisors supported */ + u32 fak; /* Sync factor in sxfer */ + u32 per; /* Period in tenths of ns */ + u32 kpc; /* (per * clk) */ + int ret; + + /* + * Compute the synchronous period in tenths of nano-seconds + */ + if (dt && sfac <= 9) per = 125; + else if (sfac <= 10) per = 250; + else if (sfac == 11) per = 303; + else if (sfac == 12) per = 500; + else per = 40 * sfac; + ret = per; + + kpc = per * clk; + if (dt) + kpc <<= 1; + + /* + * For earliest C10 revision 0, we cannot use extra + * clocks for the setting of the SCSI clocking. + * Note that this limits the lowest sync data transfer + * to 5 Mega-transfers per second and may result in + * using higher clock divisors. + */ +#if 1 + if ((np->features & (FE_C10|FE_U3EN)) == FE_C10) { + /* + * Look for the lowest clock divisor that allows an + * output speed not faster than the period. + */ + while (div > 0) { + --div; + if (kpc > (div_10M[div] << 2)) { + ++div; + break; + } + } + fak = 0; /* No extra clocks */ + if (div == np->clock_divn) { /* Are we too fast ? */ + ret = -1; + } + *divp = div; + *fakp = fak; + return ret; + } +#endif + + /* + * Look for the greatest clock divisor that allows an + * input speed faster than the period. + */ + while (div-- > 0) + if (kpc >= (div_10M[div] << 2)) break; + + /* + * Calculate the lowest clock factor that allows an output + * speed not faster than the period, and the max output speed. + * If fak >= 1 we will set both XCLKH_ST and XCLKH_DT. + * If fak >= 2 we will also set XCLKS_ST and XCLKS_DT. + */ + if (dt) { + fak = (kpc - 1) / (div_10M[div] << 1) + 1 - 2; + /* ret = ((2+fak)*div_10M[div])/np->clock_khz; */ + } + else { + fak = (kpc - 1) / div_10M[div] + 1 - 4; + /* ret = ((4+fak)*div_10M[div])/np->clock_khz; */ + } + + /* + * Check against our hardware limits, or bugs :). 
+ */ + if (fak < 0) {fak = 0; ret = -1;} + if (fak > 2) {fak = 2; ret = -1;} + + /* + * Compute and return sync parameters. + */ + *divp = div; + *fakp = fak; + + return ret; +} + +/* + * SYMBIOS chips allow burst lengths of 2, 4, 8, 16, 32, 64, + * 128 transfers. All chips support at least 16 transfers + * bursts. The 825A, 875 and 895 chips support bursts of up + * to 128 transfers and the 895A and 896 support bursts of up + * to 64 transfers. All other chips support up to 16 + * transfers bursts. + * + * For PCI 32 bit data transfers each transfer is a DWORD. + * It is a QUADWORD (8 bytes) for PCI 64 bit data transfers. + * + * We use log base 2 (burst length) as internal code, with + * value 0 meaning "burst disabled". + */ + +/* + * Burst length from burst code. + */ +#define burst_length(bc) (!(bc))? 0 : 1 << (bc) + +/* + * Burst code from io register bits. + */ +#define burst_code(dmode, ctest4, ctest5) \ + (ctest4) & 0x80? 0 : (((dmode) & 0xc0) >> 6) + ((ctest5) & 0x04) + 1 + +/* + * Set initial io register bits from burst code. + */ +static __inline void sym_init_burst(hcb_p np, u_char bc) +{ + np->rv_ctest4 &= ~0x80; + np->rv_dmode &= ~(0x3 << 6); + np->rv_ctest5 &= ~0x4; + + if (!bc) { + np->rv_ctest4 |= 0x80; + } + else { + --bc; + np->rv_dmode |= ((bc & 0x3) << 6); + np->rv_ctest5 |= (bc & 0x4); + } +} + + +/* + * Print out the list of targets that have some flag disabled by user. + */ +static void sym_print_targets_flag(hcb_p np, int mask, char *msg) +{ + int cnt; + int i; + + for (cnt = 0, i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { + if (i == np->myaddr) + continue; + if (np->target[i].usrflags & mask) { + if (!cnt++) + printf("%s: %s disabled for targets", + sym_name(np), msg); + printf(" %d", i); + } + } + if (cnt) + printf(".\n"); +} + +/* + * Save initial settings of some IO registers. + * Assumed to have been set by BIOS. + * We cannot reset the chip prior to reading the + * IO registers, since informations will be lost. 
+ * Since the SCRIPTS processor may be running, this + * is not safe on paper, but it seems to work quite + * well. :) + */ +static void sym_save_initial_setting (hcb_p np) +{ + np->sv_scntl0 = INB(nc_scntl0) & 0x0a; + np->sv_scntl3 = INB(nc_scntl3) & 0x07; + np->sv_dmode = INB(nc_dmode) & 0xce; + np->sv_dcntl = INB(nc_dcntl) & 0xa8; + np->sv_ctest3 = INB(nc_ctest3) & 0x01; + np->sv_ctest4 = INB(nc_ctest4) & 0x80; + np->sv_gpcntl = INB(nc_gpcntl); + np->sv_stest1 = INB(nc_stest1); + np->sv_stest2 = INB(nc_stest2) & 0x20; + np->sv_stest4 = INB(nc_stest4); + if (np->features & FE_C10) { /* Always large DMA fifo + ultra3 */ + np->sv_scntl4 = INB(nc_scntl4); + np->sv_ctest5 = INB(nc_ctest5) & 0x04; + } + else + np->sv_ctest5 = INB(nc_ctest5) & 0x24; +} + +/* + * Prepare io register values used by sym_start_up() + * according to selected and supported features. + */ +static int sym_prepare_setting(hcb_p np, struct sym_nvram *nvram) +{ + u_char burst_max; + u32 period; + int i; + + /* + * Wide ? + */ + np->maxwide = (np->features & FE_WIDE)? 1 : 0; + + /* + * Guess the frequency of the chip's clock. + */ + if (np->features & (FE_ULTRA3 | FE_ULTRA2)) + np->clock_khz = 160000; + else if (np->features & FE_ULTRA) + np->clock_khz = 80000; + else + np->clock_khz = 40000; + + /* + * Get the clock multiplier factor. + */ + if (np->features & FE_QUAD) + np->multiplier = 4; + else if (np->features & FE_DBLR) + np->multiplier = 2; + else + np->multiplier = 1; + + /* + * Measure SCSI clock frequency for chips + * it may vary from assumed one. + */ + if (np->features & FE_VARCLK) + sym_getclock(np, np->multiplier); + + /* + * Divisor to be used for async (timer pre-scaler). + */ + i = np->clock_divn - 1; + while (--i >= 0) { + if (10ul * SYM_CONF_MIN_ASYNC * np->clock_khz > div_10M[i]) { + ++i; + break; + } + } + np->rv_scntl3 = i+1; + + /* + * The C1010 uses hardwired divisors for async. + * So, we just throw away, the async. 
divisor.:-) + */ + if (np->features & FE_C10) + np->rv_scntl3 = 0; + + /* + * Minimum synchronous period factor supported by the chip. + * Btw, 'period' is in tenths of nanoseconds. + */ + period = (4 * div_10M[0] + np->clock_khz - 1) / np->clock_khz; + if (period <= 250) np->minsync = 10; + else if (period <= 303) np->minsync = 11; + else if (period <= 500) np->minsync = 12; + else np->minsync = (period + 40 - 1) / 40; + + /* + * Check against chip SCSI standard support (SCSI-2,ULTRA,ULTRA2). + */ + if (np->minsync < 25 && + !(np->features & (FE_ULTRA|FE_ULTRA2|FE_ULTRA3))) + np->minsync = 25; + else if (np->minsync < 12 && + !(np->features & (FE_ULTRA2|FE_ULTRA3))) + np->minsync = 12; + + /* + * Maximum synchronous period factor supported by the chip. + */ + period = (11 * div_10M[np->clock_divn - 1]) / (4 * np->clock_khz); + np->maxsync = period > 2540 ? 254 : period / 10; + + /* + * If chip is a C1010, guess the sync limits in DT mode. + */ + if ((np->features & (FE_C10|FE_ULTRA3)) == (FE_C10|FE_ULTRA3)) { + if (np->clock_khz == 160000) { + np->minsync_dt = 9; + np->maxsync_dt = 50; + np->maxoffs_dt = nvram->type ? 62 : 31; + } + } + + /* + * 64 bit addressing (895A/896/1010) ? + */ + if (np->features & FE_DAC) { +#if SYM_CONF_DMA_ADDRESSING_MODE == 0 + np->rv_ccntl1 |= (DDAC); +#elif SYM_CONF_DMA_ADDRESSING_MODE == 1 + if (!np->use_dac) + np->rv_ccntl1 |= (DDAC); + else + np->rv_ccntl1 |= (XTIMOD | EXTIBMV); +#elif SYM_CONF_DMA_ADDRESSING_MODE == 2 + if (!np->use_dac) + np->rv_ccntl1 |= (DDAC); + else + np->rv_ccntl1 |= (0 | EXTIBMV); +#endif + } + + /* + * Phase mismatch handled by SCRIPTS (895A/896/1010) ? + */ + if (np->features & FE_NOPM) + np->rv_ccntl0 |= (ENPMJ); + + /* + * C1010-33 Errata: Part Number:609-039638 (rev. 1) is fixed. + * In dual channel mode, contention occurs if internal cycles + * are used. Disable internal cycles. 
+ */ + if (np->device_id == PCI_ID_LSI53C1010 && + np->revision_id < 0x1) + np->rv_ccntl0 |= DILS; + + /* + * Select burst length (dwords) + */ + burst_max = SYM_SETUP_BURST_ORDER; + if (burst_max == 255) + burst_max = burst_code(np->sv_dmode, np->sv_ctest4, + np->sv_ctest5); + if (burst_max > 7) + burst_max = 7; + if (burst_max > np->maxburst) + burst_max = np->maxburst; + + /* + * DEL 352 - 53C810 Rev x11 - Part Number 609-0392140 - ITEM 2. + * This chip and the 860 Rev 1 may wrongly use PCI cache line + * based transactions on LOAD/STORE instructions. So we have + * to prevent these chips from using such PCI transactions in + * this driver. The generic ncr driver that does not use + * LOAD/STORE instructions does not need this work-around. + */ + if ((np->device_id == PCI_ID_SYM53C810 && + np->revision_id >= 0x10 && np->revision_id <= 0x11) || + (np->device_id == PCI_ID_SYM53C860 && + np->revision_id <= 0x1)) + np->features &= ~(FE_WRIE|FE_ERL|FE_ERMP); + + /* + * Select all supported special features. + * If we are using on-board RAM for scripts, prefetch (PFEN) + * does not help, but burst op fetch (BOF) does. + * Disabling PFEN makes sure BOF will be used. 
+ */ + if (np->features & FE_ERL) + np->rv_dmode |= ERL; /* Enable Read Line */ + if (np->features & FE_BOF) + np->rv_dmode |= BOF; /* Burst Opcode Fetch */ + if (np->features & FE_ERMP) + np->rv_dmode |= ERMP; /* Enable Read Multiple */ +#if 1 + if ((np->features & FE_PFEN) && !np->ram_ba) +#else + if (np->features & FE_PFEN) +#endif + np->rv_dcntl |= PFEN; /* Prefetch Enable */ + if (np->features & FE_CLSE) + np->rv_dcntl |= CLSE; /* Cache Line Size Enable */ + if (np->features & FE_WRIE) + np->rv_ctest3 |= WRIE; /* Write and Invalidate */ + if (np->features & FE_DFS) + np->rv_ctest5 |= DFS; /* Dma Fifo Size */ + + /* + * Select some other + */ + if (SYM_SETUP_PCI_PARITY) + np->rv_ctest4 |= MPEE; /* Master parity checking */ + if (SYM_SETUP_SCSI_PARITY) + np->rv_scntl0 |= 0x0a; /* full arb., ena parity, par->ATN */ + + /* + * Get parity checking, host ID and verbose mode from NVRAM + */ + np->myaddr = 255; + sym_nvram_setup_host (np, nvram); + + /* + * Get SCSI addr of host adapter (set by bios?). + */ + if (np->myaddr == 255) { + np->myaddr = INB(nc_scid) & 0x07; + if (!np->myaddr) + np->myaddr = SYM_SETUP_HOST_ID; + } + + /* + * Prepare initial io register bits for burst length + */ + sym_init_burst(np, burst_max); + + /* + * Set SCSI BUS mode. + * - LVD capable chips (895/895A/896/1010) report the + * current BUS mode through the STEST4 IO register. + * - For previous generation chips (825/825A/875), + * user has to tell us how to check against HVD, + * since a 100% safe algorithm is not possible. 
+ */ + np->scsi_mode = SMODE_SE; + if (np->features & (FE_ULTRA2|FE_ULTRA3)) + np->scsi_mode = (np->sv_stest4 & SMODE); + else if (np->features & FE_DIFF) { + if (SYM_SETUP_SCSI_DIFF == 1) { + if (np->sv_scntl3) { + if (np->sv_stest2 & 0x20) + np->scsi_mode = SMODE_HVD; + } + else if (nvram->type == SYM_SYMBIOS_NVRAM) { + if (!(INB(nc_gpreg) & 0x08)) + np->scsi_mode = SMODE_HVD; + } + } + else if (SYM_SETUP_SCSI_DIFF == 2) + np->scsi_mode = SMODE_HVD; + } + if (np->scsi_mode == SMODE_HVD) + np->rv_stest2 |= 0x20; + + /* + * Set LED support from SCRIPTS. + * Ignore this feature for boards known to use a + * specific GPIO wiring and for the 895A, 896 + * and 1010 that drive the LED directly. + */ + if ((SYM_SETUP_SCSI_LED || + (nvram->type == SYM_SYMBIOS_NVRAM || + (nvram->type == SYM_TEKRAM_NVRAM && + np->device_id == PCI_ID_SYM53C895))) && + !(np->features & FE_LEDC) && !(np->sv_gpcntl & 0x01)) + np->features |= FE_LED0; + + /* + * Set irq mode. + */ + switch(SYM_SETUP_IRQ_MODE & 3) { + case 2: + np->rv_dcntl |= IRQM; + break; + case 1: + np->rv_dcntl |= (np->sv_dcntl & IRQM); + break; + default: + break; + } + + /* + * Configure targets according to driver setup. + * If NVRAM present get targets setup from NVRAM. + */ + for (i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { + tcb_p tp = &np->target[i]; + + tp->tinfo.user.scsi_version = tp->tinfo.curr.scsi_version= 2; + tp->tinfo.user.spi_version = tp->tinfo.curr.spi_version = 2; + tp->tinfo.user.period = np->minsync; + tp->tinfo.user.offset = np->maxoffs; + tp->tinfo.user.width = np->maxwide ? BUS_16_BIT : BUS_8_BIT; + tp->usrflags |= (SYM_DISC_ENABLED | SYM_TAGS_ENABLED); + tp->usrtags = SYM_SETUP_MAX_TAG; + + sym_nvram_setup_target (np, i, nvram); + + /* + * For now, guess PPR/DT support from the period + * and BUS width. 
+ */ + if (np->features & FE_ULTRA3) { + if (tp->tinfo.user.period <= 9 && + tp->tinfo.user.width == BUS_16_BIT) { + tp->tinfo.user.options |= PPR_OPT_DT; + tp->tinfo.user.offset = np->maxoffs_dt; + tp->tinfo.user.spi_version = 3; + } + } + + if (!tp->usrtags) + tp->usrflags &= ~SYM_TAGS_ENABLED; + } + + /* + * Let user know about the settings. + */ + i = nvram->type; + printf("%s: %s NVRAM, ID %d, Fast-%d, %s, %s\n", sym_name(np), + i == SYM_SYMBIOS_NVRAM ? "Symbios" : + (i == SYM_TEKRAM_NVRAM ? "Tekram" : "No"), + np->myaddr, + (np->features & FE_ULTRA3) ? 80 : + (np->features & FE_ULTRA2) ? 40 : + (np->features & FE_ULTRA) ? 20 : 10, + sym_scsi_bus_mode(np->scsi_mode), + (np->rv_scntl0 & 0xa) ? "parity checking" : "NO parity"); + /* + * Tell him more on demand. + */ + if (sym_verbose) { + printf("%s: %s IRQ line driver%s\n", + sym_name(np), + np->rv_dcntl & IRQM ? "totem pole" : "open drain", + np->ram_ba ? ", using on-chip SRAM" : ""); + printf("%s: using %s firmware.\n", sym_name(np), np->fw_name); + if (np->features & FE_NOPM) + printf("%s: handling phase mismatch from SCRIPTS.\n", + sym_name(np)); + } + /* + * And still more. + */ + if (sym_verbose >= 2) { + printf ("%s: initial SCNTL3/DMODE/DCNTL/CTEST3/4/5 = " + "(hex) %02x/%02x/%02x/%02x/%02x/%02x\n", + sym_name(np), np->sv_scntl3, np->sv_dmode, np->sv_dcntl, + np->sv_ctest3, np->sv_ctest4, np->sv_ctest5); + + printf ("%s: final SCNTL3/DMODE/DCNTL/CTEST3/4/5 = " + "(hex) %02x/%02x/%02x/%02x/%02x/%02x\n", + sym_name(np), np->rv_scntl3, np->rv_dmode, np->rv_dcntl, + np->rv_ctest3, np->rv_ctest4, np->rv_ctest5); + } + /* + * Let user be aware of targets that have some disable flags set. + */ + sym_print_targets_flag(np, SYM_SCAN_BOOT_DISABLED, "SCAN AT BOOT"); + if (sym_verbose) + sym_print_targets_flag(np, SYM_SCAN_LUNS_DISABLED, + "SCAN FOR LUNS"); + + return 0; +} + +/* + * Test the pci bus snoop logic :-( + * + * Has to be called with interrupts disabled. 
+ */ +#ifndef SYM_CONF_IOMAPPED +static int sym_regtest (hcb_p np) +{ + register volatile u32 data; + /* + * chip registers may NOT be cached. + * write 0xffffffff to a read only register area, + * and try to read it back. + */ + data = 0xffffffff; + OUTL_OFF(offsetof(struct sym_reg, nc_dstat), data); + data = INL_OFF(offsetof(struct sym_reg, nc_dstat)); +#if 1 + if (data == 0xffffffff) { +#else + if ((data & 0xe2f0fffd) != 0x02000080) { +#endif + printf ("CACHE TEST FAILED: reg dstat-sstat2 readback %x.\n", + (unsigned) data); + return (0x10); + }; + return (0); +} +#endif + +static int sym_snooptest (hcb_p np) +{ + u32 sym_rd, sym_wr, sym_bk, host_rd, host_wr, pc, dstat; + int i, err=0; +#ifndef SYM_CONF_IOMAPPED + err |= sym_regtest (np); + if (err) return (err); +#endif +restart_test: + /* + * Enable Master Parity Checking as we intend + * to enable it for normal operations. + */ + OUTB (nc_ctest4, (np->rv_ctest4 & MPEE)); + /* + * init + */ + pc = SCRIPTZ_BA (np, snooptest); + host_wr = 1; + sym_wr = 2; + /* + * Set memory and register. + */ + np->scratch = cpu_to_scr(host_wr); + OUTL (nc_temp, sym_wr); + /* + * Start script (exchange values) + */ + OUTL (nc_dsa, np->hcb_ba); + OUTL_DSP (pc); + /* + * Wait 'til done (with timeout) + */ + for (i=0; i<SYM_SNOOP_TIMEOUT; i++) + if (INB(nc_istat) & (INTF|SIP|DIP)) + break; + if (i>=SYM_SNOOP_TIMEOUT) { + printf ("CACHE TEST FAILED: timeout.\n"); + return (0x20); + }; + /* + * Check for fatal DMA errors. + */ + dstat = INB (nc_dstat); +#if 1 /* Band aiding for broken hardwares that fail PCI parity */ + if ((dstat & MDPE) && (np->rv_ctest4 & MPEE)) { + printf ("%s: PCI DATA PARITY ERROR DETECTED - " + "DISABLING MASTER DATA PARITY CHECKING.\n", + sym_name(np)); + np->rv_ctest4 &= ~MPEE; + goto restart_test; + } +#endif + if (dstat & (MDPE|BF|IID)) { + printf ("CACHE TEST FAILED: DMA error (dstat=0x%02x).", dstat); + return (0x80); + } + /* + * Save termination position. 
+ */ + pc = INL (nc_dsp); + /* + * Read memory and register. + */ + host_rd = scr_to_cpu(np->scratch); + sym_rd = INL (nc_scratcha); + sym_bk = INL (nc_temp); + /* + * Check termination position. + */ + if (pc != SCRIPTZ_BA (np, snoopend)+8) { + printf ("CACHE TEST FAILED: script execution failed.\n"); + printf ("start=%08lx, pc=%08lx, end=%08lx\n", + (u_long) SCRIPTZ_BA (np, snooptest), (u_long) pc, + (u_long) SCRIPTZ_BA (np, snoopend) +8); + return (0x40); + }; + /* + * Show results. + */ + if (host_wr != sym_rd) { + printf ("CACHE TEST FAILED: host wrote %d, chip read %d.\n", + (int) host_wr, (int) sym_rd); + err |= 1; + }; + if (host_rd != sym_wr) { + printf ("CACHE TEST FAILED: chip wrote %d, host read %d.\n", + (int) sym_wr, (int) host_rd); + err |= 2; + }; + if (sym_bk != sym_wr) { + printf ("CACHE TEST FAILED: chip wrote %d, read back %d.\n", + (int) sym_wr, (int) sym_bk); + err |= 4; + }; + + return (err); +} + +/* + * log message for real hard errors + * + * sym0 targ 0?: ERROR (ds:si) (so-si-sd) (sx/s3/s4) @ name (dsp:dbc). + * reg: r0 r1 r2 r3 r4 r5 r6 ..... rf. + * + * exception register: + * ds: dstat + * si: sist + * + * SCSI bus lines: + * so: control lines as driven by chip. + * si: control lines as seen by chip. + * sd: scsi data lines as seen by chip. + * + * wide/fastmode: + * sx: sxfer (see the manual) + * s3: scntl3 (see the manual) + * s4: scntl4 (see the manual) + * + * current script command: + * dsp: script adress (relative to start of script). + * dbc: first word of script command. 
+ * + * First 24 register of the chip: + * r0..rf + */ +static void sym_log_hard_error(hcb_p np, u_short sist, u_char dstat) +{ + u32 dsp; + int script_ofs; + int script_size; + char *script_name; + u_char *script_base; + int i; + + dsp = INL (nc_dsp); + + if (dsp > np->scripta_ba && + dsp <= np->scripta_ba + np->scripta_sz) { + script_ofs = dsp - np->scripta_ba; + script_size = np->scripta_sz; + script_base = (u_char *) np->scripta0; + script_name = "scripta"; + } + else if (np->scriptb_ba < dsp && + dsp <= np->scriptb_ba + np->scriptb_sz) { + script_ofs = dsp - np->scriptb_ba; + script_size = np->scriptb_sz; + script_base = (u_char *) np->scriptb0; + script_name = "scriptb"; + } else { + script_ofs = dsp; + script_size = 0; + script_base = 0; + script_name = "mem"; + } + + printf ("%s:%d: ERROR (%x:%x) (%x-%x-%x) (%x/%x/%x) @ (%s %x:%08x).\n", + sym_name (np), (unsigned)INB (nc_sdid)&0x0f, dstat, sist, + (unsigned)INB (nc_socl), (unsigned)INB (nc_sbcl), + (unsigned)INB (nc_sbdl), (unsigned)INB (nc_sxfer), + (unsigned)INB (nc_scntl3), + (np->features & FE_C10) ? (unsigned)INB (nc_scntl4) : 0, + script_name, script_ofs, (unsigned)INL (nc_dbc)); + + if (((script_ofs & 3) == 0) && + (unsigned)script_ofs < script_size) { + printf ("%s: script cmd = %08x\n", sym_name(np), + scr_to_cpu((int) *(u32 *)(script_base + script_ofs))); + } + + printf ("%s: regdump:", sym_name(np)); + for (i=0; i<24;i++) + printf (" %02x", (unsigned)INB_OFF(i)); + printf (".\n"); + + /* + * PCI BUS error. 
+ */ + if (dstat & (MDPE|BF)) + sym_log_bus_error(np); +} + +static struct sym_pci_chip sym_pci_dev_table[] = { + {PCI_ID_SYM53C810, 0x0f, "810", 4, 8, 4, 64, + FE_ERL} + , +#ifdef SYM_DEBUG_GENERIC_SUPPORT + {PCI_ID_SYM53C810, 0xff, "810a", 4, 8, 4, 1, + FE_BOF} + , +#else + {PCI_ID_SYM53C810, 0xff, "810a", 4, 8, 4, 1, + FE_CACHE_SET|FE_LDSTR|FE_PFEN|FE_BOF} + , +#endif + {PCI_ID_SYM53C815, 0xff, "815", 4, 8, 4, 64, + FE_BOF|FE_ERL} + , + {PCI_ID_SYM53C825, 0x0f, "825", 6, 8, 4, 64, + FE_WIDE|FE_BOF|FE_ERL|FE_DIFF} + , + {PCI_ID_SYM53C825, 0xff, "825a", 6, 8, 4, 2, + FE_WIDE|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN|FE_RAM|FE_DIFF} + , + {PCI_ID_SYM53C860, 0xff, "860", 4, 8, 5, 1, + FE_ULTRA|FE_CACHE_SET|FE_BOF|FE_LDSTR|FE_PFEN} + , + {PCI_ID_SYM53C875, 0x01, "875", 6, 16, 5, 2, + FE_WIDE|FE_ULTRA|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_DIFF|FE_VARCLK} + , + {PCI_ID_SYM53C875, 0xff, "875", 6, 16, 5, 2, + FE_WIDE|FE_ULTRA|FE_DBLR|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_DIFF|FE_VARCLK} + , + {PCI_ID_SYM53C875_2, 0xff, "875", 6, 16, 5, 2, + FE_WIDE|FE_ULTRA|FE_DBLR|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_DIFF|FE_VARCLK} + , + {PCI_ID_SYM53C885, 0xff, "885", 6, 16, 5, 2, + FE_WIDE|FE_ULTRA|FE_DBLR|FE_CACHE0_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_DIFF|FE_VARCLK} + , +#ifdef SYM_DEBUG_GENERIC_SUPPORT + {PCI_ID_SYM53C895, 0xff, "895", 6, 31, 7, 2, + FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS| + FE_RAM|FE_LCKFRQ} + , +#else + {PCI_ID_SYM53C895, 0xff, "895", 6, 31, 7, 2, + FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_LCKFRQ} + , +#endif + {PCI_ID_SYM53C896, 0xff, "896", 6, 31, 7, 4, + FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_LCKFRQ} + , + {PCI_ID_SYM53C895A, 0xff, "895a", 6, 31, 7, 4, + FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + 
FE_RAM|FE_RAM8K|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_LCKFRQ} + , + {PCI_ID_SYM53C875A, 0xff, "875a", 6, 31, 7, 4, + FE_WIDE|FE_ULTRA|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_LCKFRQ} + , + {PCI_ID_LSI53C1010, 0x00, "1010-33", 6, 31, 7, 8, + FE_WIDE|FE_ULTRA3|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFBC|FE_LDSTR|FE_PFEN| + FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_CRC| + FE_C10} + , + {PCI_ID_LSI53C1010, 0xff, "1010-33", 6, 31, 7, 8, + FE_WIDE|FE_ULTRA3|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFBC|FE_LDSTR|FE_PFEN| + FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_CRC| + FE_C10|FE_U3EN} + , + {PCI_ID_LSI53C1010_2, 0xff, "1010-66", 6, 31, 7, 8, + FE_WIDE|FE_ULTRA3|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFBC|FE_LDSTR|FE_PFEN| + FE_RAM|FE_RAM8K|FE_64BIT|FE_DAC|FE_IO256|FE_NOPM|FE_LEDC|FE_66MHZ|FE_CRC| + FE_C10|FE_U3EN} + , + {PCI_ID_LSI53C1510D, 0xff, "1510d", 6, 31, 7, 4, + FE_WIDE|FE_ULTRA2|FE_QUAD|FE_CACHE_SET|FE_BOF|FE_DFS|FE_LDSTR|FE_PFEN| + FE_RAM|FE_IO256|FE_LEDC} +}; + +#define sym_pci_num_devs \ + (sizeof(sym_pci_dev_table) / sizeof(sym_pci_dev_table[0])) + +/* + * Look up the chip table. + * + * Return a pointer to the chip entry if found, + * zero otherwise. + */ +struct sym_pci_chip * +sym_lookup_pci_chip_table (u_short device_id, u_char revision) +{ + struct sym_pci_chip *chip; + int i; + + for (i = 0; i < sym_pci_num_devs; i++) { + chip = &sym_pci_dev_table[i]; + if (device_id != chip->device_id) + continue; + if (revision > chip->revision_id) + continue; + return chip; + } + + return 0; +} + +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 +/* + * Lookup the 64 bit DMA segments map. + * This is only used if the direct mapping + * has been unsuccessful. 
+ */ +int sym_lookup_dmap(hcb_p np, u32 h, int s) +{ + int i; + + if (!np->use_dac) + goto weird; + + /* Look up existing mappings */ + for (i = SYM_DMAP_SIZE-1; i > 0; i--) { + if (h == np->dmap_bah[i]) + return i; + } + /* If direct mapping is free, get it */ + if (!np->dmap_bah[s]) + goto new; + /* Collision -> lookup free mappings */ + for (s = SYM_DMAP_SIZE-1; s > 0; s--) { + if (!np->dmap_bah[s]) + goto new; + } +weird: + panic("sym: ran out of 64 bit DMA segment registers"); + return -1; +new: + np->dmap_bah[s] = h; + np->dmap_dirty = 1; + return s; +} + +/* + * Update IO registers scratch C..R so they will be + * in sync. with queued CCB expectations. + */ +static void sym_update_dmap_regs(hcb_p np) +{ + int o, i; + + if (!np->dmap_dirty) + return; + o = offsetof(struct sym_reg, nc_scrx[0]); + for (i = 0; i < SYM_DMAP_SIZE; i++) { + OUTL_OFF(o, np->dmap_bah[i]); + o += 4; + } + np->dmap_dirty = 0; +} +#endif + +/* + * Prepare the next negotiation message if needed. + * + * Fill in the part of message buffer that contains the + * negotiation and the nego_status field of the CCB. + * Returns the size of the message in bytes. + */ +static int sym_prepare_nego(hcb_p np, ccb_p cp, int nego, u_char *msgptr) +{ + tcb_p tp = &np->target[cp->target]; + int msglen = 0; + + /* + * Early C1010 chips need a work-around for DT + * data transfer to work. + */ + if (!(np->features & FE_U3EN)) + tp->tinfo.goal.options = 0; + /* + * negotiate using PPR ? + */ + if (tp->tinfo.goal.options & PPR_OPT_MASK) + nego = NS_PPR; + /* + * negotiate wide transfers ? + */ + else if (tp->tinfo.curr.width != tp->tinfo.goal.width) + nego = NS_WIDE; + /* + * negotiate synchronous transfers? 
+ */ + else if (tp->tinfo.curr.period != tp->tinfo.goal.period || + tp->tinfo.curr.offset != tp->tinfo.goal.offset) + nego = NS_SYNC; + + switch (nego) { + case NS_SYNC: + msgptr[msglen++] = M_EXTENDED; + msgptr[msglen++] = 3; + msgptr[msglen++] = M_X_SYNC_REQ; + msgptr[msglen++] = tp->tinfo.goal.period; + msgptr[msglen++] = tp->tinfo.goal.offset; + break; + case NS_WIDE: + msgptr[msglen++] = M_EXTENDED; + msgptr[msglen++] = 2; + msgptr[msglen++] = M_X_WIDE_REQ; + msgptr[msglen++] = tp->tinfo.goal.width; + break; + case NS_PPR: + msgptr[msglen++] = M_EXTENDED; + msgptr[msglen++] = 6; + msgptr[msglen++] = M_X_PPR_REQ; + msgptr[msglen++] = tp->tinfo.goal.period; + msgptr[msglen++] = 0; + msgptr[msglen++] = tp->tinfo.goal.offset; + msgptr[msglen++] = tp->tinfo.goal.width; + msgptr[msglen++] = tp->tinfo.goal.options & PPR_OPT_DT; + break; + }; + + cp->nego_status = nego; + + if (nego) { + tp->nego_cp = cp; /* Keep track a nego will be performed */ + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, cp->target, + nego == NS_SYNC ? "sync msgout" : + nego == NS_WIDE ? "wide msgout" : + "ppr msgout", msgptr); + }; + }; + + return msglen; +} + +/* + * Insert a job into the start queue. + */ +void sym_put_start_queue(hcb_p np, ccb_p cp) +{ + u_short qidx; + +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If the previously queued CCB is not yet done, + * set the IARB hint. The SCRIPTS will go with IARB + * for this job when starting the previous one. + * We leave devices a chance to win arbitration by + * not using more than 'iarb_max' consecutive + * immediate arbitrations. + */ + if (np->last_cp && np->iarb_count < np->iarb_max) { + np->last_cp->host_flags |= HF_HINT_IARB; + ++np->iarb_count; + } + else + np->iarb_count = 0; + np->last_cp = cp; +#endif + +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 + /* + * Make SCRIPTS aware of the 64 bit DMA + * segment registers not being up-to-date. 
+ */ + if (np->dmap_dirty) + cp->host_xflags |= HX_DMAP_DIRTY; +#endif + + /* + * Optionnaly, set the IO timeout condition. + */ +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + sym_timeout_ccb(np, cp, sym_cam_timeout(cp->cam_ccb)); +#endif + + /* + * Insert first the idle task and then our job. + * The MBs should ensure proper ordering. + */ + qidx = np->squeueput + 2; + if (qidx >= MAX_QUEUE*2) qidx = 0; + + np->squeue [qidx] = cpu_to_scr(np->idletask_ba); + MEMORY_WRITE_BARRIER(); + np->squeue [np->squeueput] = cpu_to_scr(cp->ccb_ba); + + np->squeueput = qidx; + + if (DEBUG_FLAGS & DEBUG_QUEUE) + printf ("%s: queuepos=%d.\n", sym_name (np), np->squeueput); + + /* + * Script processor may be waiting for reselect. + * Wake it up. + */ + MEMORY_WRITE_BARRIER(); + OUTB (nc_istat, SIGP|np->istat_sem); +} + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING +/* + * Start next ready-to-start CCBs. + */ +void sym_start_next_ccbs(hcb_p np, lcb_p lp, int maxn) +{ + SYM_QUEHEAD *qp; + ccb_p cp; + + /* + * Paranoia, as usual. :-) + */ + assert(!lp->started_tags || !lp->started_no_tag); + + /* + * Try to start as many commands as asked by caller. + * Prevent from having both tagged and untagged + * commands queued to the device at the same time. 
+ */ + while (maxn--) { + qp = sym_remque_head(&lp->waiting_ccbq); + if (!qp) + break; + cp = sym_que_entry(qp, struct sym_ccb, link2_ccbq); + if (cp->tag != NO_TAG) { + if (lp->started_no_tag || + lp->started_tags >= lp->started_max) { + sym_insque_head(qp, &lp->waiting_ccbq); + break; + } + lp->itlq_tbl[cp->tag] = cpu_to_scr(cp->ccb_ba); + lp->head.resel_sa = + cpu_to_scr(SCRIPTA_BA (np, resel_tag)); + ++lp->started_tags; + } else { + if (lp->started_no_tag || lp->started_tags) { + sym_insque_head(qp, &lp->waiting_ccbq); + break; + } + lp->head.itl_task_sa = cpu_to_scr(cp->ccb_ba); + lp->head.resel_sa = + cpu_to_scr(SCRIPTA_BA (np, resel_no_tag)); + ++lp->started_no_tag; + } + cp->started = 1; + sym_insque_tail(qp, &lp->started_ccbq); + sym_put_start_queue(np, cp); + } +} +#endif /* SYM_OPT_HANDLE_DEVICE_QUEUEING */ + +/* + * The chip may have completed jobs. Look at the DONE QUEUE. + * + * On paper, memory read barriers may be needed here to + * prevent out of order LOADs by the CPU from having + * prefetched stale data prior to DMA having occurred. + */ +static int sym_wakeup_done (hcb_p np) +{ + ccb_p cp; + int i, n; + u32 dsa; + + n = 0; + i = np->dqueueget; + + /* MEMORY_READ_BARRIER(); */ + while (1) { + dsa = scr_to_cpu(np->dqueue[i]); + if (!dsa) + break; + np->dqueue[i] = 0; + if ((i = i+2) >= MAX_QUEUE*2) + i = 0; + + cp = sym_ccb_from_dsa(np, dsa); + if (cp) { + MEMORY_READ_BARRIER(); + sym_complete_ok (np, cp); + ++n; + } + else + printf ("%s: bad DSA (%x) in done queue.\n", + sym_name(np), (u_int) dsa); + } + np->dqueueget = i; + + return n; +} + +/* + * Complete all active CCBs with error. + * Used on CHIP/SCSI RESET. + */ +static void sym_flush_busy_queue (hcb_p np, int cam_status) +{ + /* + * Move all active CCBs to the COMP queue + * and flush this queue. + */ + sym_que_splice(&np->busy_ccbq, &np->comp_ccbq); + sym_que_init(&np->busy_ccbq); + sym_flush_comp_queue(np, cam_status); +} + +/* + * Start chip. 
+ * + * 'reason' means: + * 0: initialisation. + * 1: SCSI BUS RESET delivered or received. + * 2: SCSI BUS MODE changed. + */ +void sym_start_up (hcb_p np, int reason) +{ + int i; + u32 phys; + + /* + * Reset chip if asked, otherwise just clear fifos. + */ + if (reason == 1) + sym_soft_reset(np); + else { + OUTB (nc_stest3, TE|CSF); + OUTONB (nc_ctest3, CLF); + } + + /* + * Clear Start Queue + */ + phys = np->squeue_ba; + for (i = 0; i < MAX_QUEUE*2; i += 2) { + np->squeue[i] = cpu_to_scr(np->idletask_ba); + np->squeue[i+1] = cpu_to_scr(phys + (i+2)*4); + } + np->squeue[MAX_QUEUE*2-1] = cpu_to_scr(phys); + + /* + * Start at first entry. + */ + np->squeueput = 0; + + /* + * Clear Done Queue + */ + phys = np->dqueue_ba; + for (i = 0; i < MAX_QUEUE*2; i += 2) { + np->dqueue[i] = 0; + np->dqueue[i+1] = cpu_to_scr(phys + (i+2)*4); + } + np->dqueue[MAX_QUEUE*2-1] = cpu_to_scr(phys); + + /* + * Start at first entry. + */ + np->dqueueget = 0; + + /* + * Install patches in scripts. + * This also let point to first position the start + * and done queue pointers used from SCRIPTS. + */ + np->fw_patch(np); + + /* + * Wakeup all pending jobs. + */ + sym_flush_busy_queue(np, CAM_SCSI_BUS_RESET); + + /* + * Init chip. + */ + OUTB (nc_istat, 0x00 ); /* Remove Reset, abort */ + UDELAY (2000); /* The 895 needs time for the bus mode to settle */ + + OUTB (nc_scntl0, np->rv_scntl0 | 0xc0); + /* full arb., ena parity, par->ATN */ + OUTB (nc_scntl1, 0x00); /* odd parity, and remove CRST!! 
*/ + + sym_selectclock(np, np->rv_scntl3); /* Select SCSI clock */ + + OUTB (nc_scid , RRE|np->myaddr); /* Adapter SCSI address */ + OUTW (nc_respid, 1ul<<np->myaddr); /* Id to respond to */ + OUTB (nc_istat , SIGP ); /* Signal Process */ + OUTB (nc_dmode , np->rv_dmode); /* Burst length, dma mode */ + OUTB (nc_ctest5, np->rv_ctest5); /* Large fifo + large burst */ + + OUTB (nc_dcntl , NOCOM|np->rv_dcntl); /* Protect SFBR */ + OUTB (nc_ctest3, np->rv_ctest3); /* Write and invalidate */ + OUTB (nc_ctest4, np->rv_ctest4); /* Master parity checking */ + + /* Extended Sreq/Sack filtering not supported on the C10 */ + if (np->features & FE_C10) + OUTB (nc_stest2, np->rv_stest2); + else + OUTB (nc_stest2, EXT|np->rv_stest2); + + OUTB (nc_stest3, TE); /* TolerANT enable */ + OUTB (nc_stime0, 0x0c); /* HTH disabled STO 0.25 sec */ + + /* + * For now, disable AIP generation on C1010-66. + */ + if (np->device_id == PCI_ID_LSI53C1010_2) + OUTB (nc_aipcntl1, DISAIP); + + /* + * C10101 rev. 0 errata. + * Errant SGE's when in narrow. Write bits 4 & 5 of + * STEST1 register to disable SGE. We probably should do + * that from SCRIPTS for each selection/reselection, but + * I just don't want. :) + */ + if (np->device_id == PCI_ID_LSI53C1010 && + np->revision_id < 1) + OUTB (nc_stest1, INB(nc_stest1) | 0x30); + + /* + * DEL 441 - 53C876 Rev 5 - Part Number 609-0392787/2788 - ITEM 2. + * Disable overlapped arbitration for some dual function devices, + * regardless revision id (kind of post-chip-design feature. ;-)) + */ + if (np->device_id == PCI_ID_SYM53C875) + OUTB (nc_ctest0, (1<<5)); + else if (np->device_id == PCI_ID_SYM53C896) + np->rv_ccntl0 |= DPR; + + /* + * Write CCNTL0/CCNTL1 for chips capable of 64 bit addressing + * and/or hardware phase mismatch, since only such chips + * seem to support those IO registers. 
+ */ + if (np->features & (FE_DAC|FE_NOPM)) { + OUTB (nc_ccntl0, np->rv_ccntl0); + OUTB (nc_ccntl1, np->rv_ccntl1); + } + +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 + /* + * Set up scratch C and DRS IO registers to map the 32 bit + * DMA address range our data structures are located in. + */ + if (np->use_dac) { + np->dmap_bah[0] = 0; /* ??? */ + OUTL (nc_scrx[0], np->dmap_bah[0]); + OUTL (nc_drs, np->dmap_bah[0]); + } +#endif + + /* + * If phase mismatch handled by scripts (895A/896/1010), + * set PM jump addresses. + */ + if (np->features & FE_NOPM) { + OUTL (nc_pmjad1, SCRIPTB_BA (np, pm_handle)); + OUTL (nc_pmjad2, SCRIPTB_BA (np, pm_handle)); + } + + /* + * Enable GPIO0 pin for writing if LED support from SCRIPTS. + * Also set GPIO5 and clear GPIO6 if hardware LED control. + */ + if (np->features & FE_LED0) + OUTB(nc_gpcntl, INB(nc_gpcntl) & ~0x01); + else if (np->features & FE_LEDC) + OUTB(nc_gpcntl, (INB(nc_gpcntl) & ~0x41) | 0x20); + + /* + * enable ints + */ + OUTW (nc_sien , STO|HTH|MA|SGE|UDC|RST|PAR); + OUTB (nc_dien , MDPE|BF|SSI|SIR|IID); + + /* + * For 895/6 enable SBMC interrupt and save current SCSI bus mode. + * Try to eat the spurious SBMC interrupt that may occur when + * we reset the chip but not the SCSI BUS (at initialization). + */ + if (np->features & (FE_ULTRA2|FE_ULTRA3)) { + OUTONW (nc_sien, SBMC); + if (reason == 0) { + MDELAY(100); + INW (nc_sist); + } + np->scsi_mode = INB (nc_stest4) & SMODE; + } + + /* + * Fill in target structure. + * Reinitialize usrsync. + * Reinitialize usrwide. + * Prepare sync negotiation according to actual SCSI bus mode. + */ + for (i=0;i<SYM_CONF_MAX_TARGET;i++) { + tcb_p tp = &np->target[i]; + + tp->to_reset = 0; + tp->head.sval = 0; + tp->head.wval = np->rv_scntl3; + tp->head.uval = 0; + + tp->tinfo.curr.period = 0; + tp->tinfo.curr.offset = 0; + tp->tinfo.curr.width = BUS_8_BIT; + tp->tinfo.curr.options = 0; + } + + /* + * Download SCSI SCRIPTS to on-chip RAM if present, + * and start script processor. 
+ * We do the download preferently from the CPU. + * For platforms that may not support PCI memory mapping, + * we use simple SCRIPTS that performs MEMORY MOVEs. + */ + if (np->ram_ba) { + if (sym_verbose >= 2) + printf ("%s: Downloading SCSI SCRIPTS.\n", + sym_name(np)); +#ifdef SYM_OPT_NO_BUS_MEMORY_MAPPING + np->fw_patch(np); + if (np->ram_ws == 8192) + phys = SCRIPTZ_BA (np, start_ram64); + else + phys = SCRIPTZ_BA (np, start_ram); +#else + if (np->ram_ws == 8192) { + OUTRAM_OFF(4096, np->scriptb0, np->scriptb_sz); + phys = scr_to_cpu(np->scr_ram_seg); + OUTL (nc_mmws, phys); + OUTL (nc_mmrs, phys); + OUTL (nc_sfs, phys); + phys = SCRIPTB_BA (np, start64); + } + else + phys = SCRIPTA_BA (np, init); + OUTRAM_OFF(0, np->scripta0, np->scripta_sz); +#endif + } + else + phys = SCRIPTA_BA (np, init); + + np->istat_sem = 0; + + OUTL (nc_dsa, np->hcb_ba); + OUTL_DSP (phys); + + /* + * Notify the XPT about the RESET condition. + */ + if (reason != 0) + sym_xpt_async_bus_reset(np); +} + +/* + * Switch trans mode for current job and it's target. + */ +static void sym_settrans(hcb_p np, int target, u_char dt, u_char ofs, + u_char per, u_char wide, u_char div, u_char fak) +{ + SYM_QUEHEAD *qp; + u_char sval, wval, uval; + tcb_p tp = &np->target[target]; + + assert(target == (INB (nc_sdid) & 0x0f)); + + sval = tp->head.sval; + wval = tp->head.wval; + uval = tp->head.uval; + +#if 0 + printf("XXXX sval=%x wval=%x uval=%x (%x)\n", + sval, wval, uval, np->rv_scntl3); +#endif + /* + * Set the offset. + */ + if (!(np->features & FE_C10)) + sval = (sval & ~0x1f) | ofs; + else + sval = (sval & ~0x3f) | ofs; + + /* + * Set the sync divisor and extra clock factor. 
+ */ + if (ofs != 0) { + wval = (wval & ~0x70) | ((div+1) << 4); + if (!(np->features & FE_C10)) + sval = (sval & ~0xe0) | (fak << 5); + else { + uval = uval & ~(XCLKH_ST|XCLKH_DT|XCLKS_ST|XCLKS_DT); + if (fak >= 1) uval |= (XCLKH_ST|XCLKH_DT); + if (fak >= 2) uval |= (XCLKS_ST|XCLKS_DT); + } + } + + /* + * Set the bus width. + */ + wval = wval & ~EWS; + if (wide != 0) + wval |= EWS; + + /* + * Set misc. ultra enable bits. + */ + if (np->features & FE_C10) { + uval = uval & ~(U3EN|AIPCKEN); + if (dt) { + assert(np->features & FE_U3EN); + uval |= U3EN; + } + } + else { + wval = wval & ~ULTRA; + if (per <= 12) wval |= ULTRA; + } + + /* + * Stop there if sync parameters are unchanged. + */ + if (tp->head.sval == sval && + tp->head.wval == wval && + tp->head.uval == uval) + return; + tp->head.sval = sval; + tp->head.wval = wval; + tp->head.uval = uval; + + /* + * Disable extended Sreq/Sack filtering if per < 50. + * Not supported on the C1010. + */ + if (per < 50 && !(np->features & FE_C10)) + OUTOFFB (nc_stest2, EXT); + + /* + * set actual value and sync_status + */ + OUTB (nc_sxfer, tp->head.sval); + OUTB (nc_scntl3, tp->head.wval); + + if (np->features & FE_C10) { + OUTB (nc_scntl4, tp->head.uval); + } + + /* + * patch ALL busy ccbs of this target. + */ + FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { + ccb_p cp; + cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + if (cp->target != target) + continue; + cp->phys.select.sel_scntl3 = tp->head.wval; + cp->phys.select.sel_sxfer = tp->head.sval; + if (np->features & FE_C10) { + cp->phys.select.sel_scntl4 = tp->head.uval; + } + } +} + +/* + * We received a WDTR. + * Let everything be aware of the changes. 
+ */ +static void sym_setwide(hcb_p np, int target, u_char wide) +{ + tcb_p tp = &np->target[target]; + + sym_settrans(np, target, 0, 0, 0, wide, 0, 0); + + tp->tinfo.goal.width = tp->tinfo.curr.width = wide; + tp->tinfo.curr.offset = 0; + tp->tinfo.curr.period = 0; + tp->tinfo.curr.options = 0; + + sym_xpt_async_nego_wide(np, target); +} + +/* + * We received a SDTR. + * Let everything be aware of the changes. + */ +static void +sym_setsync(hcb_p np, int target, + u_char ofs, u_char per, u_char div, u_char fak) +{ + tcb_p tp = &np->target[target]; + u_char wide = (tp->head.wval & EWS) ? BUS_16_BIT : BUS_8_BIT; + + sym_settrans(np, target, 0, ofs, per, wide, div, fak); + + tp->tinfo.goal.period = tp->tinfo.curr.period = per; + tp->tinfo.goal.offset = tp->tinfo.curr.offset = ofs; + tp->tinfo.goal.options = tp->tinfo.curr.options = 0; + + sym_xpt_async_nego_sync(np, target); +} + +/* + * We received a PPR. + * Let everything be aware of the changes. + */ +static void +sym_setpprot(hcb_p np, int target, u_char dt, u_char ofs, + u_char per, u_char wide, u_char div, u_char fak) +{ + tcb_p tp = &np->target[target]; + + sym_settrans(np, target, dt, ofs, per, wide, div, fak); + + tp->tinfo.goal.width = tp->tinfo.curr.width = wide; + tp->tinfo.goal.period = tp->tinfo.curr.period = per; + tp->tinfo.goal.offset = tp->tinfo.curr.offset = ofs; + tp->tinfo.goal.options = tp->tinfo.curr.options = dt; + + sym_xpt_async_nego_ppr(np, target); +} + +/* + * generic recovery from scsi interrupt + * + * The doc says that when the chip gets an SCSI interrupt, + * it tries to stop in an orderly fashion, by completing + * an instruction fetch that had started or by flushing + * the DMA fifo for a write to memory that was executing. + * Such a fashion is not enough to know if the instruction + * that was just before the current DSP value has been + * executed or not. 
+ * + * There are some small SCRIPTS sections that deal with + * the start queue and the done queue that may break any + * assomption from the C code if we are interrupted + * inside, so we reset if this happens. Btw, since these + * SCRIPTS sections are executed while the SCRIPTS hasn't + * started SCSI operations, it is very unlikely to happen. + * + * All the driver data structures are supposed to be + * allocated from the same 4 GB memory window, so there + * is a 1 to 1 relationship between DSA and driver data + * structures. Since we are careful :) to invalidate the + * DSA when we complete a command or when the SCRIPTS + * pushes a DSA into a queue, we can trust it when it + * points to a CCB. + */ +static void sym_recover_scsi_int (hcb_p np, u_char hsts) +{ + u32 dsp = INL (nc_dsp); + u32 dsa = INL (nc_dsa); + ccb_p cp = sym_ccb_from_dsa(np, dsa); + + /* + * If we haven't been interrupted inside the SCRIPTS + * critical pathes, we can safely restart the SCRIPTS + * and trust the DSA value if it matches a CCB. + */ + if ((!(dsp > SCRIPTA_BA (np, getjob_begin) && + dsp < SCRIPTA_BA (np, getjob_end) + 1)) && + (!(dsp > SCRIPTA_BA (np, ungetjob) && + dsp < SCRIPTA_BA (np, reselect) + 1)) && + (!(dsp > SCRIPTB_BA (np, sel_for_abort) && + dsp < SCRIPTB_BA (np, sel_for_abort_1) + 1)) && + (!(dsp > SCRIPTA_BA (np, done) && + dsp < SCRIPTA_BA (np, done_end) + 1))) { + OUTB (nc_ctest3, np->rv_ctest3 | CLF); /* clear dma fifo */ + OUTB (nc_stest3, TE|CSF); /* clear scsi fifo */ + /* + * If we have a CCB, let the SCRIPTS call us back for + * the handling of the error with SCRATCHA filled with + * STARTPOS. This way, we will be able to freeze the + * device queue and requeue awaiting IOs. + */ + if (cp) { + cp->host_status = hsts; + OUTL_DSP (SCRIPTA_BA (np, complete_error)); + } + /* + * Otherwise just restart the SCRIPTS. 
+ */ + else { + OUTL (nc_dsa, 0xffffff); + OUTL_DSP (SCRIPTA_BA (np, start)); + } + } + else + goto reset_all; + + return; + +reset_all: + sym_start_reset(np); +} + +/* + * chip exception handler for selection timeout + */ +static void sym_int_sto (hcb_p np) +{ + u32 dsp = INL (nc_dsp); + + if (DEBUG_FLAGS & DEBUG_TINY) printf ("T"); + + if (dsp == SCRIPTA_BA (np, wf_sel_done) + 8) + sym_recover_scsi_int(np, HS_SEL_TIMEOUT); + else + sym_start_reset(np); +} + +/* + * chip exception handler for unexpected disconnect + */ +static void sym_int_udc (hcb_p np) +{ + printf ("%s: unexpected disconnect\n", sym_name(np)); + sym_recover_scsi_int(np, HS_UNEXPECTED); +} + +/* + * chip exception handler for SCSI bus mode change + * + * spi2-r12 11.2.3 says a transceiver mode change must + * generate a reset event and a device that detects a reset + * event shall initiate a hard reset. It says also that a + * device that detects a mode change shall set data transfer + * mode to eight bit asynchronous, etc... + * So, just reinitializing all except chip should be enough. + */ +static void sym_int_sbmc (hcb_p np) +{ + u_char scsi_mode = INB (nc_stest4) & SMODE; + + /* + * Notify user. + */ + printf("%s: SCSI BUS mode change from %s to %s.\n", sym_name(np), + sym_scsi_bus_mode(np->scsi_mode), sym_scsi_bus_mode(scsi_mode)); + + /* + * Should suspend command processing for a few seconds and + * reinitialize all except the chip. + */ + sym_start_up (np, 2); +} + +/* + * chip exception handler for SCSI parity error. + * + * When the chip detects a SCSI parity error and is + * currently executing a (CH)MOV instruction, it does + * not interrupt immediately, but tries to finish the + * transfer of the current scatter entry before + * interrupting. The following situations may occur: + * + * - The complete scatter entry has been transferred + * without the device having changed phase. + * The chip will then interrupt with the DSP pointing + * to the instruction that follows the MOV. 
+ * + * - A phase mismatch occurs before the MOV finished + * and phase errors are to be handled by the C code. + * The chip will then interrupt with both PAR and MA + * conditions set. + * + * - A phase mismatch occurs before the MOV finished and + * phase errors are to be handled by SCRIPTS. + * The chip will load the DSP with the phase mismatch + * JUMP address and interrupt the host processor. + */ +static void sym_int_par (hcb_p np, u_short sist) +{ + u_char hsts = INB (HS_PRT); + u32 dsp = INL (nc_dsp); + u32 dbc = INL (nc_dbc); + u32 dsa = INL (nc_dsa); + u_char sbcl = INB (nc_sbcl); + u_char cmd = dbc >> 24; + int phase = cmd & 7; + ccb_p cp = sym_ccb_from_dsa(np, dsa); + + printf("%s: SCSI parity error detected: SCR1=%d DBC=%x SBCL=%x\n", + sym_name(np), hsts, dbc, sbcl); + + /* + * Check that the chip is connected to the SCSI BUS. + */ + if (!(INB (nc_scntl1) & ISCON)) { + sym_recover_scsi_int(np, HS_UNEXPECTED); + return; + } + + /* + * If the nexus is not clearly identified, reset the bus. + * We will try to do better later. + */ + if (!cp) + goto reset_all; + + /* + * Check instruction was a MOV, direction was INPUT and + * ATN is asserted. + */ + if ((cmd & 0xc0) || !(phase & 1) || !(sbcl & 0x8)) + goto reset_all; + + /* + * Keep track of the parity error. + */ + OUTONB (HF_PRT, HF_EXT_ERR); + cp->xerr_status |= XE_PARITY_ERR; + + /* + * Prepare the message to send to the device. + */ + np->msgout[0] = (phase == 7) ? M_PARITY : M_ID_ERROR; + + /* + * If the old phase was DATA IN phase, we have to deal with + * the 3 situations described above. + * For other input phases (MSG IN and STATUS), the device + * must resend the whole thing that failed parity checking + * or signal error. So, jumping to dispatcher should be OK. 
+ */ + if (phase == 1 || phase == 5) { + /* Phase mismatch handled by SCRIPTS */ + if (dsp == SCRIPTB_BA (np, pm_handle)) + OUTL_DSP (dsp); + /* Phase mismatch handled by the C code */ + else if (sist & MA) + sym_int_ma (np); + /* No phase mismatch occurred */ + else { + sym_set_script_dp (np, cp, dsp); + OUTL_DSP (SCRIPTA_BA (np, dispatch)); + } + } + else if (phase == 7) /* We definitely cannot handle parity errors */ +#if 1 /* in message-in phase due to the relection */ + goto reset_all; /* path and various message anticipations. */ +#else + OUTL_DSP (SCRIPTA_BA (np, clrack)); +#endif + else + OUTL_DSP (SCRIPTA_BA (np, dispatch)); + return; + +reset_all: + sym_start_reset(np); + return; +} + +/* + * chip exception handler for phase errors. + * + * We have to construct a new transfer descriptor, + * to transfer the rest of the current block. + */ +static void sym_int_ma (hcb_p np) +{ + u32 dbc; + u32 rest; + u32 dsp; + u32 dsa; + u32 nxtdsp; + u32 *vdsp; + u32 oadr, olen; + u32 *tblp; + u32 newcmd; + u_int delta; + u_char cmd; + u_char hflags, hflags0; + struct sym_pmc *pm; + ccb_p cp; + + dsp = INL (nc_dsp); + dbc = INL (nc_dbc); + dsa = INL (nc_dsa); + + cmd = dbc >> 24; + rest = dbc & 0xffffff; + delta = 0; + + /* + * locate matching cp if any. + */ + cp = sym_ccb_from_dsa(np, dsa); + + /* + * Donnot take into account dma fifo and various buffers in + * INPUT phase since the chip flushes everything before + * raising the MA interrupt for interrupted INPUT phases. + * For DATA IN phase, we will check for the SWIDE later. + */ + if ((cmd & 7) != 1 && (cmd & 7) != 5) { + u_char ss0, ss2; + + if (np->features & FE_DFBC) + delta = INW (nc_dfbc); + else { + u32 dfifo; + + /* + * Read DFIFO, CTEST[4-6] using 1 PCI bus ownership. + */ + dfifo = INL(nc_dfifo); + + /* + * Calculate remaining bytes in DMA fifo. 
+ * (CTEST5 = dfifo >> 16) + */ + if (dfifo & (DFS << 16)) + delta = ((((dfifo >> 8) & 0x300) | + (dfifo & 0xff)) - rest) & 0x3ff; + else + delta = ((dfifo & 0xff) - rest) & 0x7f; + } + + /* + * The data in the dma fifo has not been transfered to + * the target -> add the amount to the rest + * and clear the data. + * Check the sstat2 register in case of wide transfer. + */ + rest += delta; + ss0 = INB (nc_sstat0); + if (ss0 & OLF) rest++; + if (!(np->features & FE_C10)) + if (ss0 & ORF) rest++; + if (cp && (cp->phys.select.sel_scntl3 & EWS)) { + ss2 = INB (nc_sstat2); + if (ss2 & OLF1) rest++; + if (!(np->features & FE_C10)) + if (ss2 & ORF1) rest++; + }; + + /* + * Clear fifos. + */ + OUTB (nc_ctest3, np->rv_ctest3 | CLF); /* dma fifo */ + OUTB (nc_stest3, TE|CSF); /* scsi fifo */ + } + + /* + * log the information + */ + if (DEBUG_FLAGS & (DEBUG_TINY|DEBUG_PHASE)) + printf ("P%x%x RL=%d D=%d ", cmd&7, INB(nc_sbcl)&7, + (unsigned) rest, (unsigned) delta); + + /* + * try to find the interrupted script command, + * and the address at which to continue. + */ + vdsp = 0; + nxtdsp = 0; + if (dsp > np->scripta_ba && + dsp <= np->scripta_ba + np->scripta_sz) { + vdsp = (u32 *)((char*)np->scripta0 + (dsp-np->scripta_ba-8)); + nxtdsp = dsp; + } + else if (dsp > np->scriptb_ba && + dsp <= np->scriptb_ba + np->scriptb_sz) { + vdsp = (u32 *)((char*)np->scriptb0 + (dsp-np->scriptb_ba-8)); + nxtdsp = dsp; + } + + /* + * log the information + */ + if (DEBUG_FLAGS & DEBUG_PHASE) { + printf ("\nCP=%p DSP=%x NXT=%x VDSP=%p CMD=%x ", + cp, (unsigned)dsp, (unsigned)nxtdsp, vdsp, cmd); + }; + + if (!vdsp) { + printf ("%s: interrupted SCRIPT address not found.\n", + sym_name (np)); + goto reset_all; + } + + if (!cp) { + printf ("%s: SCSI phase error fixup: CCB already dequeued.\n", + sym_name (np)); + goto reset_all; + } + + /* + * get old startaddress and old length. 
+ */ + oadr = scr_to_cpu(vdsp[1]); + + if (cmd & 0x10) { /* Table indirect */ + tblp = (u32 *) ((char*) &cp->phys + oadr); + olen = scr_to_cpu(tblp[0]); + oadr = scr_to_cpu(tblp[1]); + } else { + tblp = (u32 *) 0; + olen = scr_to_cpu(vdsp[0]) & 0xffffff; + }; + + if (DEBUG_FLAGS & DEBUG_PHASE) { + printf ("OCMD=%x\nTBLP=%p OLEN=%x OADR=%x\n", + (unsigned) (scr_to_cpu(vdsp[0]) >> 24), + tblp, + (unsigned) olen, + (unsigned) oadr); + }; + + /* + * check cmd against assumed interrupted script command. + * If dt data phase, the MOVE instruction hasn't bit 4 of + * the phase. + */ + if (((cmd & 2) ? cmd : (cmd & ~4)) != (scr_to_cpu(vdsp[0]) >> 24)) { + PRINT_ADDR(cp); + printf ("internal error: cmd=%02x != %02x=(vdsp[0] >> 24)\n", + (unsigned)cmd, (unsigned)scr_to_cpu(vdsp[0]) >> 24); + + goto reset_all; + }; + + /* + * if old phase not dataphase, leave here. + */ + if (cmd & 2) { + PRINT_ADDR(cp); + printf ("phase change %x-%x %d@%08x resid=%d.\n", + cmd&7, INB(nc_sbcl)&7, (unsigned)olen, + (unsigned)oadr, (unsigned)rest); + goto unexpected_phase; + }; + + /* + * Choose the correct PM save area. + * + * Look at the PM_SAVE SCRIPT if you want to understand + * this stuff. The equivalent code is implemented in + * SCRIPTS for the 895A, 896 and 1010 that are able to + * handle PM from the SCRIPTS processor. 
+ */ + hflags0 = INB (HF_PRT); + hflags = hflags0; + + if (hflags & (HF_IN_PM0 | HF_IN_PM1 | HF_DP_SAVED)) { + if (hflags & HF_IN_PM0) + nxtdsp = scr_to_cpu(cp->phys.pm0.ret); + else if (hflags & HF_IN_PM1) + nxtdsp = scr_to_cpu(cp->phys.pm1.ret); + + if (hflags & HF_DP_SAVED) + hflags ^= HF_ACT_PM; + } + + if (!(hflags & HF_ACT_PM)) { + pm = &cp->phys.pm0; + newcmd = SCRIPTA_BA (np, pm0_data); + } + else { + pm = &cp->phys.pm1; + newcmd = SCRIPTA_BA (np, pm1_data); + } + + hflags &= ~(HF_IN_PM0 | HF_IN_PM1 | HF_DP_SAVED); + if (hflags != hflags0) + OUTB (HF_PRT, hflags); + + /* + * fillin the phase mismatch context + */ + pm->sg.addr = cpu_to_scr(oadr + olen - rest); + pm->sg.size = cpu_to_scr(rest); + pm->ret = cpu_to_scr(nxtdsp); + + /* + * If we have a SWIDE, + * - prepare the address to write the SWIDE from SCRIPTS, + * - compute the SCRIPTS address to restart from, + * - move current data pointer context by one byte. + */ + nxtdsp = SCRIPTA_BA (np, dispatch); + if ((cmd & 7) == 1 && cp && (cp->phys.select.sel_scntl3 & EWS) && + (INB (nc_scntl2) & WSR)) { + u32 tmp; + + /* + * Set up the table indirect for the MOVE + * of the residual byte and adjust the data + * pointer context. + */ + tmp = scr_to_cpu(pm->sg.addr); + cp->phys.wresid.addr = cpu_to_scr(tmp); + pm->sg.addr = cpu_to_scr(tmp + 1); + tmp = scr_to_cpu(pm->sg.size); + cp->phys.wresid.size = cpu_to_scr((tmp&0xff000000) | 1); + pm->sg.size = cpu_to_scr(tmp - 1); + + /* + * If only the residual byte is to be moved, + * no PM context is needed. + */ + if ((tmp&0xffffff) == 1) + newcmd = pm->ret; + + /* + * Prepare the address of SCRIPTS that will + * move the residual byte to memory. 
+ */ + nxtdsp = SCRIPTB_BA (np, wsr_ma_helper); + } + + if (DEBUG_FLAGS & DEBUG_PHASE) { + PRINT_ADDR(cp); + printf ("PM %x %x %x / %x %x %x.\n", + hflags0, hflags, newcmd, + (unsigned)scr_to_cpu(pm->sg.addr), + (unsigned)scr_to_cpu(pm->sg.size), + (unsigned)scr_to_cpu(pm->ret)); + } + + /* + * Restart the SCRIPTS processor. + */ + sym_set_script_dp (np, cp, newcmd); + OUTL_DSP (nxtdsp); + return; + + /* + * Unexpected phase changes that occurs when the current phase + * is not a DATA IN or DATA OUT phase are due to error conditions. + * Such event may only happen when the SCRIPTS is using a + * multibyte SCSI MOVE. + * + * Phase change Some possible cause + * + * COMMAND --> MSG IN SCSI parity error detected by target. + * COMMAND --> STATUS Bad command or refused by target. + * MSG OUT --> MSG IN Message rejected by target. + * MSG OUT --> COMMAND Bogus target that discards extended + * negotiation messages. + * + * The code below does not care of the new phase and so + * trusts the target. Why to annoy it ? + * If the interrupted phase is COMMAND phase, we restart at + * dispatcher. + * If a target does not get all the messages after selection, + * the code assumes blindly that the target discards extended + * messages and clears the negotiation status. + * If the target does not want all our response to negotiation, + * we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids + * bloat for such a should_not_happen situation). + * In all other situation, we reset the BUS. + * Are these assumptions reasonnable ? (Wait and see ...) + */ +unexpected_phase: + dsp -= 8; + nxtdsp = 0; + + switch (cmd & 7) { + case 2: /* COMMAND phase */ + nxtdsp = SCRIPTA_BA (np, dispatch); + break; +#if 0 + case 3: /* STATUS phase */ + nxtdsp = SCRIPTA_BA (np, dispatch); + break; +#endif + case 6: /* MSG OUT phase */ + /* + * If the device may want to use untagged when we want + * tagged, we prepare an IDENTIFY without disc. 
granted, + * since we will not be able to handle reselect. + * Otherwise, we just don't care. + */ + if (dsp == SCRIPTA_BA (np, send_ident)) { + if (cp->tag != NO_TAG && olen - rest <= 3) { + cp->host_status = HS_BUSY; + np->msgout[0] = M_IDENTIFY | cp->lun; + nxtdsp = SCRIPTB_BA (np, ident_break_atn); + } + else + nxtdsp = SCRIPTB_BA (np, ident_break); + } + else if (dsp == SCRIPTB_BA (np, send_wdtr) || + dsp == SCRIPTB_BA (np, send_sdtr) || + dsp == SCRIPTB_BA (np, send_ppr)) { + nxtdsp = SCRIPTB_BA (np, nego_bad_phase); + } + break; +#if 0 + case 7: /* MSG IN phase */ + nxtdsp = SCRIPTA_BA (np, clrack); + break; +#endif + } + + if (nxtdsp) { + OUTL_DSP (nxtdsp); + return; + } + +reset_all: + sym_start_reset(np); +} + +/* + * chip interrupt handler + * + * In normal situations, interrupt conditions occur one at + * a time. But when something bad happens on the SCSI BUS, + * the chip may raise several interrupt flags before + * stopping and interrupting the CPU. The additionnal + * interrupt flags are stacked in some extra registers + * after the SIP and/or DIP flag has been raised in the + * ISTAT. After the CPU has read the interrupt condition + * flag from SIST or DSTAT, the chip unstacks the other + * interrupt flags and sets the corresponding bits in + * SIST or DSTAT. Since the chip starts stacking once the + * SIP or DIP flag is set, there is a small window of time + * where the stacking does not occur. + * + * Typically, multiple interrupt conditions may happen in + * the following situations: + * + * - SCSI parity error + Phase mismatch (PAR|MA) + * When an parity error is detected in input phase + * and the device switches to msg-in phase inside a + * block MOV. + * - SCSI parity error + Unexpected disconnect (PAR|UDC) + * When a stupid device does not want to handle the + * recovery of an SCSI parity error. + * - Some combinations of STO, PAR, UDC, ... 
+ * When using non compliant SCSI stuff, when user is
+ * doing non compliant hot tampering on the BUS, when
+ * something really bad happens to a device, etc ...
+ *
+ * The heuristic suggested by SYMBIOS to handle
+ * multiple interrupts is to try unstacking all
+ * interrupt conditions and to handle them on some
+ * priority based on error severity.
+ * This will work when the unstacking has been
+ * successful, but we cannot be 100 % sure of that,
+ * since the CPU may have been faster to unstack than
+ * the chip is able to stack. Hmmm ... But it seems that
+ * such a situation is very unlikely to happen.
+ *
+ * If this happens, for example STO caught by the CPU
+ * then UDC happening before the CPU has restarted
+ * the SCRIPTS, the driver may wrongly complete the
+ * same command on UDC, since the SCRIPTS didn't restart
+ * and the DSA still points to the same command.
+ * We avoid this situation by setting the DSA to an
+ * invalid value when the CCB is completed and before
+ * restarting the SCRIPTS.
+ *
+ * Another issue is that we need some section of our
+ * recovery procedures to be somehow uninterruptible but
+ * the SCRIPTS processor does not provide such a
+ * feature. For this reason, we handle recovery preferentially
+ * from the C code and check against some SCRIPTS critical
+ * sections from the C code.
+ *
+ * Hopefully, the interrupt handling of the driver is now
+ * able to resist weird BUS error conditions, but do not
+ * ask me for any guarantee that it will never fail. :-)
+ * Use at your own decision and risk.
+ */
+
+void sym_interrupt (hcb_p np)
+{
+ u_char istat, istatc;
+ u_char dstat;
+ u_short sist;
+
+ /*
+ * interrupt on the fly ?
+ * (SCRIPTS may still be running)
+ *
+ * A `dummy read' is needed to ensure that the
+ * clear of the INTF flag reaches the device
+ * and that posted writes are flushed to memory
+ * before the scanning of the DONE queue.
+ * Note that SCRIPTS also (dummy) read to memory + * prior to deliver the INTF interrupt condition. + */ + istat = INB (nc_istat); + if (istat & INTF) { + OUTB (nc_istat, (istat & SIGP) | INTF | np->istat_sem); + istat = INB (nc_istat); /* DUMMY READ */ + if (DEBUG_FLAGS & DEBUG_TINY) printf ("F "); + (void)sym_wakeup_done (np); + }; + + if (!(istat & (SIP|DIP))) + return; + +#if 0 /* We should never get this one */ + if (istat & CABRT) + OUTB (nc_istat, CABRT); +#endif + + /* + * PAR and MA interrupts may occur at the same time, + * and we need to know of both in order to handle + * this situation properly. We try to unstack SCSI + * interrupts for that reason. BTW, I dislike a LOT + * such a loop inside the interrupt routine. + * Even if DMA interrupt stacking is very unlikely to + * happen, we also try unstacking these ones, since + * this has no performance impact. + */ + sist = 0; + dstat = 0; + istatc = istat; + do { + if (istatc & SIP) + sist |= INW (nc_sist); + if (istatc & DIP) + dstat |= INB (nc_dstat); + istatc = INB (nc_istat); + istat |= istatc; + } while (istatc & (SIP|DIP)); + + if (DEBUG_FLAGS & DEBUG_TINY) + printf ("<%d|%x:%x|%x:%x>", + (int)INB(nc_scr0), + dstat,sist, + (unsigned)INL(nc_dsp), + (unsigned)INL(nc_dbc)); + /* + * On paper, a memory read barrier may be needed here to + * prevent out of order LOADs by the CPU from having + * prefetched stale data prior to DMA having occurred. + * And since we are paranoid ... :) + */ + MEMORY_READ_BARRIER(); + + /* + * First, interrupts we want to service cleanly. + * + * Phase mismatch (MA) is the most frequent interrupt + * for chip earlier than the 896 and so we have to service + * it as quickly as possible. + * A SCSI parity error (PAR) may be combined with a phase + * mismatch condition (MA). + * Programmed interrupts (SIR) are used to call the C code + * from SCRIPTS. + * The single step interrupt (SSI) is not used in this + * driver. 
+ */ + if (!(sist & (STO|GEN|HTH|SGE|UDC|SBMC|RST)) && + !(dstat & (MDPE|BF|ABRT|IID))) { + if (sist & PAR) sym_int_par (np, sist); + else if (sist & MA) sym_int_ma (np); + else if (dstat & SIR) sym_int_sir (np); + else if (dstat & SSI) OUTONB_STD (); + else goto unknown_int; + return; + }; + + /* + * Now, interrupts that donnot happen in normal + * situations and that we may need to recover from. + * + * On SCSI RESET (RST), we reset everything. + * On SCSI BUS MODE CHANGE (SBMC), we complete all + * active CCBs with RESET status, prepare all devices + * for negotiating again and restart the SCRIPTS. + * On STO and UDC, we complete the CCB with the corres- + * ponding status and restart the SCRIPTS. + */ + if (sist & RST) { + printf("%s: SCSI BUS reset detected.\n", sym_name(np)); + sym_start_up (np, 1); + return; + }; + + OUTB (nc_ctest3, np->rv_ctest3 | CLF); /* clear dma fifo */ + OUTB (nc_stest3, TE|CSF); /* clear scsi fifo */ + + if (!(sist & (GEN|HTH|SGE)) && + !(dstat & (MDPE|BF|ABRT|IID))) { + if (sist & SBMC) sym_int_sbmc (np); + else if (sist & STO) sym_int_sto (np); + else if (sist & UDC) sym_int_udc (np); + else goto unknown_int; + return; + }; + + /* + * Now, interrupts we are not able to recover cleanly. + * + * Log message for hard errors. + * Reset everything. + */ + + sym_log_hard_error(np, sist, dstat); + + if ((sist & (GEN|HTH|SGE)) || + (dstat & (MDPE|BF|ABRT|IID))) { + sym_start_reset(np); + return; + }; + +unknown_int: + /* + * We just miss the cause of the interrupt. :( + * Print a message. The timeout will do the real work. + */ + printf( "%s: unknown interrupt(s) ignored, " + "ISTAT=0x%x DSTAT=0x%x SIST=0x%x\n", + sym_name(np), istat, dstat, sist); +} + +/* + * Dequeue from the START queue all CCBs that match + * a given target/lun/task condition (-1 means all), + * and move them from the BUSY queue to the COMP queue + * with CAM_REQUEUE_REQ status condition. + * This function is used during error handling/recovery. 
+ * It is called with SCRIPTS not running. + */ +static int +sym_dequeue_from_squeue(hcb_p np, int i, int target, int lun, int task) +{ + int j; + ccb_p cp; + + /* + * Make sure the starting index is within range. + */ + assert((i >= 0) && (i < 2*MAX_QUEUE)); + + /* + * Walk until end of START queue and dequeue every job + * that matches the target/lun/task condition. + */ + j = i; + while (i != np->squeueput) { + cp = sym_ccb_from_dsa(np, scr_to_cpu(np->squeue[i])); + assert(cp); +#ifdef SYM_CONF_IARB_SUPPORT + /* Forget hints for IARB, they may be no longer relevant */ + cp->host_flags &= ~HF_HINT_IARB; +#endif + if ((target == -1 || cp->target == target) && + (lun == -1 || cp->lun == lun) && + (task == -1 || cp->tag == task)) { + sym_set_cam_status(cp->cam_ccb, CAM_REQUEUE_REQ); + sym_remque(&cp->link_ccbq); + sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq); + } + else { + if (i != j) + np->squeue[j] = np->squeue[i]; + if ((j += 2) >= MAX_QUEUE*2) j = 0; + } + if ((i += 2) >= MAX_QUEUE*2) i = 0; + } + if (i != j) /* Copy back the idle task if needed */ + np->squeue[j] = np->squeue[i]; + np->squeueput = j; /* Update our current start queue pointer */ + + return (i - j) / 2; +} + +/* + * Complete all CCBs queued to the COMP queue. + * + * These CCBs are assumed: + * - Not to be referenced either by devices or + * SCRIPTS-related queues and datas. + * - To have to be completed with an error condition + * or requeued. + * + * The device queue freeze count is incremented + * for each CCB that does not prevent this. + * This function is called when all CCBs involved + * in error handling/recovery have been reaped. 
+ */ +void sym_flush_comp_queue(hcb_p np, int cam_status) +{ + SYM_QUEHEAD *qp; + ccb_p cp; + + while ((qp = sym_remque_head(&np->comp_ccbq)) != 0) { + cam_ccb_p ccb; + cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + sym_insque_tail(&cp->link_ccbq, &np->busy_ccbq); + /* Leave quiet CCBs waiting for resources */ + if (cp->host_status == HS_WAIT) + continue; + ccb = cp->cam_ccb; + if (cam_status) + sym_set_cam_status(ccb, cam_status); +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + if (sym_get_cam_status(ccb) == CAM_REQUEUE_REQ) { + tcb_p tp = &np->target[cp->target]; + lcb_p lp = sym_lp(np, tp, cp->lun); + if (lp) { + sym_remque(&cp->link2_ccbq); + sym_insque_tail(&cp->link2_ccbq, + &lp->waiting_ccbq); + if (cp->started) { + if (cp->tag != NO_TAG) + --lp->started_tags; + else + --lp->started_no_tag; + } + } + cp->started = 0; + continue; + } +#endif + sym_free_ccb(np, cp); + sym_freeze_cam_ccb(ccb); + sym_xpt_done(np, ccb); + } +} + +/* + * chip handler for bad SCSI status condition + * + * In case of bad SCSI status, we unqueue all the tasks + * currently queued to the controller but not yet started + * and then restart the SCRIPTS processor immediately. + * + * QUEUE FULL and BUSY conditions are handled the same way. + * Basically all the not yet started tasks are requeued in + * device queue and the queue is frozen until a completion. + * + * For CHECK CONDITION and COMMAND TERMINATED status, we use + * the CCB of the failed command to prepare a REQUEST SENSE + * SCSI command and queue it to the controller queue. + * + * SCRATCHA is assumed to have been loaded with STARTPOS + * before the SCRIPTS called the C code. + */ +static void sym_sir_bad_scsi_status(hcb_p np, int num, ccb_p cp) +{ + tcb_p tp = &np->target[cp->target]; + u32 startp; + u_char s_status = cp->ssss_status; + u_char h_flags = cp->host_flags; + int msglen; + int nego; + int i; + + /* + * Compute the index of the next job to start from SCRIPTS. 
+ */ + i = (INL (nc_scratcha) - np->squeue_ba) / 4; + + /* + * The last CCB queued used for IARB hint may be + * no longer relevant. Forget it. + */ +#ifdef SYM_CONF_IARB_SUPPORT + if (np->last_cp) + np->last_cp = 0; +#endif + + /* + * Now deal with the SCSI status. + */ + switch(s_status) { + case S_BUSY: + case S_QUEUE_FULL: + if (sym_verbose >= 2) { + PRINT_ADDR(cp); + printf ("%s\n", + s_status == S_BUSY ? "BUSY" : "QUEUE FULL\n"); + } + default: /* S_INT, S_INT_COND_MET, S_CONFLICT */ + sym_complete_error (np, cp); + break; + case S_TERMINATED: + case S_CHECK_COND: + /* + * If we get an SCSI error when requesting sense, give up. + */ + if (h_flags & HF_SENSE) { + sym_complete_error (np, cp); + break; + } + + /* + * Dequeue all queued CCBs for that device not yet started, + * and restart the SCRIPTS processor immediately. + */ + (void) sym_dequeue_from_squeue(np, i, cp->target, cp->lun, -1); + OUTL_DSP (SCRIPTA_BA (np, start)); + + /* + * Save some info of the actual IO. + * Compute the data residual. + */ + cp->sv_scsi_status = cp->ssss_status; + cp->sv_xerr_status = cp->xerr_status; + cp->sv_resid = sym_compute_residual(np, cp); + + /* + * Prepare all needed data structures for + * requesting sense data. + */ + + /* + * identify message + */ + cp->scsi_smsg2[0] = M_IDENTIFY | cp->lun; + msglen = 1; + + /* + * If we are currently using anything different from + * async. 8 bit data transfers with that target, + * start a negotiation, since the device may want + * to report us a UNIT ATTENTION condition due to + * a cause we currently ignore, and we donnot want + * to be stuck with WIDE and/or SYNC data transfer. + * + * cp->nego_status is filled by sym_prepare_nego(). 
+ */ + cp->nego_status = 0; + nego = 0; + if (tp->tinfo.curr.options & PPR_OPT_MASK) + nego = NS_PPR; + else if (tp->tinfo.curr.width != BUS_8_BIT) + nego = NS_WIDE; + else if (tp->tinfo.curr.offset != 0) + nego = NS_SYNC; + if (nego) + msglen += + sym_prepare_nego (np,cp, nego, &cp->scsi_smsg2[msglen]); + /* + * Message table indirect structure. + */ + cp->phys.smsg.addr = cpu_to_scr(CCB_BA (cp, scsi_smsg2)); + cp->phys.smsg.size = cpu_to_scr(msglen); + + /* + * sense command + */ + cp->phys.cmd.addr = cpu_to_scr(CCB_BA (cp, sensecmd)); + cp->phys.cmd.size = cpu_to_scr(6); + + /* + * patch requested size into sense command + */ + cp->sensecmd[0] = 0x03; + cp->sensecmd[1] = 0; + if (tp->tinfo.curr.scsi_version <= 2 && cp->lun <= 7) + cp->sensecmd[1] = cp->lun << 5; + cp->sensecmd[4] = SYM_SNS_BBUF_LEN; + cp->data_len = SYM_SNS_BBUF_LEN; + + /* + * sense data + */ + bzero(cp->sns_bbuf, SYM_SNS_BBUF_LEN); + cp->phys.sense.addr = cpu_to_scr(vtobus(cp->sns_bbuf)); + cp->phys.sense.size = cpu_to_scr(SYM_SNS_BBUF_LEN); + + /* + * requeue the command. + */ + startp = SCRIPTB_BA (np, sdata_in); + + cp->phys.head.savep = cpu_to_scr(startp); + cp->phys.head.lastp = cpu_to_scr(startp); + cp->startp = cpu_to_scr(startp); + cp->goalp = cpu_to_scr(startp + 16); + + cp->host_xflags = 0; + cp->host_status = cp->nego_status ? HS_NEGOTIATE : HS_BUSY; + cp->ssss_status = S_ILLEGAL; + cp->host_flags = (HF_SENSE|HF_DATA_IN); + cp->xerr_status = 0; + cp->extra_bytes = 0; + + cp->phys.head.go.start = cpu_to_scr(SCRIPTA_BA (np, select)); + + /* + * Requeue the command. + */ + sym_put_start_queue(np, cp); + + /* + * Give back to upper layer everything we have dequeued. + */ + sym_flush_comp_queue(np, 0); + break; + } +} + +/* + * After a device has accepted some management message + * as BUS DEVICE RESET, ABORT TASK, etc ..., or when + * a device signals a UNIT ATTENTION condition, some + * tasks are thrown away by the device. 
We are required + * to reflect that on our tasks list since the device + * will never complete these tasks. + * + * This function move from the BUSY queue to the COMP + * queue all disconnected CCBs for a given target that + * match the following criteria: + * - lun=-1 means any logical UNIT otherwise a given one. + * - task=-1 means any task, otherwise a given one. + */ +int sym_clear_tasks(hcb_p np, int cam_status, int target, int lun, int task) +{ + SYM_QUEHEAD qtmp, *qp; + int i = 0; + ccb_p cp; + + /* + * Move the entire BUSY queue to our temporary queue. + */ + sym_que_init(&qtmp); + sym_que_splice(&np->busy_ccbq, &qtmp); + sym_que_init(&np->busy_ccbq); + + /* + * Put all CCBs that matches our criteria into + * the COMP queue and put back other ones into + * the BUSY queue. + */ + while ((qp = sym_remque_head(&qtmp)) != 0) { + cam_ccb_p ccb; + cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + ccb = cp->cam_ccb; + if (cp->host_status != HS_DISCONNECT || + cp->target != target || + (lun != -1 && cp->lun != lun) || + (task != -1 && + (cp->tag != NO_TAG && cp->scsi_smsg[2] != task))) { + sym_insque_tail(&cp->link_ccbq, &np->busy_ccbq); + continue; + } + sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq); + + /* Preserve the software timeout condition */ + if (sym_get_cam_status(ccb) != CAM_CMD_TIMEOUT) + sym_set_cam_status(ccb, cam_status); + ++i; +#if 0 +printf("XXXX TASK @%p CLEARED\n", cp); +#endif + } + return i; +} + +/* + * chip handler for TASKS recovery + * + * We cannot safely abort a command, while the SCRIPTS + * processor is running, since we just would be in race + * with it. + * + * As long as we have tasks to abort, we keep the SEM + * bit set in the ISTAT. When this bit is set, the + * SCRIPTS processor interrupts (SIR_SCRIPT_STOPPED) + * each time it enters the scheduler. + * + * If we have to reset a target, clear tasks of a unit, + * or to perform the abort of a disconnected job, we + * restart the SCRIPTS for selecting the target. 
Once + * selected, the SCRIPTS interrupts (SIR_TARGET_SELECTED). + * If it loses arbitration, the SCRIPTS will interrupt again + * the next time it will enter its scheduler, and so on ... + * + * On SIR_TARGET_SELECTED, we scan for the more + * appropriate thing to do: + * + * - If nothing, we just sent a M_ABORT message to the + * target to get rid of the useless SCSI bus ownership. + * According to the specs, no tasks shall be affected. + * - If the target is to be reset, we send it a M_RESET + * message. + * - If a logical UNIT is to be cleared , we send the + * IDENTIFY(lun) + M_ABORT. + * - If an untagged task is to be aborted, we send the + * IDENTIFY(lun) + M_ABORT. + * - If a tagged task is to be aborted, we send the + * IDENTIFY(lun) + task attributes + M_ABORT_TAG. + * + * Once our 'kiss of death' :) message has been accepted + * by the target, the SCRIPTS interrupts again + * (SIR_ABORT_SENT). On this interrupt, we complete + * all the CCBs that should have been aborted by the + * target according to our message. + */ +static void sym_sir_task_recovery(hcb_p np, int num) +{ + SYM_QUEHEAD *qp; + ccb_p cp; + tcb_p tp; + int target=-1, lun=-1, task; + int i, k; + + switch(num) { + /* + * The SCRIPTS processor stopped before starting + * the next command in order to allow us to perform + * some task recovery. + */ + case SIR_SCRIPT_STOPPED: + /* + * Do we have any target to reset or unit to clear ? + */ + for (i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { + tp = &np->target[i]; + if (tp->to_reset || + (tp->lun0p && tp->lun0p->to_clear)) { + target = i; + break; + } + if (!tp->lunmp) + continue; + for (k = 1 ; k < SYM_CONF_MAX_LUN ; k++) { + if (tp->lunmp[k] && tp->lunmp[k]->to_clear) { + target = i; + break; + } + } + if (target != -1) + break; + } + + /* + * If not, walk the busy queue for any + * disconnected CCB to be aborted. 
+ */ + if (target == -1) { + FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { + cp = sym_que_entry(qp,struct sym_ccb,link_ccbq); + if (cp->host_status != HS_DISCONNECT) + continue; + if (cp->to_abort) { + target = cp->target; + break; + } + } + } + + /* + * If some target is to be selected, + * prepare and start the selection. + */ + if (target != -1) { + tp = &np->target[target]; + np->abrt_sel.sel_id = target; + np->abrt_sel.sel_scntl3 = tp->head.wval; + np->abrt_sel.sel_sxfer = tp->head.sval; + OUTL(nc_dsa, np->hcb_ba); + OUTL_DSP (SCRIPTB_BA (np, sel_for_abort)); + return; + } + + /* + * Now look for a CCB to abort that haven't started yet. + * Btw, the SCRIPTS processor is still stopped, so + * we are not in race. + */ + i = 0; + cp = 0; + FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { + cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + if (cp->host_status != HS_BUSY && + cp->host_status != HS_NEGOTIATE) + continue; + if (!cp->to_abort) + continue; +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If we are using IMMEDIATE ARBITRATION, we donnot + * want to cancel the last queued CCB, since the + * SCRIPTS may have anticipated the selection. + */ + if (cp == np->last_cp) { + cp->to_abort = 0; + continue; + } +#endif + i = 1; /* Means we have found some */ + break; + } + if (!i) { + /* + * We are done, so we donnot need + * to synchronize with the SCRIPTS anylonger. + * Remove the SEM flag from the ISTAT. + */ + np->istat_sem = 0; + OUTB (nc_istat, SIGP); + break; + } + /* + * Compute index of next position in the start + * queue the SCRIPTS intends to start and dequeue + * all CCBs for that device that haven't been started. + */ + i = (INL (nc_scratcha) - np->squeue_ba) / 4; + i = sym_dequeue_from_squeue(np, i, cp->target, cp->lun, -1); + + /* + * Make sure at least our IO to abort has been dequeued. 
+ */ +#ifndef SYM_OPT_HANDLE_DEVICE_QUEUEING + assert(i && sym_get_cam_status(cp->cam_ccb) == CAM_REQUEUE_REQ); +#else + sym_remque(&cp->link_ccbq); + sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq); +#endif + /* + * Keep track in cam status of the reason of the abort. + */ + if (cp->to_abort == 2) + sym_set_cam_status(cp->cam_ccb, CAM_CMD_TIMEOUT); + else + sym_set_cam_status(cp->cam_ccb, CAM_REQ_ABORTED); + + /* + * Complete with error everything that we have dequeued. + */ + sym_flush_comp_queue(np, 0); + break; + /* + * The SCRIPTS processor has selected a target + * we may have some manual recovery to perform for. + */ + case SIR_TARGET_SELECTED: + target = (INB (nc_sdid) & 0xf); + tp = &np->target[target]; + + np->abrt_tbl.addr = cpu_to_scr(vtobus(np->abrt_msg)); + + /* + * If the target is to be reset, prepare a + * M_RESET message and clear the to_reset flag + * since we donnot expect this operation to fail. + */ + if (tp->to_reset) { + np->abrt_msg[0] = M_RESET; + np->abrt_tbl.size = 1; + tp->to_reset = 0; + break; + } + + /* + * Otherwise, look for some logical unit to be cleared. + */ + if (tp->lun0p && tp->lun0p->to_clear) + lun = 0; + else if (tp->lunmp) { + for (k = 1 ; k < SYM_CONF_MAX_LUN ; k++) { + if (tp->lunmp[k] && tp->lunmp[k]->to_clear) { + lun = k; + break; + } + } + } + + /* + * If a logical unit is to be cleared, prepare + * an IDENTIFY(lun) + ABORT MESSAGE. + */ + if (lun != -1) { + lcb_p lp = sym_lp(np, tp, lun); + lp->to_clear = 0; /* We donnot expect to fail here */ + np->abrt_msg[0] = M_IDENTIFY | lun; + np->abrt_msg[1] = M_ABORT; + np->abrt_tbl.size = 2; + break; + } + + /* + * Otherwise, look for some disconnected job to + * abort for this target. 
+ */
+ i = 0;
+ cp = 0;
+ FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
+ cp = sym_que_entry(qp, struct sym_ccb, link_ccbq);
+ if (cp->host_status != HS_DISCONNECT)
+ continue;
+ if (cp->target != target)
+ continue;
+ if (!cp->to_abort)
+ continue;
+ i = 1; /* Means we have some */
+ break;
+ }
+
+ /*
+ * If we have none, probably since the device has
+ * completed the command before we won arbitration,
+ * send a M_ABORT message without IDENTIFY.
+ * According to the specs, the device must just
+ * disconnect the BUS and not abort any task.
+ */
+ if (!i) {
+ np->abrt_msg[0] = M_ABORT;
+ np->abrt_tbl.size = 1;
+ break;
+ }
+
+ /*
+ * We have some task to abort.
+ * Set the IDENTIFY(lun)
+ */
+ np->abrt_msg[0] = M_IDENTIFY | cp->lun;
+
+ /*
+ * If we want to abort an untagged command, we
+ * will send an IDENTIFY + M_ABORT.
+ * Otherwise (tagged command), we will send
+ * an IDENTIFY + task attributes + ABORT TAG.
+ */
+ if (cp->tag == NO_TAG) {
+ np->abrt_msg[1] = M_ABORT;
+ np->abrt_tbl.size = 2;
+ }
+ else {
+ np->abrt_msg[1] = cp->scsi_smsg[1];
+ np->abrt_msg[2] = cp->scsi_smsg[2];
+ np->abrt_msg[3] = M_ABORT_TAG;
+ np->abrt_tbl.size = 4;
+ }
+ /*
+ * Keep track of software timeout condition, since the
+ * peripheral driver may not count retries on abort
+ * conditions not due to timeout.
+ */
+ if (cp->to_abort == 2)
+ sym_set_cam_status(cp->cam_ccb, CAM_CMD_TIMEOUT);
+ cp->to_abort = 0; /* We do not expect to fail here */
+ break;
+
+ /*
+ * The target has accepted our message and switched
+ * to BUS FREE phase as we expected.
+ */
+ case SIR_ABORT_SENT:
+ target = (INB (nc_sdid) & 0xf);
+ tp = &np->target[target];
+
+ /*
+ ** If we didn't abort anything, leave here.
+ */
+ if (np->abrt_msg[0] == M_ABORT)
+ break;
+
+ /*
+ * If we sent a M_RESET, then a hardware reset has
+ * been performed by the target.
+ * - Reset everything to async 8 bit
+ * - Tell ourself to negotiate next time :-)
+ * - Prepare to clear all disconnected CCBs for
+ * this target from our task list (lun=task=-1)
+ */
+ lun = -1;
+ task = -1;
+ if (np->abrt_msg[0] == M_RESET) {
+ tp->head.sval = 0;
+ tp->head.wval = np->rv_scntl3;
+ tp->head.uval = 0;
+ tp->tinfo.curr.period = 0;
+ tp->tinfo.curr.offset = 0;
+ tp->tinfo.curr.width = BUS_8_BIT;
+ tp->tinfo.curr.options = 0;
+ }
+
+ /*
+ * Otherwise, check for the LUN and TASK(s)
+ * concerned by the cancellation.
+ * If it is not ABORT_TAG then it is CLEAR_QUEUE
+ * or an ABORT message :-)
+ */
+ else {
+ lun = np->abrt_msg[0] & 0x3f;
+ if (np->abrt_msg[1] == M_ABORT_TAG)
+ task = np->abrt_msg[2];
+ }
+
+ /*
+ * Complete all the CCBs the device should have
+ * aborted due to our 'kiss of death' message.
+ */
+ i = (INL (nc_scratcha) - np->squeue_ba) / 4;
+ (void) sym_dequeue_from_squeue(np, i, target, lun, -1);
+ (void) sym_clear_tasks(np, CAM_REQ_ABORTED, target, lun, task);
+ sym_flush_comp_queue(np, 0);
+
+ /*
+ * If we sent a BDR, make upper layer aware of that.
+ */
+ if (np->abrt_msg[0] == M_RESET)
+ sym_xpt_async_sent_bdr(np, target);
+ break;
+ }
+
+ /*
+ * Print to the log the message we intend to send.
+ */
+ if (num == SIR_TARGET_SELECTED) {
+ PRINT_TARGET(np, target);
+ sym_printl_hex("control msgout:", np->abrt_msg,
+ np->abrt_tbl.size);
+ np->abrt_tbl.size = cpu_to_scr(np->abrt_tbl.size);
+ }
+
+ /*
+ * Let the SCRIPTS processor continue.
+ */
+ OUTONB_STD ();
+}
+
+/*
+ * Gerard's alchemy:) that deals with the data
+ * pointer for both MDP and the residual calculation.
+ *
+ * I didn't want to bloat the code by more than 200
+ * lines for the handling of both MDP and the residual.
+ * This has been achieved by using a data pointer + * representation consisting in an index in the data + * array (dp_sg) and a negative offset (dp_ofs) that + * have the following meaning: + * + * - dp_sg = SYM_CONF_MAX_SG + * we are at the end of the data script. + * - dp_sg < SYM_CONF_MAX_SG + * dp_sg points to the next entry of the scatter array + * we want to transfer. + * - dp_ofs < 0 + * dp_ofs represents the residual of bytes of the + * previous entry scatter entry we will send first. + * - dp_ofs = 0 + * no residual to send first. + * + * The function sym_evaluate_dp() accepts an arbitray + * offset (basically from the MDP message) and returns + * the corresponding values of dp_sg and dp_ofs. + */ + +static int sym_evaluate_dp(hcb_p np, ccb_p cp, u32 scr, int *ofs) +{ + u32 dp_scr; + int dp_ofs, dp_sg, dp_sgmin; + int tmp; + struct sym_pmc *pm; + + /* + * Compute the resulted data pointer in term of a script + * address within some DATA script and a signed byte offset. + */ + dp_scr = scr; + dp_ofs = *ofs; + if (dp_scr == SCRIPTA_BA (np, pm0_data)) + pm = &cp->phys.pm0; + else if (dp_scr == SCRIPTA_BA (np, pm1_data)) + pm = &cp->phys.pm1; + else + pm = 0; + + if (pm) { + dp_scr = scr_to_cpu(pm->ret); + dp_ofs -= scr_to_cpu(pm->sg.size); + } + + /* + * If we are auto-sensing, then we are done. + */ + if (cp->host_flags & HF_SENSE) { + *ofs = dp_ofs; + return 0; + } + + /* + * Deduce the index of the sg entry. + * Keep track of the index of the first valid entry. + * If result is dp_sg = SYM_CONF_MAX_SG, then we are at the + * end of the data. + */ + tmp = scr_to_cpu(sym_goalp(cp)); + dp_sg = SYM_CONF_MAX_SG; + if (dp_scr != tmp) + dp_sg -= (tmp - 8 - (int)dp_scr) / (2*4); + dp_sgmin = SYM_CONF_MAX_SG - cp->segments; + + /* + * Move to the sg entry the data pointer belongs to. 
+ * + * If we are inside the data area, we expect result to be: + * + * Either, + * dp_ofs = 0 and dp_sg is the index of the sg entry + * the data pointer belongs to (or the end of the data) + * Or, + * dp_ofs < 0 and dp_sg is the index of the sg entry + * the data pointer belongs to + 1. + */ + if (dp_ofs < 0) { + int n; + while (dp_sg > dp_sgmin) { + --dp_sg; + tmp = scr_to_cpu(cp->phys.data[dp_sg].size); + n = dp_ofs + (tmp & 0xffffff); + if (n > 0) { + ++dp_sg; + break; + } + dp_ofs = n; + } + } + else if (dp_ofs > 0) { + while (dp_sg < SYM_CONF_MAX_SG) { + tmp = scr_to_cpu(cp->phys.data[dp_sg].size); + dp_ofs -= (tmp & 0xffffff); + ++dp_sg; + if (dp_ofs <= 0) + break; + } + } + + /* + * Make sure the data pointer is inside the data area. + * If not, return some error. + */ + if (dp_sg < dp_sgmin || (dp_sg == dp_sgmin && dp_ofs < 0)) + goto out_err; + else if (dp_sg > SYM_CONF_MAX_SG || + (dp_sg == SYM_CONF_MAX_SG && dp_ofs > 0)) + goto out_err; + + /* + * Save the extreme pointer if needed. + */ + if (dp_sg > cp->ext_sg || + (dp_sg == cp->ext_sg && dp_ofs > cp->ext_ofs)) { + cp->ext_sg = dp_sg; + cp->ext_ofs = dp_ofs; + } + + /* + * Return data. + */ + *ofs = dp_ofs; + return dp_sg; + +out_err: + return -1; +} + +/* + * chip handler for MODIFY DATA POINTER MESSAGE + * + * We also call this function on IGNORE WIDE RESIDUE + * messages that do not match a SWIDE full condition. + * Btw, we assume in that situation that such a message + * is equivalent to a MODIFY DATA POINTER (offset=-1). + */ + +static void sym_modify_dp(hcb_p np, tcb_p tp, ccb_p cp, int ofs) +{ + int dp_ofs = ofs; + u32 dp_scr = sym_get_script_dp (np, cp); + u32 dp_ret; + u32 tmp; + u_char hflags; + int dp_sg; + struct sym_pmc *pm; + + /* + * Not supported for auto-sense. + */ + if (cp->host_flags & HF_SENSE) + goto out_reject; + + /* + * Apply our alchemy:) (see comments in sym_evaluate_dp()), + * to the resulted data pointer. 
+ */ + dp_sg = sym_evaluate_dp(np, cp, dp_scr, &dp_ofs); + if (dp_sg < 0) + goto out_reject; + + /* + * And our alchemy:) allows to easily calculate the data + * script address we want to return for the next data phase. + */ + dp_ret = cpu_to_scr(sym_goalp(cp)); + dp_ret = dp_ret - 8 - (SYM_CONF_MAX_SG - dp_sg) * (2*4); + + /* + * If offset / scatter entry is zero we donnot need + * a context for the new current data pointer. + */ + if (dp_ofs == 0) { + dp_scr = dp_ret; + goto out_ok; + } + + /* + * Get a context for the new current data pointer. + */ + hflags = INB (HF_PRT); + + if (hflags & HF_DP_SAVED) + hflags ^= HF_ACT_PM; + + if (!(hflags & HF_ACT_PM)) { + pm = &cp->phys.pm0; + dp_scr = SCRIPTA_BA (np, pm0_data); + } + else { + pm = &cp->phys.pm1; + dp_scr = SCRIPTA_BA (np, pm1_data); + } + + hflags &= ~(HF_DP_SAVED); + + OUTB (HF_PRT, hflags); + + /* + * Set up the new current data pointer. + * ofs < 0 there, and for the next data phase, we + * want to transfer part of the data of the sg entry + * corresponding to index dp_sg-1 prior to returning + * to the main data script. + */ + pm->ret = cpu_to_scr(dp_ret); + tmp = scr_to_cpu(cp->phys.data[dp_sg-1].addr); + tmp += scr_to_cpu(cp->phys.data[dp_sg-1].size) + dp_ofs; + pm->sg.addr = cpu_to_scr(tmp); + pm->sg.size = cpu_to_scr(-dp_ofs); + +out_ok: + sym_set_script_dp (np, cp, dp_scr); + OUTL_DSP (SCRIPTA_BA (np, clrack)); + return; + +out_reject: + OUTL_DSP (SCRIPTB_BA (np, msg_bad)); +} + + +/* + * chip calculation of the data residual. + * + * As I used to say, the requirement of data residual + * in SCSI is broken, useless and cannot be achieved + * without huge complexity. + * But most OSes and even the official CAM require it. + * When stupidity happens to be so widely spread inside + * a community, it gets hard to convince. + * + * Anyway, I don't care, since I am not going to use + * any software that considers this data residual as + * a relevant information. 
:) + */ + +int sym_compute_residual(hcb_p np, ccb_p cp) +{ + int dp_sg, dp_sgmin, resid = 0; + int dp_ofs = 0; + + /* + * Check for some data lost or just thrown away. + * We are not required to be quite accurate in this + * situation. Btw, if we are odd for output and the + * device claims some more data, it may well happen + * than our residual be zero. :-) + */ + if (cp->xerr_status & (XE_EXTRA_DATA|XE_SODL_UNRUN|XE_SWIDE_OVRUN)) { + if (cp->xerr_status & XE_EXTRA_DATA) + resid -= cp->extra_bytes; + if (cp->xerr_status & XE_SODL_UNRUN) + ++resid; + if (cp->xerr_status & XE_SWIDE_OVRUN) + --resid; + } + + /* + * If all data has been transferred, + * there is no residual. + */ + if (cp->phys.head.lastp == sym_goalp(cp)) + return resid; + + /* + * If no data transfer occurs, or if the data + * pointer is weird, return full residual. + */ + if (cp->startp == cp->phys.head.lastp || + sym_evaluate_dp(np, cp, scr_to_cpu(cp->phys.head.lastp), + &dp_ofs) < 0) { + return cp->data_len; + } + + /* + * If we were auto-sensing, then we are done. + */ + if (cp->host_flags & HF_SENSE) { + return -dp_ofs; + } + + /* + * We are now full comfortable in the computation + * of the data residual (2's complement). + */ + dp_sgmin = SYM_CONF_MAX_SG - cp->segments; + resid = -cp->ext_ofs; + for (dp_sg = cp->ext_sg; dp_sg < SYM_CONF_MAX_SG; ++dp_sg) { + u_int tmp = scr_to_cpu(cp->phys.data[dp_sg].size); + resid += (tmp & 0xffffff); + } + + /* + * Hopefully, the result is not too wrong. + */ + return resid; +} + +/* + * Negotiation for WIDE and SYNCHRONOUS DATA TRANSFER. + * + * When we try to negotiate, we append the negotiation message + * to the identify and (maybe) simple tag message. + * The host status field is set to HS_NEGOTIATE to mark this + * situation. + * + * If the target doesn't answer this message immediately + * (as required by the standard), the SIR_NEGO_FAILED interrupt + * will be raised eventually. 
+ * The handler removes the HS_NEGOTIATE status, and sets the + * negotiated value to the default (async / nowide). + * + * If we receive a matching answer immediately, we check it + * for validity, and set the values. + * + * If we receive a Reject message immediately, we assume the + * negotiation has failed, and fall back to standard values. + * + * If we receive a negotiation message while not in HS_NEGOTIATE + * state, it's a target initiated negotiation. We prepare a + * (hopefully) valid answer, set our parameters, and send back + * this answer to the target. + * + * If the target doesn't fetch the answer (no message out phase), + * we assume the negotiation has failed, and fall back to default + * settings (SIR_NEGO_PROTO interrupt). + * + * When we set the values, we adjust them in all ccbs belonging + * to this target, in the controller's register, and in the "phys" + * field of the controller's struct sym_hcb. + */ + +/* + * chip handler for SYNCHRONOUS DATA TRANSFER REQUEST (SDTR) message. + */ +static int +sym_sync_nego_check(hcb_p np, int req, int target) +{ + tcb_p tp = &np->target[target]; + u_char chg, ofs, per, fak, div; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, target, "sync msgin", np->msgin); + }; + + /* + * Get requested values. + */ + chg = 0; + per = np->msgin[3]; + ofs = np->msgin[4]; + + /* + * Check values against our limits. + */ + if (ofs) { + if (ofs > np->maxoffs) + {chg = 1; ofs = np->maxoffs;} + if (req) { + if (ofs > tp->tinfo.user.offset) + {chg = 1; ofs = tp->tinfo.user.offset;} + } + } + + if (ofs) { + if (per < np->minsync) + {chg = 1; per = np->minsync;} + if (req) { + if (per < tp->tinfo.user.period) + {chg = 1; per = tp->tinfo.user.period;} + } + } + + /* + * Get new chip synchronous parameters value. 
+ */ + div = fak = 0; + if (ofs && sym_getsync(np, 0, per, &div, &fak) < 0) + goto reject_it; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + PRINT_TARGET(np, target); + printf ("sdtr: ofs=%d per=%d div=%d fak=%d chg=%d.\n", + ofs, per, div, fak, chg); + } + + /* + * If it was an answer we want to change, + * then it isn't acceptable. Reject it. + */ + if (!req && chg) + goto reject_it; + + /* + * Apply new values. + */ + sym_setsync (np, target, ofs, per, div, fak); + + /* + * It was an answer. We are done. + */ + if (!req) + return 0; + + /* + * It was a request. Prepare an answer message. + */ + np->msgout[0] = M_EXTENDED; + np->msgout[1] = 3; + np->msgout[2] = M_X_SYNC_REQ; + np->msgout[3] = per; + np->msgout[4] = ofs; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, target, "sync msgout", np->msgout); + } + + np->msgin [0] = M_NOOP; + + return 0; + +reject_it: + sym_setsync (np, target, 0, 0, 0, 0); + return -1; +} + +static void sym_sync_nego(hcb_p np, tcb_p tp, ccb_p cp) +{ + int req = 1; + int result; + + /* + * Request or answer ? + */ + if (INB (HS_PRT) == HS_NEGOTIATE) { + OUTB (HS_PRT, HS_BUSY); + if (cp->nego_status && cp->nego_status != NS_SYNC) + goto reject_it; + req = 0; + } + + /* + * Check and apply new values. + */ + result = sym_sync_nego_check(np, req, cp->target); + if (result) /* Not acceptable, reject it */ + goto reject_it; + if (req) { /* Was a request, send response. */ + cp->nego_status = NS_SYNC; + OUTL_DSP (SCRIPTB_BA (np, sdtr_resp)); + } + else /* Was a response, we are done. */ + OUTL_DSP (SCRIPTA_BA (np, clrack)); + return; + +reject_it: + OUTL_DSP (SCRIPTB_BA (np, msg_bad)); +} + +/* + * chip handler for PARALLEL PROTOCOL REQUEST (PPR) message. + */ +static int +sym_ppr_nego_check(hcb_p np, int req, int target) +{ + tcb_p tp = &np->target[target]; + u_char chg, ofs, per, fak, dt, div, wide; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, target, "ppr msgin", np->msgin); + }; + + /* + * Get requested values. 
+ */ + chg = 0; + per = np->msgin[3]; + ofs = np->msgin[5]; + wide = np->msgin[6]; + dt = np->msgin[7] & PPR_OPT_DT; + + /* + * Check values against our limits. + */ + if (wide > np->maxwide) + {chg = 1; wide = np->maxwide;} + if (!wide || !(np->features & FE_ULTRA3)) + dt &= ~PPR_OPT_DT; + if (req) { + if (wide > tp->tinfo.user.width) + {chg = 1; wide = tp->tinfo.user.width;} + } + + if (!(np->features & FE_U3EN)) /* Broken U3EN bit not supported */ + dt &= ~PPR_OPT_DT; + + if (dt != (np->msgin[7] & PPR_OPT_MASK)) chg = 1; + + if (ofs) { + if (dt) { + if (ofs > np->maxoffs_dt) + {chg = 1; ofs = np->maxoffs_dt;} + } + else if (ofs > np->maxoffs) + {chg = 1; ofs = np->maxoffs;} + if (req) { + if (ofs > tp->tinfo.user.offset) + {chg = 1; ofs = tp->tinfo.user.offset;} + } + } + + if (ofs) { + if (dt) { + if (per < np->minsync_dt) + {chg = 1; per = np->minsync_dt;} + } + else if (per < np->minsync) + {chg = 1; per = np->minsync;} + if (req) { + if (per < tp->tinfo.user.period) + {chg = 1; per = tp->tinfo.user.period;} + } + } + + /* + * Get new chip synchronous parameters value. + */ + div = fak = 0; + if (ofs && sym_getsync(np, dt, per, &div, &fak) < 0) + goto reject_it; + + /* + * If it was an answer we want to change, + * then it isn't acceptable. Reject it. + */ + if (!req && chg) + goto reject_it; + + /* + * Apply new values. + */ + sym_setpprot (np, target, dt, ofs, per, wide, div, fak); + + /* + * It was an answer. We are done. + */ + if (!req) + return 0; + + /* + * It was a request. Prepare an answer message. 
+ */ + np->msgout[0] = M_EXTENDED; + np->msgout[1] = 6; + np->msgout[2] = M_X_PPR_REQ; + np->msgout[3] = per; + np->msgout[4] = 0; + np->msgout[5] = ofs; + np->msgout[6] = wide; + np->msgout[7] = dt; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, target, "ppr msgout", np->msgout); + } + + np->msgin [0] = M_NOOP; + + return 0; + +reject_it: + sym_setpprot (np, target, 0, 0, 0, 0, 0, 0); + /* + * If it is a device response that should result in + * ST, we may want to try a legacy negotiation later. + */ + if (!req && !dt) { + tp->tinfo.goal.options = 0; + tp->tinfo.goal.width = wide; + tp->tinfo.goal.period = per; + tp->tinfo.goal.offset = ofs; + } + return -1; +} + +static void sym_ppr_nego(hcb_p np, tcb_p tp, ccb_p cp) +{ + int req = 1; + int result; + + /* + * Request or answer ? + */ + if (INB (HS_PRT) == HS_NEGOTIATE) { + OUTB (HS_PRT, HS_BUSY); + if (cp->nego_status && cp->nego_status != NS_PPR) + goto reject_it; + req = 0; + } + + /* + * Check and apply new values. + */ + result = sym_ppr_nego_check(np, req, cp->target); + if (result) /* Not acceptable, reject it */ + goto reject_it; + if (req) { /* Was a request, send response. */ + cp->nego_status = NS_PPR; + OUTL_DSP (SCRIPTB_BA (np, ppr_resp)); + } + else /* Was a response, we are done. */ + OUTL_DSP (SCRIPTA_BA (np, clrack)); + return; + +reject_it: + OUTL_DSP (SCRIPTB_BA (np, msg_bad)); +} + +/* + * chip handler for WIDE DATA TRANSFER REQUEST (WDTR) message. + */ +static int +sym_wide_nego_check(hcb_p np, int req, int target) +{ + tcb_p tp = &np->target[target]; + u_char chg, wide; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, target, "wide msgin", np->msgin); + }; + + /* + * Get requested values. + */ + chg = 0; + wide = np->msgin[3]; + + /* + * Check values against our limits. 
+ */ + if (wide > np->maxwide) + {chg = 1; wide = np->maxwide;} + if (req) { + if (wide > tp->tinfo.user.width) + {chg = 1; wide = tp->tinfo.user.width;} + } + + if (DEBUG_FLAGS & DEBUG_NEGO) { + PRINT_TARGET(np, target); + printf ("wdtr: wide=%d chg=%d.\n", wide, chg); + } + + /* + * If it was an answer we want to change, + * then it isn't acceptable. Reject it. + */ + if (!req && chg) + goto reject_it; + + /* + * Apply new values. + */ + sym_setwide (np, target, wide); + + /* + * It was an answer. We are done. + */ + if (!req) + return 0; + + /* + * It was a request. Prepare an answer message. + */ + np->msgout[0] = M_EXTENDED; + np->msgout[1] = 2; + np->msgout[2] = M_X_WIDE_REQ; + np->msgout[3] = wide; + + np->msgin [0] = M_NOOP; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, target, "wide msgout", np->msgout); + } + + return 0; + +reject_it: + return -1; +} + +static void sym_wide_nego(hcb_p np, tcb_p tp, ccb_p cp) +{ + int req = 1; + int result; + + /* + * Request or answer ? + */ + if (INB (HS_PRT) == HS_NEGOTIATE) { + OUTB (HS_PRT, HS_BUSY); + if (cp->nego_status && cp->nego_status != NS_WIDE) + goto reject_it; + req = 0; + } + + /* + * Check and apply new values. + */ + result = sym_wide_nego_check(np, req, cp->target); + if (result) /* Not acceptable, reject it */ + goto reject_it; + if (req) { /* Was a request, send response. */ + cp->nego_status = NS_WIDE; + OUTL_DSP (SCRIPTB_BA (np, wdtr_resp)); + } + else { /* Was a response. */ + /* + * Negotiate for SYNC immediately after WIDE response. + * This allows to negotiate for both WIDE and SYNC on + * a single SCSI command (Suggested by Justin Gibbs). 
+ */ + if (tp->tinfo.goal.offset) { + np->msgout[0] = M_EXTENDED; + np->msgout[1] = 3; + np->msgout[2] = M_X_SYNC_REQ; + np->msgout[3] = tp->tinfo.goal.period; + np->msgout[4] = tp->tinfo.goal.offset; + + if (DEBUG_FLAGS & DEBUG_NEGO) { + sym_print_nego_msg(np, cp->target, + "sync msgout", np->msgout); + } + + cp->nego_status = NS_SYNC; + OUTB (HS_PRT, HS_NEGOTIATE); + OUTL_DSP (SCRIPTB_BA (np, sdtr_resp)); + return; + } + else + OUTL_DSP (SCRIPTA_BA (np, clrack)); + }; + + return; + +reject_it: + OUTL_DSP (SCRIPTB_BA (np, msg_bad)); +} + +/* + * Reset DT, SYNC or WIDE to default settings. + * + * Called when a negotiation does not succeed either + * on rejection or on protocol error. + * + * A target that understands a PPR message should never + * reject it, and messing with it is very unlikely. + * So, if a PPR makes problems, we may just want to + * try a legacy negotiation later. + */ +static void sym_nego_default(hcb_p np, tcb_p tp, ccb_p cp) +{ + switch (cp->nego_status) { + case NS_PPR: +#if 0 + sym_setpprot (np, cp->target, 0, 0, 0, 0, 0, 0); +#else + tp->tinfo.goal.options = 0; + if (tp->tinfo.goal.period < np->minsync) + tp->tinfo.goal.period = np->minsync; + if (tp->tinfo.goal.offset > np->maxoffs) + tp->tinfo.goal.offset = np->maxoffs; +#endif + break; + case NS_SYNC: + sym_setsync (np, cp->target, 0, 0, 0, 0); + break; + case NS_WIDE: + sym_setwide (np, cp->target, 0); + break; + }; + np->msgin [0] = M_NOOP; + np->msgout[0] = M_NOOP; + cp->nego_status = 0; +} + +/* + * chip handler for MESSAGE REJECT received in response to + * PPR, WIDE or SYNCHRONOUS negotiation. + */ +static void sym_nego_rejected(hcb_p np, tcb_p tp, ccb_p cp) +{ + sym_nego_default(np, tp, cp); + OUTB (HS_PRT, HS_BUSY); +} + +/* + * chip exception handler for programmed interrupts. 
+ */ +static void sym_int_sir (hcb_p np) +{ + u_char num = INB (nc_dsps); + u32 dsa = INL (nc_dsa); + ccb_p cp = sym_ccb_from_dsa(np, dsa); + u_char target = INB (nc_sdid) & 0x0f; + tcb_p tp = &np->target[target]; + int tmp; + + if (DEBUG_FLAGS & DEBUG_TINY) printf ("I#%d", num); + + switch (num) { +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 + /* + * SCRIPTS tell us that we may have to update + * 64 bit DMA segment registers. + */ + case SIR_DMAP_DIRTY: + sym_update_dmap_regs(np); + goto out; +#endif + /* + * Command has been completed with error condition + * or has been auto-sensed. + */ + case SIR_COMPLETE_ERROR: + sym_complete_error(np, cp); + return; + /* + * The C code is currently trying to recover from something. + * Typically, user want to abort some command. + */ + case SIR_SCRIPT_STOPPED: + case SIR_TARGET_SELECTED: + case SIR_ABORT_SENT: + sym_sir_task_recovery(np, num); + return; + /* + * The device didn't go to MSG OUT phase after having + * been selected with ATN. We donnot want to handle + * that. + */ + case SIR_SEL_ATN_NO_MSG_OUT: + printf ("%s:%d: No MSG OUT phase after selection with ATN.\n", + sym_name (np), target); + goto out_stuck; + /* + * The device didn't switch to MSG IN phase after + * having reseleted the initiator. + */ + case SIR_RESEL_NO_MSG_IN: + printf ("%s:%d: No MSG IN phase after reselection.\n", + sym_name (np), target); + goto out_stuck; + /* + * After reselection, the device sent a message that wasn't + * an IDENTIFY. + */ + case SIR_RESEL_NO_IDENTIFY: + printf ("%s:%d: No IDENTIFY after reselection.\n", + sym_name (np), target); + goto out_stuck; + /* + * The device reselected a LUN we donnot know about. + */ + case SIR_RESEL_BAD_LUN: + np->msgout[0] = M_RESET; + goto out; + /* + * The device reselected for an untagged nexus and we + * haven't any. + */ + case SIR_RESEL_BAD_I_T_L: + np->msgout[0] = M_ABORT; + goto out; + /* + * The device reselected for a tagged nexus that we donnot + * have. 
+ */ + case SIR_RESEL_BAD_I_T_L_Q: + np->msgout[0] = M_ABORT_TAG; + goto out; + /* + * The SCRIPTS let us know that the device has grabbed + * our message and will abort the job. + */ + case SIR_RESEL_ABORTED: + np->lastmsg = np->msgout[0]; + np->msgout[0] = M_NOOP; + printf ("%s:%d: message %x sent on bad reselection.\n", + sym_name (np), target, np->lastmsg); + goto out; + /* + * The SCRIPTS let us know that a message has been + * successfully sent to the device. + */ + case SIR_MSG_OUT_DONE: + np->lastmsg = np->msgout[0]; + np->msgout[0] = M_NOOP; + /* Should we really care of that */ + if (np->lastmsg == M_PARITY || np->lastmsg == M_ID_ERROR) { + if (cp) { + cp->xerr_status &= ~XE_PARITY_ERR; + if (!cp->xerr_status) + OUTOFFB (HF_PRT, HF_EXT_ERR); + } + } + goto out; + /* + * The device didn't send a GOOD SCSI status. + * We may have some work to do prior to allow + * the SCRIPTS processor to continue. + */ + case SIR_BAD_SCSI_STATUS: + if (!cp) + goto out; + sym_sir_bad_scsi_status(np, num, cp); + return; + /* + * We are asked by the SCRIPTS to prepare a + * REJECT message. + */ + case SIR_REJECT_TO_SEND: + sym_print_msg(cp, "M_REJECT to send for ", np->msgin); + np->msgout[0] = M_REJECT; + goto out; + /* + * We have been ODD at the end of a DATA IN + * transfer and the device didn't send a + * IGNORE WIDE RESIDUE message. + * It is a data overrun condition. + */ + case SIR_SWIDE_OVERRUN: + if (cp) { + OUTONB (HF_PRT, HF_EXT_ERR); + cp->xerr_status |= XE_SWIDE_OVRUN; + } + goto out; + /* + * We have been ODD at the end of a DATA OUT + * transfer. + * It is a data underrun condition. + */ + case SIR_SODL_UNDERRUN: + if (cp) { + OUTONB (HF_PRT, HF_EXT_ERR); + cp->xerr_status |= XE_SODL_UNRUN; + } + goto out; + /* + * The device wants us to tranfer more data than + * expected or in the wrong direction. + * The number of extra bytes is in scratcha. + * It is a data overrun condition. 
+ */ + case SIR_DATA_OVERRUN: + if (cp) { + OUTONB (HF_PRT, HF_EXT_ERR); + cp->xerr_status |= XE_EXTRA_DATA; + cp->extra_bytes += INL (nc_scratcha); + } + goto out; + /* + * The device switched to an illegal phase (4/5). + */ + case SIR_BAD_PHASE: + if (cp) { + OUTONB (HF_PRT, HF_EXT_ERR); + cp->xerr_status |= XE_BAD_PHASE; + } + goto out; + /* + * We received a message. + */ + case SIR_MSG_RECEIVED: + if (!cp) + goto out_stuck; + switch (np->msgin [0]) { + /* + * We received an extended message. + * We handle MODIFY DATA POINTER, SDTR, WDTR + * and reject all other extended messages. + */ + case M_EXTENDED: + switch (np->msgin [2]) { + case M_X_MODIFY_DP: + if (DEBUG_FLAGS & DEBUG_POINTER) + sym_print_msg(cp,"modify DP",np->msgin); + tmp = (np->msgin[3]<<24) + (np->msgin[4]<<16) + + (np->msgin[5]<<8) + (np->msgin[6]); + sym_modify_dp(np, tp, cp, tmp); + return; + case M_X_SYNC_REQ: + sym_sync_nego(np, tp, cp); + return; + case M_X_PPR_REQ: + sym_ppr_nego(np, tp, cp); + return; + case M_X_WIDE_REQ: + sym_wide_nego(np, tp, cp); + return; + default: + goto out_reject; + } + break; + /* + * We received a 1/2 byte message not handled from SCRIPTS. + * We are only expecting MESSAGE REJECT and IGNORE WIDE + * RESIDUE messages that haven't been anticipated by + * SCRIPTS on SWIDE full condition. Unanticipated IGNORE + * WIDE RESIDUE messages are aliased as MODIFY DP (-1). + */ + case M_IGN_RESIDUE: + if (DEBUG_FLAGS & DEBUG_POINTER) + sym_print_msg(cp,"ign wide residue", np->msgin); + sym_modify_dp(np, tp, cp, -1); + return; + case M_REJECT: + if (INB (HS_PRT) == HS_NEGOTIATE) + sym_nego_rejected(np, tp, cp); + else { + PRINT_ADDR(cp); + printf ("M_REJECT received (%x:%x).\n", + scr_to_cpu(np->lastmsg), np->msgout[0]); + } + goto out_clrack; + break; + default: + goto out_reject; + } + break; + /* + * We received an unknown message. + * Ignore all MSG IN phases and reject it. 
+ */ + case SIR_MSG_WEIRD: + sym_print_msg(cp, "WEIRD message received", np->msgin); + OUTL_DSP (SCRIPTB_BA (np, msg_weird)); + return; + /* + * Negotiation failed. + * Target does not send us the reply. + * Remove the HS_NEGOTIATE status. + */ + case SIR_NEGO_FAILED: + OUTB (HS_PRT, HS_BUSY); + /* + * Negotiation failed. + * Target does not want answer message. + */ + case SIR_NEGO_PROTO: + sym_nego_default(np, tp, cp); + goto out; + }; + +out: + OUTONB_STD (); + return; +out_reject: + OUTL_DSP (SCRIPTB_BA (np, msg_bad)); + return; +out_clrack: + OUTL_DSP (SCRIPTA_BA (np, clrack)); + return; +out_stuck: +} + +/* + * Acquire a control block + */ +ccb_p sym_get_ccb (hcb_p np, u_char tn, u_char ln, u_char tag_order) +{ + tcb_p tp = &np->target[tn]; + lcb_p lp = sym_lp(np, tp, ln); + u_short tag = NO_TAG; + SYM_QUEHEAD *qp; + ccb_p cp = (ccb_p) 0; + + /* + * Look for a free CCB + */ + if (sym_que_empty(&np->free_ccbq)) + (void) sym_alloc_ccb(np); + qp = sym_remque_head(&np->free_ccbq); + if (!qp) + goto out; + cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + +#ifndef SYM_OPT_HANDLE_DEVICE_QUEUEING + /* + * If the LCB is not yet available and the LUN + * has been probed ok, try to allocate the LCB. + */ + if (!lp && sym_is_bit(tp->lun_map, ln)) { + lp = sym_alloc_lcb(np, tn, ln); + if (!lp) + goto out_free; + } +#endif + + /* + * If the LCB is not available here, then the + * logical unit is not yet discovered. For those + * ones only accept 1 SCSI IO per logical unit, + * since we cannot allow disconnections. + */ + if (!lp) { + if (!sym_is_bit(tp->busy0_map, ln)) + sym_set_bit(tp->busy0_map, ln); + else + goto out_free; + } else { + /* + * If we have been asked for a tagged command. + */ + if (tag_order) { + /* + * Debugging purpose. + */ +#ifndef SYM_OPT_HANDLE_DEVICE_QUEUEING + assert(lp->busy_itl == 0); +#endif + /* + * Allocate resources for tags if not yet. 
+ */ + if (!lp->cb_tags) { + sym_alloc_lcb_tags(np, tn, ln); + if (!lp->cb_tags) + goto out_free; + } + /* + * Get a tag for this SCSI IO and set up + * the CCB bus address for reselection, + * and count it for this LUN. + * Toggle reselect path to tagged. + */ + if (lp->busy_itlq < SYM_CONF_MAX_TASK) { + tag = lp->cb_tags[lp->ia_tag]; + if (++lp->ia_tag == SYM_CONF_MAX_TASK) + lp->ia_tag = 0; + ++lp->busy_itlq; +#ifndef SYM_OPT_HANDLE_DEVICE_QUEUEING + lp->itlq_tbl[tag] = cpu_to_scr(cp->ccb_ba); + lp->head.resel_sa = + cpu_to_scr(SCRIPTA_BA (np, resel_tag)); +#endif +#ifdef SYM_OPT_LIMIT_COMMAND_REORDERING + cp->tags_si = lp->tags_si; + ++lp->tags_sum[cp->tags_si]; + ++lp->tags_since; +#endif + } + else + goto out_free; + } + /* + * This command will not be tagged. + * If we already have either a tagged or untagged + * one, refuse to overlap this untagged one. + */ + else { + /* + * Debugging purpose. + */ +#ifndef SYM_OPT_HANDLE_DEVICE_QUEUEING + assert(lp->busy_itl == 0 && lp->busy_itlq == 0); +#endif + /* + * Count this nexus for this LUN. + * Set up the CCB bus address for reselection. + * Toggle reselect path to untagged. + */ + ++lp->busy_itl; +#ifndef SYM_OPT_HANDLE_DEVICE_QUEUEING + if (lp->busy_itl == 1) { + lp->head.itl_task_sa = cpu_to_scr(cp->ccb_ba); + lp->head.resel_sa = + cpu_to_scr(SCRIPTA_BA (np, resel_no_tag)); + } + else + goto out_free; +#endif + } + } + /* + * Put the CCB into the busy queue. + */ + sym_insque_tail(&cp->link_ccbq, &np->busy_ccbq); +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + if (lp) { + sym_remque(&cp->link2_ccbq); + sym_insque_tail(&cp->link2_ccbq, &lp->waiting_ccbq); + } + +#endif + /* + * Remember all informations needed to free this CCB. 
+ */ + cp->to_abort = 0; + cp->tag = tag; + cp->order = tag_order; + cp->target = tn; + cp->lun = ln; + + if (DEBUG_FLAGS & DEBUG_TAGS) { + PRINT_LUN(np, tn, ln); + printf ("ccb @%p using tag %d.\n", cp, tag); + } + +out: + return cp; +out_free: + sym_insque_head(&cp->link_ccbq, &np->free_ccbq); + return (ccb_p) 0; +} + +/* + * Release one control block + */ +void sym_free_ccb (hcb_p np, ccb_p cp) +{ + tcb_p tp = &np->target[cp->target]; + lcb_p lp = sym_lp(np, tp, cp->lun); + + if (DEBUG_FLAGS & DEBUG_TAGS) { + PRINT_LUN(np, cp->target, cp->lun); + printf ("ccb @%p freeing tag %d.\n", cp, cp->tag); + } + + /* + * If LCB available, + */ + if (lp) { + /* + * If tagged, release the tag, set the relect path + */ + if (cp->tag != NO_TAG) { +#ifdef SYM_OPT_LIMIT_COMMAND_REORDERING + --lp->tags_sum[cp->tags_si]; +#endif + /* + * Free the tag value. + */ + lp->cb_tags[lp->if_tag] = cp->tag; + if (++lp->if_tag == SYM_CONF_MAX_TASK) + lp->if_tag = 0; + /* + * Make the reselect path invalid, + * and uncount this CCB. + */ + lp->itlq_tbl[cp->tag] = cpu_to_scr(np->bad_itlq_ba); + --lp->busy_itlq; + } else { /* Untagged */ + /* + * Make the reselect path invalid, + * and uncount this CCB. + */ + lp->head.itl_task_sa = cpu_to_scr(np->bad_itl_ba); + --lp->busy_itl; + } + /* + * If no JOB active, make the LUN reselect path invalid. + */ + if (lp->busy_itlq == 0 && lp->busy_itl == 0) + lp->head.resel_sa = + cpu_to_scr(SCRIPTB_BA (np, resel_bad_lun)); + } + /* + * Otherwise, we only accept 1 IO per LUN. + * Clear the bit that keeps track of this IO. + */ + else + sym_clr_bit(tp->busy0_map, cp->lun); + + /* + * We donnot queue more than 1 ccb per target + * with negotiation at any time. If this ccb was + * used for negotiation, clear this info in the tcb. + */ + if (cp == tp->nego_cp) + tp->nego_cp = 0; + +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If we just complete the last queued CCB, + * clear this info that is no longer relevant. 
+ */ + if (cp == np->last_cp) + np->last_cp = 0; +#endif + + /* + * Unmap user data from DMA map if needed. + */ + sym_data_dmamap_unload(np, cp); + + /* + * Make this CCB available. + */ + cp->cam_ccb = 0; + cp->host_status = HS_IDLE; + sym_remque(&cp->link_ccbq); + sym_insque_head(&cp->link_ccbq, &np->free_ccbq); + +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + /* + * Cancel any pending timeout condition. + */ + sym_untimeout_ccb(np, cp); +#endif + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + if (lp) { + sym_remque(&cp->link2_ccbq); + sym_insque_tail(&cp->link2_ccbq, &np->dummy_ccbq); + if (cp->started) { + if (cp->tag != NO_TAG) + --lp->started_tags; + else + --lp->started_no_tag; + } + } + cp->started = 0; +#endif +} + +/* + * Allocate a CCB from memory and initialize its fixed part. + */ +static ccb_p sym_alloc_ccb(hcb_p np) +{ + ccb_p cp = 0; + int hcode; + + /* + * Prevent from allocating more CCBs than we can + * queue to the controller. + */ + if (np->actccbs >= SYM_CONF_MAX_START) + return 0; + + /* + * Allocate memory for this CCB. + */ + cp = sym_calloc_dma(sizeof(struct sym_ccb), "CCB"); + if (!cp) + goto out_free; + + /* + * Allocate a bounce buffer for sense data. + */ + cp->sns_bbuf = sym_calloc_dma(SYM_SNS_BBUF_LEN, "SNS_BBUF"); + if (!cp->sns_bbuf) + goto out_free; + + /* + * Allocate a map for the DMA of user data. + */ + if (sym_data_dmamap_create(np, cp)) + goto out_free; + + /* + * Count it. + */ + np->actccbs++; + + /* + * Compute the bus address of this ccb. + */ + cp->ccb_ba = vtobus(cp); + + /* + * Insert this ccb into the hashed list. + */ + hcode = CCB_HASH_CODE(cp->ccb_ba); + cp->link_ccbh = np->ccbh[hcode]; + np->ccbh[hcode] = cp; + + /* + * Initialyze the start and restart actions. + */ + cp->phys.head.go.start = cpu_to_scr(SCRIPTA_BA (np, idle)); + cp->phys.head.go.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); + + /* + * Initilialyze some other fields. 
+ */ + cp->phys.smsg_ext.addr = cpu_to_scr(HCB_BA(np, msgin[2])); + + /* + * Chain into free ccb queue. + */ + sym_insque_head(&cp->link_ccbq, &np->free_ccbq); + + /* + * Chain into optionnal lists. + */ +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + sym_insque_head(&cp->tmo_linkq, &np->tmo0_ccbq); +#endif +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + sym_insque_head(&cp->link2_ccbq, &np->dummy_ccbq); +#endif + return cp; +out_free: + if (cp) { + if (cp->sns_bbuf) + sym_mfree_dma(cp->sns_bbuf,SYM_SNS_BBUF_LEN,"SNS_BBUF"); + sym_mfree_dma(cp, sizeof(*cp), "CCB"); + } + return 0; +} + +/* + * Look up a CCB from a DSA value. + */ +static ccb_p sym_ccb_from_dsa(hcb_p np, u32 dsa) +{ + int hcode; + ccb_p cp; + + hcode = CCB_HASH_CODE(dsa); + cp = np->ccbh[hcode]; + while (cp) { + if (cp->ccb_ba == dsa) + break; + cp = cp->link_ccbh; + } + + return cp; +} + +/* + * Target control block initialisation. + * Nothing important to do at the moment. + */ +static void sym_init_tcb (hcb_p np, u_char tn) +{ +#if 0 /* Hmmm... this checking looks paranoid. */ + /* + * Check some alignments required by the chip. + */ + assert (((offsetof(struct sym_reg, nc_sxfer) ^ + offsetof(struct sym_tcb, head.sval)) &3) == 0); + assert (((offsetof(struct sym_reg, nc_scntl3) ^ + offsetof(struct sym_tcb, head.wval)) &3) == 0); +#endif +} + +/* + * Lun control block allocation and initialization. + */ +lcb_p sym_alloc_lcb (hcb_p np, u_char tn, u_char ln) +{ + tcb_p tp = &np->target[tn]; + lcb_p lp = sym_lp(np, tp, ln); + + /* + * Already done, just return. + */ + if (lp) + return lp; + + /* + * Donnot allow LUN control block + * allocation for not probed LUNs. + */ + if (!sym_is_bit(tp->lun_map, ln)) + return 0; + + /* + * Initialize the target control block if not yet. + */ + sym_init_tcb (np, tn); + + /* + * Allocate the LCB bus address array. + * Compute the bus address of this table. 
+ */ + if (ln && !tp->luntbl) { + int i; + + tp->luntbl = sym_calloc_dma(256, "LUNTBL"); + if (!tp->luntbl) + goto fail; + for (i = 0 ; i < 64 ; i++) + tp->luntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa)); + tp->head.luntbl_sa = cpu_to_scr(vtobus(tp->luntbl)); + } + + /* + * Allocate the table of pointers for LUN(s) > 0, if needed. + */ + if (ln && !tp->lunmp) { + tp->lunmp = sym_calloc(SYM_CONF_MAX_LUN * sizeof(lcb_p), + "LUNMP"); + if (!tp->lunmp) + goto fail; + } + + /* + * Allocate the lcb. + * Make it available to the chip. + */ + lp = sym_calloc_dma(sizeof(struct sym_lcb), "LCB"); + if (!lp) + goto fail; + if (ln) { + tp->lunmp[ln] = lp; + tp->luntbl[ln] = cpu_to_scr(vtobus(lp)); + } + else { + tp->lun0p = lp; + tp->head.lun0_sa = cpu_to_scr(vtobus(lp)); + } + + /* + * Let the itl task point to error handling. + */ + lp->head.itl_task_sa = cpu_to_scr(np->bad_itl_ba); + + /* + * Set the reselect pattern to our default. :) + */ + lp->head.resel_sa = cpu_to_scr(SCRIPTB_BA (np, resel_bad_lun)); + + /* + * Set user capabilities. + */ + lp->user_flags = tp->usrflags & (SYM_DISC_ENABLED | SYM_TAGS_ENABLED); + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + /* + * Initialize device queueing. + */ + sym_que_init(&lp->waiting_ccbq); + sym_que_init(&lp->started_ccbq); + lp->started_max = SYM_CONF_MAX_TASK; + lp->started_limit = SYM_CONF_MAX_TASK; +#endif + /* + * If we are busy, count the IO. + */ + if (sym_is_bit(tp->busy0_map, ln)) { + lp->busy_itl = 1; + sym_clr_bit(tp->busy0_map, ln); + } +fail: + return lp; +} + +/* + * Allocate LCB resources for tagged command queuing. + */ +static void sym_alloc_lcb_tags (hcb_p np, u_char tn, u_char ln) +{ + tcb_p tp = &np->target[tn]; + lcb_p lp = sym_lp(np, tp, ln); + int i; + + /* + * If LCB not available, try to allocate it. + */ + if (!lp && !(lp = sym_alloc_lcb(np, tn, ln))) + goto fail; + + /* + * Allocate the task table and and the tag allocation + * circular buffer. We want both or none. 
+ */ + lp->itlq_tbl = sym_calloc_dma(SYM_CONF_MAX_TASK*4, "ITLQ_TBL"); + if (!lp->itlq_tbl) + goto fail; + lp->cb_tags = sym_calloc(SYM_CONF_MAX_TASK, "CB_TAGS"); + if (!lp->cb_tags) { + sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK*4, "ITLQ_TBL"); + lp->itlq_tbl = 0; + goto fail; + } + + /* + * Initialize the task table with invalid entries. + */ + for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++) + lp->itlq_tbl[i] = cpu_to_scr(np->notask_ba); + + /* + * Fill up the tag buffer with tag numbers. + */ + for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++) + lp->cb_tags[i] = i; + + /* + * Make the task table available to SCRIPTS, + * And accept tagged commands now. + */ + lp->head.itlq_tbl_sa = cpu_to_scr(vtobus(lp->itlq_tbl)); + + return; +fail: +} + +/* + * Queue a SCSI IO to the controller. + */ +int sym_queue_scsiio(hcb_p np, cam_scsiio_p csio, ccb_p cp) +{ + tcb_p tp; + lcb_p lp; + u_char idmsg, *msgptr; + u_int msglen; + + /* + * Keep track of the IO in our CCB. + */ + cp->cam_ccb = (cam_ccb_p) csio; + + /* + * Retreive the target descriptor. + */ + tp = &np->target[cp->target]; + + /* + * Retreive the lun descriptor. + */ + lp = sym_lp(np, tp, cp->lun); + + /* + * Build the IDENTIFY message. + */ + idmsg = M_IDENTIFY | cp->lun; + if (cp->tag != NO_TAG || (lp && (lp->curr_flags & SYM_DISC_ENABLED))) + idmsg |= 0x40; + + msgptr = cp->scsi_smsg; + msglen = 0; + msgptr[msglen++] = idmsg; + + /* + * Build the tag message if present. + */ + if (cp->tag != NO_TAG) { + u_char order = cp->order; + + switch(order) { + case M_ORDERED_TAG: + break; + case M_HEAD_TAG: + break; + default: + order = M_SIMPLE_TAG; + } +#ifdef SYM_OPT_LIMIT_COMMAND_REORDERING + /* + * Avoid too much reordering of SCSI commands. + * The algorithm tries to prevent completion of any + * tagged command from being delayed against more + * than 3 times the max number of queued commands. 
+ */ + if (lp && lp->tags_since > 3*SYM_CONF_MAX_TAG) { + lp->tags_si = !(lp->tags_si); + if (lp->tags_sum[lp->tags_si]) { + order = M_ORDERED_TAG; + if ((DEBUG_FLAGS & DEBUG_TAGS)||sym_verbose>1) { + PRINT_ADDR(cp); + printf("ordered tag forced.\n"); + } + } + lp->tags_since = 0; + } +#endif + msgptr[msglen++] = order; + + /* + * For less than 128 tags, actual tags are numbered + * 1,3,5,..2*MAXTAGS+1,since we may have to deal + * with devices that have problems with #TAG 0 or too + * great #TAG numbers. For more tags (up to 256), + * we use directly our tag number. + */ +#if SYM_CONF_MAX_TASK > (512/4) + msgptr[msglen++] = cp->tag; +#else + msgptr[msglen++] = (cp->tag << 1) + 1; +#endif + } + + /* + * Build a negotiation message if needed. + * (nego_status is filled by sym_prepare_nego()) + */ + cp->nego_status = 0; + if (tp->tinfo.curr.width != tp->tinfo.goal.width || + tp->tinfo.curr.period != tp->tinfo.goal.period || + tp->tinfo.curr.offset != tp->tinfo.goal.offset || + tp->tinfo.curr.options != tp->tinfo.goal.options) { + if (!tp->nego_cp && lp) + msglen += sym_prepare_nego(np, cp, 0, msgptr + msglen); + } + + /* + * Startqueue + */ + cp->phys.head.go.start = cpu_to_scr(SCRIPTA_BA (np, select)); + cp->phys.head.go.restart = cpu_to_scr(SCRIPTA_BA (np, resel_dsa)); + + /* + * select + */ + cp->phys.select.sel_id = cp->target; + cp->phys.select.sel_scntl3 = tp->head.wval; + cp->phys.select.sel_sxfer = tp->head.sval; + cp->phys.select.sel_scntl4 = tp->head.uval; + + /* + * message + */ + cp->phys.smsg.addr = cpu_to_scr(CCB_BA (cp, scsi_smsg)); + cp->phys.smsg.size = cpu_to_scr(msglen); + + /* + * status + */ + cp->host_xflags = 0; + cp->host_status = cp->nego_status ? HS_NEGOTIATE : HS_BUSY; + cp->ssss_status = S_ILLEGAL; + cp->xerr_status = 0; + cp->host_flags = 0; + cp->extra_bytes = 0; + + /* + * extreme data pointer. 
+ * shall be positive, so -1 is lower than lowest.:) + */ + cp->ext_sg = -1; + cp->ext_ofs = 0; + + /* + * Build the CDB and DATA descriptor block + * and start the IO. + */ + return sym_setup_data_and_start(np, csio, cp); +} + +/* + * Reset a SCSI target (all LUNs of this target). + */ +int sym_reset_scsi_target(hcb_p np, int target) +{ + tcb_p tp; + + if (target == np->myaddr || (u_int)target >= SYM_CONF_MAX_TARGET) + return -1; + + tp = &np->target[target]; + tp->to_reset = 1; + + np->istat_sem = SEM; + OUTB (nc_istat, SIGP|SEM); + + return 0; +} + +/* + * Abort a SCSI IO. + */ +int sym_abort_ccb(hcb_p np, ccb_p cp, int timed_out) +{ + /* + * Check that the IO is active. + */ + if (!cp || !cp->host_status || cp->host_status == HS_WAIT) + return -1; + + /* + * If a previous abort didn't succeed in time, + * perform a BUS reset. + */ + if (cp->to_abort) { + sym_reset_scsi_bus(np, 1); + return 0; + } + + /* + * Mark the CCB for abort and allow time for. + */ + cp->to_abort = timed_out ? 2 : 1; + + /* + * Tell the SCRIPTS processor to stop and synchronize with us. + */ + np->istat_sem = SEM; + OUTB (nc_istat, SIGP|SEM); + return 0; +} + +int sym_abort_scsiio(hcb_p np, cam_ccb_p ccb, int timed_out) +{ + ccb_p cp; + SYM_QUEHEAD *qp; + + /* + * Look up our CCB control block. + */ + cp = 0; + FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { + ccb_p cp2 = sym_que_entry(qp, struct sym_ccb, link_ccbq); + if (cp2->cam_ccb == ccb) { + cp = cp2; + break; + } + } + + return sym_abort_ccb(np, cp, timed_out); +} + +/* + * Complete execution of a SCSI command with extented + * error, SCSI status error, or having been auto-sensed. + * + * The SCRIPTS processor is not running there, so we + * can safely access IO registers and remove JOBs from + * the START queue. + * SCRATCHA is assumed to have been loaded with STARTPOS + * before the SCRIPTS called the C code. 
+ */ +void sym_complete_error (hcb_p np, ccb_p cp) +{ + tcb_p tp; + lcb_p lp; + int resid; + int i; + + /* + * Paranoid check. :) + */ + if (!cp || !cp->cam_ccb) + return; + + if (DEBUG_FLAGS & (DEBUG_TINY|DEBUG_RESULT)) { + printf ("CCB=%lx STAT=%x/%x/%x DEV=%d/%d\n", (unsigned long)cp, + cp->host_status, cp->ssss_status, cp->host_flags, + cp->target, cp->lun); + MDELAY(100); + } + + /* + * Get target and lun pointers. + */ + tp = &np->target[cp->target]; + lp = sym_lp(np, tp, cp->lun); + + /* + * Check for extended errors. + */ + if (cp->xerr_status) { + if (sym_verbose) + sym_print_xerr(cp, cp->xerr_status); + if (cp->host_status == HS_COMPLETE) + cp->host_status = HS_COMP_ERR; + } + + /* + * Calculate the residual. + */ + resid = sym_compute_residual(np, cp); + + if (!SYM_SETUP_RESIDUAL_SUPPORT) {/* If user does not want residuals */ + resid = 0; /* throw them away. :) */ + cp->sv_resid = 0; + } +#ifdef DEBUG_2_0_X +if (resid) + printf("XXXX RESID= %d - 0x%x\n", resid, resid); +#endif + + /* + * Dequeue all queued CCBs for that device + * not yet started by SCRIPTS. + */ + i = (INL (nc_scratcha) - np->squeue_ba) / 4; + i = sym_dequeue_from_squeue(np, i, cp->target, cp->lun, -1); + + /* + * Restart the SCRIPTS processor. + */ + OUTL_DSP (SCRIPTA_BA (np, start)); + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + if (cp->host_status == HS_COMPLETE && + cp->ssss_status == S_QUEUE_FULL) { + if (!lp || lp->started_tags - i < 2) + goto weirdness; + /* + * Decrease queue depth as needed. + */ + lp->started_max = lp->started_tags - i - 1; + lp->num_sgood = 0; + + if (sym_verbose >= 2) { + PRINT_LUN(np, cp->target, cp->lun); + printf(" queue depth is now %d\n", lp->started_max); + } + + /* + * Repair the CCB. + */ + cp->host_status = HS_BUSY; + cp->ssss_status = S_ILLEGAL; + + /* + * Let's requeue it to device. + */ + sym_set_cam_status(cp->cam_ccb, CAM_REQUEUE_REQ); + goto finish; + } +weirdness: +#endif + /* + * Synchronize DMA map if needed. 
+ */ + sym_data_dmamap_postsync(np, cp); + + /* + * Build result in CAM ccb. + */ + sym_set_cam_result_error(np, cp, resid); + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING +finish: +#endif + /* + * Add this one to the COMP queue. + */ + sym_remque(&cp->link_ccbq); + sym_insque_head(&cp->link_ccbq, &np->comp_ccbq); + + /* + * Complete all those commands with either error + * or requeue condition. + */ + sym_flush_comp_queue(np, 0); + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + /* + * Donnot start more than 1 command after an error. + */ + if (lp) + sym_start_next_ccbs(np, lp, 1); +#endif +} + +/* + * Complete execution of a successful SCSI command. + * + * Only successful commands go to the DONE queue, + * since we need to have the SCRIPTS processor + * stopped on any error condition. + * The SCRIPTS processor is running while we are + * completing successful commands. + */ +void sym_complete_ok (hcb_p np, ccb_p cp) +{ + tcb_p tp; + lcb_p lp; + cam_ccb_p ccb; + int resid; + + /* + * Paranoid check. :) + */ + if (!cp || !cp->cam_ccb) + return; + assert (cp->host_status == HS_COMPLETE); + + /* + * Get user command. + */ + ccb = cp->cam_ccb; + + /* + * Get target and lun pointers. + */ + tp = &np->target[cp->target]; + lp = sym_lp(np, tp, cp->lun); + + /* + * Assume device discovered on first success. + */ + if (!lp) + sym_set_bit(tp->lun_map, cp->lun); + + /* + * If all data have been transferred, given than no + * extended error did occur, there is no residual. + */ + resid = 0; + if (cp->phys.head.lastp != sym_goalp(cp)) + resid = sym_compute_residual(np, cp); + + /* + * Wrong transfer residuals may be worse than just always + * returning zero. User can disable this feature from + * sym_conf.h. Residual support is enabled by default. + */ + if (!SYM_SETUP_RESIDUAL_SUPPORT) + resid = 0; +#ifdef DEBUG_2_0_X +if (resid) + printf("XXXX RESID= %d - 0x%x\n", resid, resid); +#endif + + /* + * Synchronize DMA map if needed. 
+ */ + sym_data_dmamap_postsync(np, cp); + + /* + * Build result in CAM ccb. + */ + sym_set_cam_result_ok(np, cp, resid); + +#ifdef SYM_OPT_SNIFF_INQUIRY + /* + * On standard INQUIRY response (EVPD and CmDt + * not set), sniff out device capabilities. + */ + if (cp->cdb_buf[0] == 0x12 && !(cp->cdb_buf[1] & 0x3)) + sym_sniff_inquiry(np, cp->cam_ccb, resid); +#endif + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + /* + * If max number of started ccbs had been reduced, + * increase it if 200 good status received. + */ + if (lp && lp->started_max < lp->started_limit) { + ++lp->num_sgood; + if (lp->num_sgood >= 200) { + lp->num_sgood = 0; + ++lp->started_max; + if (sym_verbose >= 2) { + PRINT_LUN(np, cp->target, cp->lun); + printf(" queue depth is now %d\n", + lp->started_max); + } + } + } +#endif + + /* + * Free our CCB. + */ + sym_free_ccb (np, cp); + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + /* + * Requeue a couple of awaiting scsi commands. + */ + if (lp && !sym_que_empty(&lp->waiting_ccbq)) + sym_start_next_ccbs(np, lp, 2); +#endif + /* + * Complete the command. + */ + sym_xpt_done(np, ccb); +} + +/* + * Soft-attach the controller. + */ +#ifdef SYM_OPT_NVRAM_PRE_READ +int sym_hcb_attach(hcb_p np, struct sym_fw *fw, struct sym_nvram *nvram) +#else +int sym_hcb_attach(hcb_p np, struct sym_fw *fw) +#endif +{ +#ifndef SYM_OPT_NVRAM_PRE_READ + struct sym_nvram nvram_buf, *nvram = &nvram_buf; +#endif + int i; + + /* + * Get some info about the firmware. + */ + np->scripta_sz = fw->a_size; + np->scriptb_sz = fw->b_size; + np->scriptz_sz = fw->z_size; + np->fw_setup = fw->setup; + np->fw_patch = fw->patch; + np->fw_name = fw->name; + + /* + * Save setting of some IO registers, so we will + * be able to probe specific implementations. + */ + sym_save_initial_setting (np); + + /* + * Reset the chip now, since it has been reported + * that SCSI clock calibration may not work properly + * if the chip is currently active. 
+ */ + sym_chip_reset (np); + + /* + * Try to read the user set-up. + */ +#ifndef SYM_OPT_NVRAM_PRE_READ + (void) sym_read_nvram(np, nvram); +#endif + + /* + * Prepare controller and devices settings, according + * to chip features, user set-up and driver set-up. + */ + (void) sym_prepare_setting(np, nvram); + + /* + * Check the PCI clock frequency. + * Must be performed after prepare_setting since it destroys + * STEST1 that is used to probe for the clock doubler. + */ + i = sym_getpciclock(np); + if (i > 37000 && !(np->features & FE_66MHZ)) + printf("%s: PCI BUS clock seems too high: %u KHz.\n", + sym_name(np), i); + + /* + * Allocate the start queue. + */ + np->squeue = (u32 *) sym_calloc_dma(sizeof(u32)*(MAX_QUEUE*2),"SQUEUE"); + if (!np->squeue) + goto attach_failed; + np->squeue_ba = vtobus(np->squeue); + + /* + * Allocate the done queue. + */ + np->dqueue = (u32 *) sym_calloc_dma(sizeof(u32)*(MAX_QUEUE*2),"DQUEUE"); + if (!np->dqueue) + goto attach_failed; + np->dqueue_ba = vtobus(np->dqueue); + + /* + * Allocate the target bus address array. + */ + np->targtbl = (u32 *) sym_calloc_dma(256, "TARGTBL"); + if (!np->targtbl) + goto attach_failed; + np->targtbl_ba = vtobus(np->targtbl); + + /* + * Allocate SCRIPTS areas. + */ + np->scripta0 = sym_calloc_dma(np->scripta_sz, "SCRIPTA0"); + np->scriptb0 = sym_calloc_dma(np->scriptb_sz, "SCRIPTB0"); + np->scriptz0 = sym_calloc_dma(np->scriptz_sz, "SCRIPTZ0"); + if (!np->scripta0 || !np->scriptb0 || !np->scriptz0) + goto attach_failed; + + /* + * Initialyze the CCB free and busy queues. + */ + sym_que_init(&np->free_ccbq); + sym_que_init(&np->busy_ccbq); + sym_que_init(&np->comp_ccbq); + + /* + * Initializations for optional handling + * of IO timeouts and device queueing. 
+ */ +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + sym_que_init(&np->tmo0_ccbq); + np->tmo_ccbq = + sym_calloc(2*SYM_CONF_TIMEOUT_ORDER_MAX*sizeof(SYM_QUEHEAD), + "TMO_CCBQ"); + for (i = 0 ; i < 2*SYM_CONF_TIMEOUT_ORDER_MAX ; i++) + sym_que_init(&np->tmo_ccbq[i]); +#endif +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + sym_que_init(&np->dummy_ccbq); +#endif + /* + * Allocate some CCB. We need at least ONE. + */ + if (!sym_alloc_ccb(np)) + goto attach_failed; + + /* + * Calculate BUS addresses where we are going + * to load the SCRIPTS. + */ + np->scripta_ba = vtobus(np->scripta0); + np->scriptb_ba = vtobus(np->scriptb0); + np->scriptz_ba = vtobus(np->scriptz0); + + if (np->ram_ba) { + np->scripta_ba = np->ram_ba; + if (np->features & FE_RAM8K) { + np->ram_ws = 8192; + np->scriptb_ba = np->scripta_ba + 4096; +#if 0 /* May get useful for 64 BIT PCI addressing */ + np->scr_ram_seg = cpu_to_scr(np->scripta_ba >> 32); +#endif + } + else + np->ram_ws = 4096; + } + + /* + * Copy scripts to controller instance. + */ + bcopy(fw->a_base, np->scripta0, np->scripta_sz); + bcopy(fw->b_base, np->scriptb0, np->scriptb_sz); + bcopy(fw->z_base, np->scriptz0, np->scriptz_sz); + + /* + * Setup variable parts in scripts and compute + * scripts bus addresses used from the C code. + */ + np->fw_setup(np, fw); + + /* + * Bind SCRIPTS with physical addresses usable by the + * SCRIPTS processor (as seen from the BUS = BUS addresses). + */ + sym_fw_bind_script(np, (u32 *) np->scripta0, np->scripta_sz); + sym_fw_bind_script(np, (u32 *) np->scriptb0, np->scriptb_sz); + sym_fw_bind_script(np, (u32 *) np->scriptz0, np->scriptz_sz); + +#ifdef SYM_CONF_IARB_SUPPORT + /* + * If user wants IARB to be set when we win arbitration + * and have other jobs, compute the max number of consecutive + * settings of IARB hints before we leave devices a chance to + * arbitrate for reselection. 
+ */ +#ifdef SYM_SETUP_IARB_MAX + np->iarb_max = SYM_SETUP_IARB_MAX; +#else + np->iarb_max = 4; +#endif +#endif + + /* + * Prepare the idle and invalid task actions. + */ + np->idletask.start = cpu_to_scr(SCRIPTA_BA (np, idle)); + np->idletask.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); + np->idletask_ba = vtobus(&np->idletask); + + np->notask.start = cpu_to_scr(SCRIPTA_BA (np, idle)); + np->notask.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); + np->notask_ba = vtobus(&np->notask); + + np->bad_itl.start = cpu_to_scr(SCRIPTA_BA (np, idle)); + np->bad_itl.restart = cpu_to_scr(SCRIPTB_BA (np, bad_i_t_l)); + np->bad_itl_ba = vtobus(&np->bad_itl); + + np->bad_itlq.start = cpu_to_scr(SCRIPTA_BA (np, idle)); + np->bad_itlq.restart = cpu_to_scr(SCRIPTB_BA (np,bad_i_t_l_q)); + np->bad_itlq_ba = vtobus(&np->bad_itlq); + + /* + * Allocate and prepare the lun JUMP table that is used + * for a target prior the probing of devices (bad lun table). + * A private table will be allocated for the target on the + * first INQUIRY response received. + */ + np->badluntbl = sym_calloc_dma(256, "BADLUNTBL"); + if (!np->badluntbl) + goto attach_failed; + + np->badlun_sa = cpu_to_scr(SCRIPTB_BA (np, resel_bad_lun)); + for (i = 0 ; i < 64 ; i++) /* 64 luns/target, no less */ + np->badluntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa)); + + /* + * Prepare the bus address array that contains the bus + * address of each target control block. + * For now, assume all logical units are wrong. :) + */ + for (i = 0 ; i < SYM_CONF_MAX_TARGET ; i++) { + np->targtbl[i] = cpu_to_scr(vtobus(&np->target[i])); + np->target[i].head.luntbl_sa = + cpu_to_scr(vtobus(np->badluntbl)); + np->target[i].head.lun0_sa = + cpu_to_scr(vtobus(&np->badlun_sa)); + } + + /* + * Now check the cache handling of the pci chipset. + */ + if (sym_snooptest (np)) { + printf("%s: CACHE INCORRECTLY CONFIGURED.\n", sym_name(np)); + goto attach_failed; + }; + + /* + * Sigh! we are done. + */ + return 0; + + /* + * We have failed. 
+ * We will try to free all the resources we have + * allocated, but if we are a boot device, this + * will not help that much.;) + */ +attach_failed: + sym_hcb_free(np); + return ENXIO; +} + +/* + * Free everything that has been allocated for this device. + */ +void sym_hcb_free(hcb_p np) +{ + SYM_QUEHEAD *qp; + ccb_p cp; + tcb_p tp; + lcb_p lp; + int target, lun; + + if (np->scriptz0) + sym_mfree_dma(np->scriptz0, np->scriptz_sz, "SCRIPTZ0"); + if (np->scriptb0) + sym_mfree_dma(np->scriptb0, np->scriptb_sz, "SCRIPTB0"); + if (np->scripta0) + sym_mfree_dma(np->scripta0, np->scripta_sz, "SCRIPTA0"); +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + if (np->tmo_ccbq) + sym_mfree(np->tmo_ccbq, + 2*SYM_CONF_TIMEOUT_ORDER_MAX*sizeof(SYM_QUEHEAD), + "TMO_CCBQ"); +#endif + if (np->squeue) + sym_mfree_dma(np->squeue, sizeof(u32)*(MAX_QUEUE*2), "SQUEUE"); + if (np->dqueue) + sym_mfree_dma(np->dqueue, sizeof(u32)*(MAX_QUEUE*2), "DQUEUE"); + + if (np->actccbs) { + while ((qp = sym_remque_head(&np->free_ccbq)) != 0) { + cp = sym_que_entry(qp, struct sym_ccb, link_ccbq); + sym_data_dmamap_destroy(np, cp); + sym_mfree_dma(cp->sns_bbuf, SYM_SNS_BBUF_LEN, + "SNS_BBUF"); + sym_mfree_dma(cp, sizeof(*cp), "CCB"); + } + } + + if (np->badluntbl) + sym_mfree_dma(np->badluntbl, 256,"BADLUNTBL"); + + for (target = 0; target < SYM_CONF_MAX_TARGET ; target++) { + tp = &np->target[target]; + for (lun = 0 ; lun < SYM_CONF_MAX_LUN ; lun++) { + lp = sym_lp(np, tp, lun); + if (!lp) + continue; + if (lp->itlq_tbl) + sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK*4, + "ITLQ_TBL"); + if (lp->cb_tags) + sym_mfree(lp->cb_tags, SYM_CONF_MAX_TASK, + "CB_TAGS"); + sym_mfree_dma(lp, sizeof(*lp), "LCB"); + } +#if SYM_CONF_MAX_LUN > 1 + if (tp->lunmp) + sym_mfree(tp->lunmp, SYM_CONF_MAX_LUN*sizeof(lcb_p), + "LUNMP"); +#endif + } + if (np->targtbl) + sym_mfree_dma(np->targtbl, 256, "TARGTBL"); +} diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.h b/drivers/scsi/sym53c8xx_2/sym_hipd.h new file mode 100644 index 
000000000000..62530d4c451b --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.h @@ -0,0 +1,1445 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM_HIPD_H +#define SYM_HIPD_H + +/* + * Generic driver options. + * + * They may be defined in platform specific headers, if they + * are useful. + * + * SYM_OPT_NO_BUS_MEMORY_MAPPING + * When this option is set, the driver will not load the + * on-chip RAM using MMIO, but let the SCRIPTS processor + * do the work using MOVE MEMORY instructions. + * (set for Linux/PPC) + * + * SYM_OPT_HANDLE_DIR_UNKNOWN + * When this option is set, the SCRIPTS used by the driver + * are able to handle SCSI transfers with direction not + * supplied by user. + * (set for Linux-2.0.X) + * + * SYM_OPT_HANDLE_DEVICE_QUEUEING + * When this option is set, the driver will use a queue per + * device and handle QUEUE FULL status requeuing internally. + * + * SYM_OPT_BUS_DMA_ABSTRACTION + * When this option is set, the driver allocator is responsible + * of maintaining bus physical addresses and so provides virtual + * to bus physical address translation of driver data structures. + * (set for FreeBSD-4 and Linux 2.3) + * + * SYM_OPT_SNIFF_INQUIRY + * When this option is set, the driver sniff out successful + * INQUIRY response and performs negotiations accordingly. 
+ * (set for Linux) + * + * SYM_OPT_LIMIT_COMMAND_REORDERING + * When this option is set, the driver tries to limit tagged + * command reordering to some reasonnable value. + * (set for Linux) + */ +#if 0 +#define SYM_OPT_NO_BUS_MEMORY_MAPPING +#define SYM_OPT_HANDLE_DIR_UNKNOWN +#define SYM_OPT_HANDLE_DEVICE_QUEUEING +#define SYM_OPT_BUS_DMA_ABSTRACTION +#define SYM_OPT_SNIFF_INQUIRY +#define SYM_OPT_LIMIT_COMMAND_REORDERING +#endif + +/* + * Active debugging tags and verbosity. + * Both DEBUG_FLAGS and sym_verbose can be redefined + * by the platform specific code to something else. + */ +#define DEBUG_ALLOC (0x0001) +#define DEBUG_PHASE (0x0002) +#define DEBUG_POLL (0x0004) +#define DEBUG_QUEUE (0x0008) +#define DEBUG_RESULT (0x0010) +#define DEBUG_SCATTER (0x0020) +#define DEBUG_SCRIPT (0x0040) +#define DEBUG_TINY (0x0080) +#define DEBUG_TIMING (0x0100) +#define DEBUG_NEGO (0x0200) +#define DEBUG_TAGS (0x0400) +#define DEBUG_POINTER (0x0800) + +#ifndef DEBUG_FLAGS +#define DEBUG_FLAGS (0x0000) +#endif + +#ifndef sym_verbose +#define sym_verbose (np->verbose) +#endif + +/* + * These ones should have been already defined. + */ +#ifndef offsetof +#define offsetof(t, m) ((size_t) (&((t *)0)->m)) +#endif +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#ifndef assert +#define assert(expression) { \ + if (!(expression)) { \ + (void)panic( \ + "assertion \"%s\" failed: file \"%s\", line %d\n", \ + #expression, \ + __FILE__, __LINE__); \ + } \ +} +#endif + +/* + * Number of tasks per device we want to handle. + */ +#if SYM_CONF_MAX_TAG_ORDER > 8 +#error "more than 256 tags per logical unit not allowed." +#endif +#define SYM_CONF_MAX_TASK (1<<SYM_CONF_MAX_TAG_ORDER) + +/* + * Donnot use more tasks that we can handle. 
+ */ +#ifndef SYM_CONF_MAX_TAG +#define SYM_CONF_MAX_TAG SYM_CONF_MAX_TASK +#endif +#if SYM_CONF_MAX_TAG > SYM_CONF_MAX_TASK +#undef SYM_CONF_MAX_TAG +#define SYM_CONF_MAX_TAG SYM_CONF_MAX_TASK +#endif + +/* + * This one means 'NO TAG for this job' + */ +#define NO_TAG (256) + +/* + * Number of SCSI targets. + */ +#if SYM_CONF_MAX_TARGET > 16 +#error "more than 16 targets not allowed." +#endif + +/* + * Number of logical units per target. + */ +#if SYM_CONF_MAX_LUN > 64 +#error "more than 64 logical units per target not allowed." +#endif + +/* + * Asynchronous pre-scaler (ns). Shall be 40 for + * the SCSI timings to be compliant. + */ +#define SYM_CONF_MIN_ASYNC (40) + +/* + * Number of entries in the START and DONE queues. + * + * We limit to 1 PAGE in order to succeed allocation of + * these queues. Each entry is 8 bytes long (2 DWORDS). + */ +#ifdef SYM_CONF_MAX_START +#define SYM_CONF_MAX_QUEUE (SYM_CONF_MAX_START+2) +#else +#define SYM_CONF_MAX_QUEUE (7*SYM_CONF_MAX_TASK+2) +#define SYM_CONF_MAX_START (SYM_CONF_MAX_QUEUE-2) +#endif + +#if SYM_CONF_MAX_QUEUE > SYM_MEM_CLUSTER_SIZE/8 +#undef SYM_CONF_MAX_QUEUE +#define SYM_CONF_MAX_QUEUE (SYM_MEM_CLUSTER_SIZE/8) +#undef SYM_CONF_MAX_START +#define SYM_CONF_MAX_START (SYM_CONF_MAX_QUEUE-2) +#endif + +/* + * For this one, we want a short name :-) + */ +#define MAX_QUEUE SYM_CONF_MAX_QUEUE + +/* + * Union of supported NVRAM formats. + */ +struct sym_nvram { + int type; +#define SYM_SYMBIOS_NVRAM (1) +#define SYM_TEKRAM_NVRAM (2) +#if SYM_CONF_NVRAM_SUPPORT + union { + Symbios_nvram Symbios; + Tekram_nvram Tekram; + } data; +#endif +}; + +/* + * Common definitions for both bus space based and legacy IO methods. 
+ */ +#define INB(r) INB_OFF(offsetof(struct sym_reg,r)) +#define INW(r) INW_OFF(offsetof(struct sym_reg,r)) +#define INL(r) INL_OFF(offsetof(struct sym_reg,r)) + +#define OUTB(r, v) OUTB_OFF(offsetof(struct sym_reg,r), (v)) +#define OUTW(r, v) OUTW_OFF(offsetof(struct sym_reg,r), (v)) +#define OUTL(r, v) OUTL_OFF(offsetof(struct sym_reg,r), (v)) + +#define OUTONB(r, m) OUTB(r, INB(r) | (m)) +#define OUTOFFB(r, m) OUTB(r, INB(r) & ~(m)) +#define OUTONW(r, m) OUTW(r, INW(r) | (m)) +#define OUTOFFW(r, m) OUTW(r, INW(r) & ~(m)) +#define OUTONL(r, m) OUTL(r, INL(r) | (m)) +#define OUTOFFL(r, m) OUTL(r, INL(r) & ~(m)) + +/* + * We normally want the chip to have a consistent view + * of driver internal data structures when we restart it. + * Thus these macros. + */ +#define OUTL_DSP(v) \ + do { \ + MEMORY_WRITE_BARRIER(); \ + OUTL (nc_dsp, (v)); \ + } while (0) + +#define OUTONB_STD() \ + do { \ + MEMORY_WRITE_BARRIER(); \ + OUTONB (nc_dcntl, (STD|NOCOM)); \ + } while (0) + +/* + * Command control block states. 
+ */ +#define HS_IDLE (0) +#define HS_BUSY (1) +#define HS_NEGOTIATE (2) /* sync/wide data transfer*/ +#define HS_DISCONNECT (3) /* Disconnected by target */ +#define HS_WAIT (4) /* waiting for resource */ + +#define HS_DONEMASK (0x80) +#define HS_COMPLETE (4|HS_DONEMASK) +#define HS_SEL_TIMEOUT (5|HS_DONEMASK) /* Selection timeout */ +#define HS_UNEXPECTED (6|HS_DONEMASK) /* Unexpected disconnect */ +#define HS_COMP_ERR (7|HS_DONEMASK) /* Completed with error */ + +/* + * Software Interrupt Codes + */ +#define SIR_BAD_SCSI_STATUS (1) +#define SIR_SEL_ATN_NO_MSG_OUT (2) +#define SIR_MSG_RECEIVED (3) +#define SIR_MSG_WEIRD (4) +#define SIR_NEGO_FAILED (5) +#define SIR_NEGO_PROTO (6) +#define SIR_SCRIPT_STOPPED (7) +#define SIR_REJECT_TO_SEND (8) +#define SIR_SWIDE_OVERRUN (9) +#define SIR_SODL_UNDERRUN (10) +#define SIR_RESEL_NO_MSG_IN (11) +#define SIR_RESEL_NO_IDENTIFY (12) +#define SIR_RESEL_BAD_LUN (13) +#define SIR_TARGET_SELECTED (14) +#define SIR_RESEL_BAD_I_T_L (15) +#define SIR_RESEL_BAD_I_T_L_Q (16) +#define SIR_ABORT_SENT (17) +#define SIR_RESEL_ABORTED (18) +#define SIR_MSG_OUT_DONE (19) +#define SIR_COMPLETE_ERROR (20) +#define SIR_DATA_OVERRUN (21) +#define SIR_BAD_PHASE (22) +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 +#define SIR_DMAP_DIRTY (23) +#define SIR_MAX (23) +#else +#define SIR_MAX (22) +#endif + +/* + * Extended error bit codes. + * xerr_status field of struct sym_ccb. + */ +#define XE_EXTRA_DATA (1) /* unexpected data phase */ +#define XE_BAD_PHASE (1<<1) /* illegal phase (4/5) */ +#define XE_PARITY_ERR (1<<2) /* unrecovered SCSI parity error */ +#define XE_SODL_UNRUN (1<<3) /* ODD transfer in DATA OUT phase */ +#define XE_SWIDE_OVRUN (1<<4) /* ODD transfer in DATA IN phase */ + +/* + * Negotiation status. + * nego_status field of struct sym_ccb. + */ +#define NS_SYNC (1) +#define NS_WIDE (2) +#define NS_PPR (3) + +/* + * A CCB hashed table is used to retrieve CCB address + * from DSA value. 
+ */ +#define CCB_HASH_SHIFT 8 +#define CCB_HASH_SIZE (1UL << CCB_HASH_SHIFT) +#define CCB_HASH_MASK (CCB_HASH_SIZE-1) +#if 1 +#define CCB_HASH_CODE(dsa) \ + (((dsa) >> (_LGRU16_(sizeof(struct sym_ccb)))) & CCB_HASH_MASK) +#else +#define CCB_HASH_CODE(dsa) (((dsa) >> 9) & CCB_HASH_MASK) +#endif + +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 +/* + * We may want to use segment registers for 64 bit DMA. + * 16 segments registers -> up to 64 GB addressable. + */ +#define SYM_DMAP_SHIFT (4) +#define SYM_DMAP_SIZE (1u<<SYM_DMAP_SHIFT) +#define SYM_DMAP_MASK (SYM_DMAP_SIZE-1) +#endif + +/* + * Device flags. + */ +#define SYM_DISC_ENABLED (1) +#define SYM_TAGS_ENABLED (1<<1) +#define SYM_SCAN_BOOT_DISABLED (1<<2) +#define SYM_SCAN_LUNS_DISABLED (1<<3) + +/* + * Host adapter miscellaneous flags. + */ +#define SYM_AVOID_BUS_RESET (1) +#define SYM_SCAN_TARGETS_HILO (1<<1) + +/* + * Misc. + */ +#define SYM_SNOOP_TIMEOUT (10000000) +#define BUS_8_BIT 0 +#define BUS_16_BIT 1 + +/* + * Gather negotiable parameters value + */ +struct sym_trans { + u8 scsi_version; + u8 spi_version; + u8 period; + u8 offset; + u8 width; + u8 options; /* PPR options */ +}; + +struct sym_tinfo { + struct sym_trans curr; + struct sym_trans goal; + struct sym_trans user; +#ifdef SYM_OPT_ANNOUNCE_TRANSFER_RATE + struct sym_trans prev; +#endif +}; + +/* + * Global TCB HEADER. + * + * Due to lack of indirect addressing on earlier NCR chips, + * this substructure is copied from the TCB to a global + * address after selection. + * For SYMBIOS chips that support LOAD/STORE this copy is + * not needed and thus not performed. + */ +struct sym_tcbh { + /* + * Scripts bus addresses of LUN table accessed from scripts. + * LUN #0 is a special case, since multi-lun devices are rare, + * and we we want to speed-up the general case and not waste + * resources. + */ + u32 luntbl_sa; /* bus address of this table */ + u32 lun0_sa; /* bus address of LCB #0 */ + /* + * Actual SYNC/WIDE IO registers value for this target. 
+ * 'sval', 'wval' and 'uval' are read from SCRIPTS and + * so have alignment constraints. + */ +/*0*/ u_char uval; /* -> SCNTL4 register */ +/*1*/ u_char sval; /* -> SXFER io register */ +/*2*/ u_char filler1; +/*3*/ u_char wval; /* -> SCNTL3 io register */ +}; + +/* + * Target Control Block + */ +struct sym_tcb { + /* + * TCB header. + * Assumed at offset 0. + */ +/*0*/ struct sym_tcbh head; + + /* + * LUN table used by the SCRIPTS processor. + * An array of bus addresses is used on reselection. + */ + u32 *luntbl; /* LCBs bus address table */ + + /* + * LUN table used by the C code. + */ + lcb_p lun0p; /* LCB of LUN #0 (usual case) */ +#if SYM_CONF_MAX_LUN > 1 + lcb_p *lunmp; /* Other LCBs [1..MAX_LUN] */ +#endif + + /* + * Bitmap that tells about LUNs that succeeded at least + * 1 IO and therefore assumed to be a real device. + * Avoid useless allocation of the LCB structure. + */ + u32 lun_map[(SYM_CONF_MAX_LUN+31)/32]; + + /* + * Bitmap that tells about LUNs that haven't yet an LCB + * allocated (not discovered or LCB allocation failed). + */ + u32 busy0_map[(SYM_CONF_MAX_LUN+31)/32]; + +#ifdef SYM_HAVE_STCB + /* + * O/S specific data structure. + */ + struct sym_stcb s; +#endif + + /* + * Transfer capabilities (SIP) + */ + struct sym_tinfo tinfo; + + /* + * Keep track of the CCB used for the negotiation in order + * to ensure that only 1 negotiation is queued at a time. + */ + ccb_p nego_cp; /* CCB used for the nego */ + + /* + * Set when we want to reset the device. + */ + u_char to_reset; + + /* + * Other user settable limits and options. + * These limits are read from the NVRAM if present. + */ + u_char usrflags; + u_short usrtags; + +#ifdef SYM_OPT_SNIFF_INQUIRY + /* + * Some minimal information from INQUIRY response. + */ + u32 cmdq_map[(SYM_CONF_MAX_LUN+31)/32]; + u_char inq_version; + u_char inq_byte7; + u_char inq_byte56; + u_char inq_byte7_valid; +#endif + +}; + +/* + * Global LCB HEADER. 
+ * + * Due to lack of indirect addressing on earlier NCR chips, + * this substructure is copied from the LCB to a global + * address after selection. + * For SYMBIOS chips that support LOAD/STORE this copy is + * not needed and thus not performed. + */ +struct sym_lcbh { + /* + * SCRIPTS address jumped by SCRIPTS on reselection. + * For not probed logical units, this address points to + * SCRIPTS that deal with bad LU handling (must be at + * offset zero of the LCB for that reason). + */ +/*0*/ u32 resel_sa; + + /* + * Task (bus address of a CCB) read from SCRIPTS that points + * to the unique ITL nexus allowed to be disconnected. + */ + u32 itl_task_sa; + + /* + * Task table bus address (read from SCRIPTS). + */ + u32 itlq_tbl_sa; +}; + +/* + * Logical Unit Control Block + */ +struct sym_lcb { + /* + * TCB header. + * Assumed at offset 0. + */ +/*0*/ struct sym_lcbh head; + + /* + * Task table read from SCRIPTS that contains pointers to + * ITLQ nexuses. The bus address read from SCRIPTS is + * inside the header. + */ + u32 *itlq_tbl; /* Kernel virtual address */ + + /* + * Busy CCBs management. + */ + u_short busy_itlq; /* Number of busy tagged CCBs */ + u_short busy_itl; /* Number of busy untagged CCBs */ + + /* + * Circular tag allocation buffer. + */ + u_short ia_tag; /* Tag allocation index */ + u_short if_tag; /* Tag release index */ + u_char *cb_tags; /* Circular tags buffer */ + + /* + * O/S specific data structure. + */ +#ifdef SYM_HAVE_SLCB + struct sym_slcb s; +#endif + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + /* + * Optionnaly the driver can handle device queueing, + * and requeues internally command to redo. + */ + SYM_QUEHEAD + waiting_ccbq; + SYM_QUEHEAD + started_ccbq; + int num_sgood; + u_short started_tags; + u_short started_no_tag; + u_short started_max; + u_short started_limit; +#endif + +#ifdef SYM_OPT_LIMIT_COMMAND_REORDERING + /* + * Optionnaly the driver can try to prevent SCSI + * IOs from being too much reordering. 
+ */ + u_char tags_si; /* Current index to tags sum */ + u_short tags_sum[2]; /* Tags sum counters */ + u_short tags_since; /* # of tags since last switch */ +#endif + + /* + * Set when we want to clear all tasks. + */ + u_char to_clear; + + /* + * Capabilities. + */ + u_char user_flags; + u_char curr_flags; +}; + +/* + * Action from SCRIPTS on a task. + * Is part of the CCB, but is also used separately to plug + * error handling action to perform from SCRIPTS. + */ +struct sym_actscr { + u32 start; /* Jumped by SCRIPTS after selection */ + u32 restart; /* Jumped by SCRIPTS on relection */ +}; + +/* + * Phase mismatch context. + * + * It is part of the CCB and is used as parameters for the + * DATA pointer. We need two contexts to handle correctly the + * SAVED DATA POINTER. + */ +struct sym_pmc { + struct sym_tblmove sg; /* Updated interrupted SG block */ + u32 ret; /* SCRIPT return address */ +}; + +/* + * LUN control block lookup. + * We use a direct pointer for LUN #0, and a table of + * pointers which is only allocated for devices that support + * LUN(s) > 0. + */ +#if SYM_CONF_MAX_LUN <= 1 +#define sym_lp(np, tp, lun) (!lun) ? (tp)->lun0p : 0 +#else +#define sym_lp(np, tp, lun) \ + (!lun) ? (tp)->lun0p : (tp)->lunmp ? (tp)->lunmp[(lun)] : 0 +#endif + +/* + * Status are used by the host and the script processor. + * + * The last four bytes (status[4]) are copied to the + * scratchb register (declared as scr0..scr3) just after the + * select/reselect, and copied back just after disconnecting. + * Inside the script the XX_REG are used. 
+ */ + +/* + * Last four bytes (script) + */ +#define HX_REG scr0 +#define HX_PRT nc_scr0 +#define HS_REG scr1 +#define HS_PRT nc_scr1 +#define SS_REG scr2 +#define SS_PRT nc_scr2 +#define HF_REG scr3 +#define HF_PRT nc_scr3 + +/* + * Last four bytes (host) + */ +#define host_xflags phys.head.status[0] +#define host_status phys.head.status[1] +#define ssss_status phys.head.status[2] +#define host_flags phys.head.status[3] + +/* + * Host flags + */ +#define HF_IN_PM0 1u +#define HF_IN_PM1 (1u<<1) +#define HF_ACT_PM (1u<<2) +#define HF_DP_SAVED (1u<<3) +#define HF_SENSE (1u<<4) +#define HF_EXT_ERR (1u<<5) +#define HF_DATA_IN (1u<<6) +#ifdef SYM_CONF_IARB_SUPPORT +#define HF_HINT_IARB (1u<<7) +#endif + +/* + * More host flags + */ +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 +#define HX_DMAP_DIRTY (1u<<7) +#endif + +/* + * Global CCB HEADER. + * + * Due to lack of indirect addressing on earlier NCR chips, + * this substructure is copied from the ccb to a global + * address after selection (or reselection) and copied back + * before disconnect. + * For SYMBIOS chips that support LOAD/STORE this copy is + * not needed and thus not performed. + */ + +struct sym_ccbh { + /* + * Start and restart SCRIPTS addresses (must be at 0). + */ +/*0*/ struct sym_actscr go; + + /* + * SCRIPTS jump address that deal with data pointers. + * 'savep' points to the position in the script responsible + * for the actual transfer of data. + * It's written on reception of a SAVE_DATA_POINTER message. + */ + u32 savep; /* Jump address to saved data pointer */ + u32 lastp; /* SCRIPTS address at end of data */ +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + u32 wlastp; +#endif + + /* + * Status fields. + */ + u8 status[4]; +}; + +/* + * GET/SET the value of the data pointer used by SCRIPTS. + * + * We must distinguish between the LOAD/STORE-based SCRIPTS + * that use directly the header in the CCB, and the NCR-GENERIC + * SCRIPTS that use the copy of the header in the HCB. 
+ */ +#if SYM_CONF_GENERIC_SUPPORT +#define sym_set_script_dp(np, cp, dp) \ + do { \ + if (np->features & FE_LDSTR) \ + cp->phys.head.lastp = cpu_to_scr(dp); \ + else \ + np->ccb_head.lastp = cpu_to_scr(dp); \ + } while (0) +#define sym_get_script_dp(np, cp) \ + scr_to_cpu((np->features & FE_LDSTR) ? \ + cp->phys.head.lastp : np->ccb_head.lastp) +#else +#define sym_set_script_dp(np, cp, dp) \ + do { \ + cp->phys.head.lastp = cpu_to_scr(dp); \ + } while (0) + +#define sym_get_script_dp(np, cp) (cp->phys.head.lastp) +#endif + +/* + * Data Structure Block + * + * During execution of a ccb by the script processor, the + * DSA (data structure address) register points to this + * substructure of the ccb. + */ +struct sym_dsb { + /* + * CCB header. + * Also assumed at offset 0 of the sym_ccb structure. + */ +/*0*/ struct sym_ccbh head; + + /* + * Phase mismatch contexts. + * We need two to handle correctly the SAVED DATA POINTER. + * MUST BOTH BE AT OFFSET < 256, due to using 8 bit arithmetic + * for address calculation from SCRIPTS. + */ + struct sym_pmc pm0; + struct sym_pmc pm1; + + /* + * Table data for Script + */ + struct sym_tblsel select; + struct sym_tblmove smsg; + struct sym_tblmove smsg_ext; + struct sym_tblmove cmd; + struct sym_tblmove sense; + struct sym_tblmove wresid; + struct sym_tblmove data [SYM_CONF_MAX_SG]; +}; + +/* + * Our Command Control Block + */ +struct sym_ccb { + /* + * This is the data structure which is pointed by the DSA + * register when it is executed by the script processor. + * It must be the first entry. + */ + struct sym_dsb phys; + + /* + * Pointer to CAM ccb and related stuff. 
+ */ + cam_ccb_p cam_ccb; /* CAM scsiio ccb */ + u8 cdb_buf[16]; /* Copy of CDB */ + u8 *sns_bbuf; /* Bounce buffer for sense data */ +#ifndef SYM_SNS_BBUF_LEN +#define SYM_SNS_BBUF_LEN (32) +#endif + int data_len; /* Total data length */ + int segments; /* Number of SG segments */ + + u8 order; /* Tag type (if tagged command) */ + + /* + * Miscellaneous status'. + */ + u_char nego_status; /* Negotiation status */ + u_char xerr_status; /* Extended error flags */ + u32 extra_bytes; /* Extraneous bytes transferred */ + + /* + * Message areas. + * We prepare a message to be sent after selection. + * We may use a second one if the command is rescheduled + * due to CHECK_CONDITION or COMMAND TERMINATED. + * Contents are IDENTIFY and SIMPLE_TAG. + * While negotiating sync or wide transfer, + * a SDTR or WDTR message is appended. + */ + u_char scsi_smsg [12]; + u_char scsi_smsg2[12]; + + /* + * Auto request sense related fields. + */ + u_char sensecmd[6]; /* Request Sense command */ + u_char sv_scsi_status; /* Saved SCSI status */ + u_char sv_xerr_status; /* Saved extended status */ + int sv_resid; /* Saved residual */ + + /* + * O/S specific data structure. + */ +#ifdef SYM_HAVE_SCCB + struct sym_sccb s; +#endif + /* + * Other fields. + */ +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + SYM_QUEHEAD tmo_linkq; /* Optional timeout handling */ + u_int tmo_clock; /* (link and dealine value) */ +#endif + u32 ccb_ba; /* BUS address of this CCB */ + u_short tag; /* Tag for this transfer */ + /* NO_TAG means no tag */ + u_char target; + u_char lun; + ccb_p link_ccbh; /* Host adapter CCB hash chain */ + SYM_QUEHEAD + link_ccbq; /* Link to free/busy CCB queue */ + u32 startp; /* Initial data pointer */ + u32 goalp; /* Expected last data pointer */ +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + u32 wgoalp; +#endif + int ext_sg; /* Extreme data pointer, used */ + int ext_ofs; /* to calculate the residual. 
*/ +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + SYM_QUEHEAD + link2_ccbq; /* Link for device queueing */ + u_char started; /* CCB queued to the squeue */ +#endif + u_char to_abort; /* Want this IO to be aborted */ +#ifdef SYM_OPT_LIMIT_COMMAND_REORDERING + u_char tags_si; /* Lun tags sum index (0,1) */ +#endif +}; + +#define CCB_BA(cp,lbl) (cp->ccb_ba + offsetof(struct sym_ccb, lbl)) + +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN +#define sym_goalp(cp) ((cp->host_flags & HF_DATA_IN) ? cp->goalp : cp->wgoalp) +#else +#define sym_goalp(cp) (cp->goalp) +#endif + +/* + * Host Control Block + */ +struct sym_hcb { + /* + * Global headers. + * Due to poorness of addressing capabilities, earlier + * chips (810, 815, 825) copy part of the data structures + * (CCB, TCB and LCB) in fixed areas. + */ +#if SYM_CONF_GENERIC_SUPPORT + struct sym_ccbh ccb_head; + struct sym_tcbh tcb_head; + struct sym_lcbh lcb_head; +#endif + /* + * Idle task and invalid task actions and + * their bus addresses. + */ + struct sym_actscr idletask, notask, bad_itl, bad_itlq; + u32 idletask_ba, notask_ba, bad_itl_ba, bad_itlq_ba; + + /* + * Dummy lun table to protect us against target + * returning bad lun number on reselection. + */ + u32 *badluntbl; /* Table physical address */ + u32 badlun_sa; /* SCRIPT handler BUS address */ + + /* + * Bus address of this host control block. + */ + u32 hcb_ba; + + /* + * Bit 32-63 of the on-chip RAM bus address in LE format. + * The START_RAM64 script loads the MMRS and MMWS from this + * field. + */ + u32 scr_ram_seg; + + /* + * Initial value of some IO register bits. + * These values are assumed to have been set by BIOS, and may + * be used to probe adapter implementation differences. + */ + u_char sv_scntl0, sv_scntl3, sv_dmode, sv_dcntl, sv_ctest3, sv_ctest4, + sv_ctest5, sv_gpcntl, sv_stest2, sv_stest4, sv_scntl4, + sv_stest1; + + /* + * Actual initial value of IO register bits used by the + * driver. 
They are loaded at initialisation according to + * features that are to be enabled/disabled. + */ + u_char rv_scntl0, rv_scntl3, rv_dmode, rv_dcntl, rv_ctest3, rv_ctest4, + rv_ctest5, rv_stest2, rv_ccntl0, rv_ccntl1, rv_scntl4; + + /* + * Target data. + */ + struct sym_tcb target[SYM_CONF_MAX_TARGET]; + + /* + * Target control block bus address array used by the SCRIPT + * on reselection. + */ + u32 *targtbl; + u32 targtbl_ba; + + /* + * DMA pool handle for this HBA. + */ +#ifdef SYM_OPT_BUS_DMA_ABSTRACTION + m_pool_ident_t bus_dmat; +#endif + + /* + * O/S specific data structure + */ + struct sym_shcb s; + + /* + * Physical bus addresses of the chip. + */ + u32 mmio_ba; /* MMIO 32 bit BUS address */ + int mmio_ws; /* MMIO Window size */ + + u32 ram_ba; /* RAM 32 bit BUS address */ + int ram_ws; /* RAM window size */ + + /* + * SCRIPTS virtual and physical bus addresses. + * 'script' is loaded in the on-chip RAM if present. + * 'scripth' stays in main memory for all chips except the + * 53C895A, 53C896 and 53C1010 that provide 8K on-chip RAM. + */ + u_char *scripta0; /* Copy of scripts A, B, Z */ + u_char *scriptb0; + u_char *scriptz0; + u32 scripta_ba; /* Actual scripts A, B, Z */ + u32 scriptb_ba; /* 32 bit bus addresses. */ + u32 scriptz_ba; + u_short scripta_sz; /* Actual size of script A, B, Z*/ + u_short scriptb_sz; + u_short scriptz_sz; + + /* + * Bus addresses, setup and patch methods for + * the selected firmware. + */ + struct sym_fwa_ba fwa_bas; /* Useful SCRIPTA bus addresses */ + struct sym_fwb_ba fwb_bas; /* Useful SCRIPTB bus addresses */ + struct sym_fwz_ba fwz_bas; /* Useful SCRIPTZ bus addresses */ + void (*fw_setup)(hcb_p np, struct sym_fw *fw); + void (*fw_patch)(hcb_p np); + char *fw_name; + + /* + * General controller parameters and configuration. 
+ */ + u_short device_id; /* PCI device id */ + u_char revision_id; /* PCI device revision id */ + u_int features; /* Chip features map */ + u_char myaddr; /* SCSI id of the adapter */ + u_char maxburst; /* log base 2 of dwords burst */ + u_char maxwide; /* Maximum transfer width */ + u_char minsync; /* Min sync period factor (ST) */ + u_char maxsync; /* Max sync period factor (ST) */ + u_char maxoffs; /* Max scsi offset (ST) */ + u_char minsync_dt; /* Min sync period factor (DT) */ + u_char maxsync_dt; /* Max sync period factor (DT) */ + u_char maxoffs_dt; /* Max scsi offset (DT) */ + u_char multiplier; /* Clock multiplier (1,2,4) */ + u_char clock_divn; /* Number of clock divisors */ + u32 clock_khz; /* SCSI clock frequency in KHz */ + u32 pciclk_khz; /* Estimated PCI clock in KHz */ + /* + * Start queue management. + * It is filled up by the host processor and accessed by the + * SCRIPTS processor in order to start SCSI commands. + */ + volatile /* Prevent code optimizations */ + u32 *squeue; /* Start queue virtual address */ + u32 squeue_ba; /* Start queue BUS address */ + u_short squeueput; /* Next free slot of the queue */ + u_short actccbs; /* Number of allocated CCBs */ + + /* + * Command completion queue. + * It is the same size as the start queue to avoid overflow. + */ + u_short dqueueget; /* Next position to scan */ + volatile /* Prevent code optimizations */ + u32 *dqueue; /* Completion (done) queue */ + u32 dqueue_ba; /* Done queue BUS address */ + + /* + * Miscellaneous buffers accessed by the scripts-processor. + * They shall be DWORD aligned, because they may be read or + * written with a script command. + */ + u_char msgout[8]; /* Buffer for MESSAGE OUT */ + u_char msgin [8]; /* Buffer for MESSAGE IN */ + u32 lastmsg; /* Last SCSI message sent */ + u32 scratch; /* Scratch for SCSI receive */ + /* Also used for cache test */ + /* + * Miscellaneous configuration and status parameters. 
+ */ + u_char usrflags; /* Miscellaneous user flags */ + u_char scsi_mode; /* Current SCSI BUS mode */ + u_char verbose; /* Verbosity for this controller*/ + + /* + * CCB lists and queue. + */ + ccb_p ccbh[CCB_HASH_SIZE]; /* CCB hashed by DSA value */ + SYM_QUEHEAD free_ccbq; /* Queue of available CCBs */ + SYM_QUEHEAD busy_ccbq; /* Queue of busy CCBs */ + + /* + * During error handling and/or recovery, + * active CCBs that are to be completed with + * error or requeued are moved from the busy_ccbq + * to the comp_ccbq prior to completion. + */ + SYM_QUEHEAD comp_ccbq; + +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING + SYM_QUEHEAD dummy_ccbq; +#endif + /* + * Optional handling of IO timeouts. + */ +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT + SYM_QUEHEAD tmo0_ccbq; + SYM_QUEHEAD *tmo_ccbq; /* [2*SYM_TIMEOUT_ORDER_MAX] */ + u_int tmo_clock; + u_int tmo_actq; +#endif + + /* + * IMMEDIATE ARBITRATION (IARB) control. + * + * We keep track in 'last_cp' of the last CCB that has been + * queued to the SCRIPTS processor and clear 'last_cp' when + * this CCB completes. If last_cp is not zero at the moment + * we queue a new CCB, we set a flag in 'last_cp' that is + * used by the SCRIPTS as a hint for setting IARB. + * We donnot set more than 'iarb_max' consecutive hints for + * IARB in order to leave devices a chance to reselect. + * By the way, any non zero value of 'iarb_max' is unfair. :) + */ +#ifdef SYM_CONF_IARB_SUPPORT + u_short iarb_max; /* Max. # consecutive IARB hints*/ + u_short iarb_count; /* Actual # of these hints */ + ccb_p last_cp; +#endif + + /* + * Command abort handling. + * We need to synchronize tightly with the SCRIPTS + * processor in order to handle things correctly. + */ + u_char abrt_msg[4]; /* Message to send buffer */ + struct sym_tblmove abrt_tbl; /* Table for the MOV of it */ + struct sym_tblsel abrt_sel; /* Sync params for selection */ + u_char istat_sem; /* Tells the chip to stop (SEM) */ + + /* + * 64 bit DMA handling. 
+ */ +#if SYM_CONF_DMA_ADDRESSING_MODE != 0 + u_char use_dac; /* Use PCI DAC cycles */ +#if SYM_CONF_DMA_ADDRESSING_MODE == 2 + u_char dmap_dirty; /* Dma segments registers dirty */ + u32 dmap_bah[SYM_DMAP_SIZE];/* Segment registers map */ +#endif +#endif +}; + +#define HCB_BA(np, lbl) (np->hcb_ba + offsetof(struct sym_hcb, lbl)) + +/* + * NVRAM reading (sym_nvram.c). + */ +void sym_nvram_setup_host (hcb_p np, struct sym_nvram *nvram); +void sym_nvram_setup_target (hcb_p np, int target, struct sym_nvram *nvp); +int sym_read_nvram (sdev_p np, struct sym_nvram *nvp); + +/* + * FIRMWARES (sym_fw.c) + */ +struct sym_fw * sym_find_firmware(struct sym_pci_chip *chip); +void sym_fw_bind_script (hcb_p np, u32 *start, int len); + +/* + * Driver methods called from O/S specific code. + */ +char *sym_driver_name(void); +void sym_print_xerr(ccb_p cp, int x_status); +int sym_reset_scsi_bus(hcb_p np, int enab_int); +struct sym_pci_chip * +sym_lookup_pci_chip_table (u_short device_id, u_char revision); +void sym_put_start_queue(hcb_p np, ccb_p cp); +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING +void sym_start_next_ccbs(hcb_p np, lcb_p lp, int maxn); +#endif +void sym_start_up (hcb_p np, int reason); +void sym_interrupt (hcb_p np); +void sym_flush_comp_queue(hcb_p np, int cam_status); +int sym_clear_tasks(hcb_p np, int cam_status, int target, int lun, int task); +ccb_p sym_get_ccb (hcb_p np, u_char tn, u_char ln, u_char tag_order); +void sym_free_ccb (hcb_p np, ccb_p cp); +lcb_p sym_alloc_lcb (hcb_p np, u_char tn, u_char ln); +int sym_queue_scsiio(hcb_p np, cam_scsiio_p csio, ccb_p cp); +int sym_abort_scsiio(hcb_p np, cam_ccb_p ccb, int timed_out); +int sym_abort_ccb(hcb_p np, ccb_p cp, int timed_out); +int sym_reset_scsi_target(hcb_p np, int target); +void sym_hcb_free(hcb_p np); + +#ifdef SYM_OPT_NVRAM_PRE_READ +int sym_hcb_attach(hcb_p np, struct sym_fw *fw, struct sym_nvram *nvram); +#else +int sym_hcb_attach(hcb_p np, struct sym_fw *fw); +#endif + +/* + * Optionnaly, the driver may 
handle IO timeouts. + */ +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT +int sym_abort_ccb(hcb_p np, ccb_p cp, int timed_out); +void sym_timeout_ccb(hcb_p np, ccb_p cp, u_int ticks); +static void __inline sym_untimeout_ccb(hcb_p np, ccb_p cp) +{ + sym_remque(&cp->tmo_linkq); + sym_insque_head(&cp->tmo_linkq, &np->tmo0_ccbq); +} +void sym_clock(hcb_p np); +#endif /* SYM_OPT_HANDLE_IO_TIMEOUT */ + +/* + * Optionnaly, the driver may provide a function + * to announce transfer rate changes. + */ +#ifdef SYM_OPT_ANNOUNCE_TRANSFER_RATE +void sym_announce_transfer_rate(hcb_p np, int target); +#endif + +/* + * Optionnaly, the driver may sniff inquiry data. + */ +#ifdef SYM_OPT_SNIFF_INQUIRY +#define INQ7_CMDQ (0x02) +#define INQ7_SYNC (0x10) +#define INQ7_WIDE16 (0x20) + +#define INQ56_CLOCKING (3<<2) +#define INQ56_ST_ONLY (0<<2) +#define INQ56_DT_ONLY (1<<2) +#define INQ56_ST_DT (3<<2) + +void sym_update_trans_settings(hcb_p np, tcb_p tp); +int +__sym_sniff_inquiry(hcb_p np, u_char tn, u_char ln, + u_char *inq_data, int inq_len); +#endif + + +/* + * Build a scatter/gather entry. + * + * For 64 bit systems, we use the 8 upper bits of the size field + * to provide bus address bits 32-39 to the SCRIPTS processor. + * This allows the 895A, 896, 1010 to address up to 1 TB of memory. 
+ */ + +#if SYM_CONF_DMA_ADDRESSING_MODE == 0 +#define sym_build_sge(np, data, badd, len) \ +do { \ + (data)->addr = cpu_to_scr(badd); \ + (data)->size = cpu_to_scr(len); \ +} while (0) +#elif SYM_CONF_DMA_ADDRESSING_MODE == 1 +#define sym_build_sge(np, data, badd, len) \ +do { \ + (data)->addr = cpu_to_scr(badd); \ + (data)->size = cpu_to_scr((((badd) >> 8) & 0xff000000) + len); \ +} while (0) +#elif SYM_CONF_DMA_ADDRESSING_MODE == 2 +int sym_lookup_dmap(hcb_p np, u32 h, int s); +static __inline void +sym_build_sge(hcb_p np, struct sym_tblmove *data, u64 badd, int len) +{ + u32 h = (badd>>32); + int s = (h&SYM_DMAP_MASK); + + if (h != np->dmap_bah[s]) + goto bad; +good: + (data)->addr = cpu_to_scr(badd); + (data)->size = cpu_to_scr((s<<24) + len); + return; +bad: + s = sym_lookup_dmap(np, h, s); + goto good; +} +#else +#error "Unsupported DMA addressing mode" +#endif + +/* + * Set up data pointers used by SCRIPTS. + * Called from O/S specific code. + */ +static void __inline +sym_setup_data_pointers(hcb_p np, ccb_p cp, int dir) +{ + u32 lastp, goalp; + + /* + * No segments means no data. + */ + if (!cp->segments) + dir = CAM_DIR_NONE; + + /* + * Set the data pointer. + */ + switch(dir) { +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + case CAM_DIR_UNKNOWN: +#endif + case CAM_DIR_OUT: + goalp = SCRIPTA_BA (np, data_out2) + 8; + lastp = goalp - 8 - (cp->segments * (2*4)); +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + cp->wgoalp = cpu_to_scr(goalp); + if (dir != CAM_DIR_UNKNOWN) + break; + cp->phys.head.wlastp = cpu_to_scr(lastp); + /* fall through */ +#else + break; +#endif + case CAM_DIR_IN: + cp->host_flags |= HF_DATA_IN; + goalp = SCRIPTA_BA (np, data_in2) + 8; + lastp = goalp - 8 - (cp->segments * (2*4)); + break; + case CAM_DIR_NONE: + default: +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + cp->host_flags |= HF_DATA_IN; +#endif + lastp = goalp = SCRIPTB_BA (np, no_data); + break; + } + + /* + * Set all pointers values needed by SCRIPTS. 
+ */ + cp->phys.head.lastp = cpu_to_scr(lastp); + cp->phys.head.savep = cpu_to_scr(lastp); + cp->startp = cp->phys.head.savep; + cp->goalp = cpu_to_scr(goalp); + +#ifdef SYM_OPT_HANDLE_DIR_UNKNOWN + /* + * If direction is unknown, start at data_io. + */ + if (dir == CAM_DIR_UNKNOWN) + cp->phys.head.savep = cpu_to_scr(SCRIPTB_BA (np, data_io)); +#endif +} + +/* + * MEMORY ALLOCATOR. + */ + +/* + * Shortest memory chunk is (1<<SYM_MEM_SHIFT), currently 16. + * Actual allocations happen as SYM_MEM_CLUSTER_SIZE sized. + * (1 PAGE at a time is just fine). + */ +#define SYM_MEM_SHIFT 4 +#define SYM_MEM_CLUSTER_SIZE (1UL << SYM_MEM_CLUSTER_SHIFT) +#define SYM_MEM_CLUSTER_MASK (SYM_MEM_CLUSTER_SIZE-1) + +/* + * Link between free memory chunks of a given size. + */ +typedef struct sym_m_link { + struct sym_m_link *next; +} *m_link_p; + +/* + * Virtual to bus physical translation for a given cluster. + * Such a structure is only useful with DMA abstraction. + */ +#ifdef SYM_OPT_BUS_DMA_ABSTRACTION +typedef struct sym_m_vtob { /* Virtual to Bus address translation */ + struct sym_m_vtob *next; +#ifdef SYM_HAVE_M_SVTOB + struct sym_m_svtob s; /* OS specific data structure */ +#endif + m_addr_t vaddr; /* Virtual address */ + m_addr_t baddr; /* Bus physical address */ +} *m_vtob_p; + +/* Hash this stuff a bit to speed up translations */ +#define VTOB_HASH_SHIFT 5 +#define VTOB_HASH_SIZE (1UL << VTOB_HASH_SHIFT) +#define VTOB_HASH_MASK (VTOB_HASH_SIZE-1) +#define VTOB_HASH_CODE(m) \ + ((((m_addr_t) (m)) >> SYM_MEM_CLUSTER_SHIFT) & VTOB_HASH_MASK) +#endif /* SYM_OPT_BUS_DMA_ABSTRACTION */ + +/* + * Memory pool of a given kind. + * Ideally, we want to use: + * 1) 1 pool for memory we donnot need to involve in DMA. + * 2) The same pool for controllers that require same DMA + * constraints and features. + * The OS specific m_pool_id_t thing and the sym_m_pool_match() + * method are expected to tell the driver about. 
+ */ +typedef struct sym_m_pool { +#ifdef SYM_OPT_BUS_DMA_ABSTRACTION + m_pool_ident_t dev_dmat; /* Identifies the pool (see above) */ + m_addr_t (*get_mem_cluster)(struct sym_m_pool *); +#ifdef SYM_MEM_FREE_UNUSED + void (*free_mem_cluster)(struct sym_m_pool *, m_addr_t); +#endif +#define M_GET_MEM_CLUSTER() mp->get_mem_cluster(mp) +#define M_FREE_MEM_CLUSTER(p) mp->free_mem_cluster(mp, p) +#ifdef SYM_HAVE_M_SPOOL + struct sym_m_spool s; /* OS specific data structure */ +#endif + int nump; + m_vtob_p vtob[VTOB_HASH_SIZE]; + struct sym_m_pool *next; +#else +#define M_GET_MEM_CLUSTER() sym_get_mem_cluster() +#define M_FREE_MEM_CLUSTER(p) sym_free_mem_cluster(p) +#endif /* SYM_OPT_BUS_DMA_ABSTRACTION */ + struct sym_m_link h[SYM_MEM_CLUSTER_SHIFT - SYM_MEM_SHIFT + 1]; +} *m_pool_p; + +/* + * Alloc and free non DMAable memory. + */ +void sym_mfree_unlocked(void *ptr, int size, char *name); +void *sym_calloc_unlocked(int size, char *name); + +/* + * Alloc, free and translate addresses to bus physical + * for DMAable memory. + */ +#ifdef SYM_OPT_BUS_DMA_ABSTRACTION +void *__sym_calloc_dma_unlocked(m_pool_ident_t dev_dmat, int size, char *name); +void +__sym_mfree_dma_unlocked(m_pool_ident_t dev_dmat, void *m,int size, char *name); +u32 __vtobus_unlocked(m_pool_ident_t dev_dmat, void *m); +#endif + +/* + * Verbs used by the driver code for DMAable memory handling. + * The _uvptv_ macro avoids a nasty warning about pointer to volatile + * being discarded. + */ +#define _uvptv_(p) ((void *)((u_long)(p))) + +#define _sym_calloc_dma(np, l, n) __sym_calloc_dma(np->bus_dmat, l, n) +#define _sym_mfree_dma(np, p, l, n) \ + __sym_mfree_dma(np->bus_dmat, _uvptv_(p), l, n) +#define sym_calloc_dma(l, n) _sym_calloc_dma(np, l, n) +#define sym_mfree_dma(p, l, n) _sym_mfree_dma(np, p, l, n) +#define _vtobus(np, p) __vtobus(np->bus_dmat, _uvptv_(p)) +#define vtobus(p) _vtobus(np, p) + +/* + * Override some function names. 
+ */ +#define PRINT_ADDR sym_print_addr +#define PRINT_TARGET sym_print_target +#define PRINT_LUN sym_print_lun +#define MDELAY sym_mdelay +#define UDELAY sym_udelay + +#endif /* SYM_HIPD_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_malloc.c b/drivers/scsi/sym53c8xx_2/sym_malloc.c new file mode 100644 index 000000000000..c48239686ee2 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_malloc.c @@ -0,0 +1,418 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __FreeBSD__ +#include <dev/sym/sym_glue.h> +#else +#include "sym_glue.h" +#endif + +/* + * Simple power of two buddy-like generic allocator. + * Provides naturally aligned memory chunks. + * + * This simple code is not intended to be fast, but to + * provide power of 2 aligned memory allocations. + * Since the SCRIPTS processor only supplies 8 bit arithmetic, + * this allocator allows simple and fast address calculations + * from the SCRIPTS code. In addition, cache line alignment + * is guaranteed for power of 2 cache line size. + * + * This allocator has been developped for the Linux sym53c8xx + * driver, since this O/S does not provide naturally aligned + * allocations. 
+ * It has the advantage of allowing the driver to use private + * pages of memory that will be useful if we ever need to deal + * with IO MMUs for PCI. + */ +static void *___sym_malloc(m_pool_p mp, int size) +{ + int i = 0; + int s = (1 << SYM_MEM_SHIFT); + int j; + m_addr_t a; + m_link_p h = mp->h; + + if (size > SYM_MEM_CLUSTER_SIZE) + return 0; + + while (size > s) { + s <<= 1; + ++i; + } + + j = i; + while (!h[j].next) { + if (s == SYM_MEM_CLUSTER_SIZE) { + h[j].next = (m_link_p) M_GET_MEM_CLUSTER(); + if (h[j].next) + h[j].next->next = 0; + break; + } + ++j; + s <<= 1; + } + a = (m_addr_t) h[j].next; + if (a) { + h[j].next = h[j].next->next; + while (j > i) { + j -= 1; + s >>= 1; + h[j].next = (m_link_p) (a+s); + h[j].next->next = 0; + } + } +#ifdef DEBUG + printf("___sym_malloc(%d) = %p\n", size, (void *) a); +#endif + return (void *) a; +} + +/* + * Counter-part of the generic allocator. + */ +static void ___sym_mfree(m_pool_p mp, void *ptr, int size) +{ + int i = 0; + int s = (1 << SYM_MEM_SHIFT); + m_link_p q; + m_addr_t a, b; + m_link_p h = mp->h; + +#ifdef DEBUG + printf("___sym_mfree(%p, %d)\n", ptr, size); +#endif + + if (size > SYM_MEM_CLUSTER_SIZE) + return; + + while (size > s) { + s <<= 1; + ++i; + } + + a = (m_addr_t) ptr; + + while (1) { +#ifdef SYM_MEM_FREE_UNUSED + if (s == SYM_MEM_CLUSTER_SIZE) { + M_FREE_MEM_CLUSTER(a); + break; + } +#endif + b = a ^ s; + q = &h[i]; + while (q->next && q->next != (m_link_p) b) { + q = q->next; + } + if (!q->next) { + ((m_link_p) a)->next = h[i].next; + h[i].next = (m_link_p) a; + break; + } + q->next = q->next->next; + a = a & b; + s <<= 1; + ++i; + } +} + +/* + * Verbose and zeroing allocator that wrapps to the generic allocator. 
+ */ +static void *__sym_calloc2(m_pool_p mp, int size, char *name, int uflags) +{ + void *p; + + p = ___sym_malloc(mp, size); + + if (DEBUG_FLAGS & DEBUG_ALLOC) { + printf ("new %-10s[%4d] @%p.\n", name, size, p); + } + + if (p) + bzero(p, size); + else if (uflags & SYM_MEM_WARN) + printf ("__sym_calloc2: failed to allocate %s[%d]\n", name, size); + return p; +} +#define __sym_calloc(mp, s, n) __sym_calloc2(mp, s, n, SYM_MEM_WARN) + +/* + * Its counter-part. + */ +static void __sym_mfree(m_pool_p mp, void *ptr, int size, char *name) +{ + if (DEBUG_FLAGS & DEBUG_ALLOC) + printf ("freeing %-10s[%4d] @%p.\n", name, size, ptr); + + ___sym_mfree(mp, ptr, size); +} + +/* + * Default memory pool we donnot need to involve in DMA. + * + * If DMA abtraction is not needed, the generic allocator + * calls directly some kernel allocator. + * + * With DMA abstraction, we use functions (methods), to + * distinguish between non DMAable memory and DMAable memory. + */ +#ifndef SYM_OPT_BUS_DMA_ABSTRACTION + +static struct sym_m_pool mp0; + +#else + +static m_addr_t ___mp0_get_mem_cluster(m_pool_p mp) +{ + m_addr_t m = (m_addr_t) sym_get_mem_cluster(); + if (m) + ++mp->nump; + return m; +} + +#ifdef SYM_MEM_FREE_UNUSED +static void ___mp0_free_mem_cluster(m_pool_p mp, m_addr_t m) +{ + sym_free_mem_cluster(m); + --mp->nump; +} +#endif + +#ifdef SYM_MEM_FREE_UNUSED +static struct sym_m_pool mp0 = + {0, ___mp0_get_mem_cluster, ___mp0_free_mem_cluster}; +#else +static struct sym_m_pool mp0 = + {0, ___mp0_get_mem_cluster}; +#endif + +#endif /* SYM_OPT_BUS_DMA_ABSTRACTION */ + +/* + * Actual memory allocation routine for non-DMAed memory. + */ +void *sym_calloc_unlocked(int size, char *name) +{ + void *m; + m = __sym_calloc(&mp0, size, name); + return m; +} + +/* + * Its counter-part. 
+ */ +void sym_mfree_unlocked(void *ptr, int size, char *name) +{ + __sym_mfree(&mp0, ptr, size, name); +} + +#ifdef SYM_OPT_BUS_DMA_ABSTRACTION +/* + * Methods that maintains DMAable pools according to user allocations. + * New pools are created on the fly when a new pool id is provided. + * They are deleted on the fly when they get emptied. + */ +/* Get a memory cluster that matches the DMA contraints of a given pool */ +static m_addr_t ___get_dma_mem_cluster(m_pool_p mp) +{ + m_vtob_p vbp; + m_addr_t vaddr; + + vbp = __sym_calloc(&mp0, sizeof(*vbp), "VTOB"); + if (!vbp) + goto out_err; + + vaddr = sym_m_get_dma_mem_cluster(mp, vbp); + if (vaddr) { + int hc = VTOB_HASH_CODE(vaddr); + vbp->next = mp->vtob[hc]; + mp->vtob[hc] = vbp; + ++mp->nump; + return (m_addr_t) vaddr; + } + return vaddr; +out_err: + return 0; +} + +#ifdef SYM_MEM_FREE_UNUSED +/* Free a memory cluster and associated resources for DMA */ +static void ___free_dma_mem_cluster(m_pool_p mp, m_addr_t m) +{ + m_vtob_p *vbpp, vbp; + int hc = VTOB_HASH_CODE(m); + + vbpp = &mp->vtob[hc]; + while (*vbpp && (*vbpp)->vaddr != m) + vbpp = &(*vbpp)->next; + if (*vbpp) { + vbp = *vbpp; + *vbpp = (*vbpp)->next; + sym_m_free_dma_mem_cluster(mp, vbp); + __sym_mfree(&mp0, vbp, sizeof(*vbp), "VTOB"); + --mp->nump; + } +} +#endif + +/* Fetch the memory pool for a given pool id (i.e. 
DMA constraints) */ +static __inline m_pool_p ___get_dma_pool(m_pool_ident_t dev_dmat) +{ + m_pool_p mp; + for (mp = mp0.next; + mp && !sym_m_pool_match(mp->dev_dmat, dev_dmat); + mp = mp->next); + return mp; +} + +/* Create a new memory DMAable pool (when fetch failed) */ +static m_pool_p ___cre_dma_pool(m_pool_ident_t dev_dmat) +{ + m_pool_p mp = 0; + + mp = __sym_calloc(&mp0, sizeof(*mp), "MPOOL"); + if (mp) { + mp->dev_dmat = dev_dmat; + if (!sym_m_create_dma_mem_tag(mp)) { + mp->get_mem_cluster = ___get_dma_mem_cluster; +#ifdef SYM_MEM_FREE_UNUSED + mp->free_mem_cluster = ___free_dma_mem_cluster; +#endif + mp->next = mp0.next; + mp0.next = mp; + return mp; + } + } + if (mp) + __sym_mfree(&mp0, mp, sizeof(*mp), "MPOOL"); + return 0; +} + +#ifdef SYM_MEM_FREE_UNUSED +/* Destroy a DMAable memory pool (when got emptied) */ +static void ___del_dma_pool(m_pool_p p) +{ + m_pool_p *pp = &mp0.next; + + while (*pp && *pp != p) + pp = &(*pp)->next; + if (*pp) { + *pp = (*pp)->next; + sym_m_delete_dma_mem_tag(p); + __sym_mfree(&mp0, p, sizeof(*p), "MPOOL"); + } +} +#endif + +/* + * Actual allocator for DMAable memory. + */ +void *__sym_calloc_dma_unlocked(m_pool_ident_t dev_dmat, int size, char *name) +{ + m_pool_p mp; + void *m = 0; + + mp = ___get_dma_pool(dev_dmat); + if (!mp) + mp = ___cre_dma_pool(dev_dmat); + if (mp) + m = __sym_calloc(mp, size, name); +#ifdef SYM_MEM_FREE_UNUSED + if (mp && !mp->nump) + ___del_dma_pool(mp); +#endif + + return m; +} + +/* + * Its counter-part. + */ +void +__sym_mfree_dma_unlocked(m_pool_ident_t dev_dmat, void *m, int size, char *name) +{ + m_pool_p mp; + + mp = ___get_dma_pool(dev_dmat); + if (mp) + __sym_mfree(mp, m, size, name); +#ifdef SYM_MEM_FREE_UNUSED + if (mp && !mp->nump) + ___del_dma_pool(mp); +#endif +} + +/* + * Actual virtual to bus physical address translator + * for 32 bit addressable DMAable memory. 
+ */ +u32 __vtobus_unlocked(m_pool_ident_t dev_dmat, void *m) +{ + m_pool_p mp; + int hc = VTOB_HASH_CODE(m); + m_vtob_p vp = 0; + m_addr_t a = ((m_addr_t) m) & ~SYM_MEM_CLUSTER_MASK; + + mp = ___get_dma_pool(dev_dmat); + if (mp) { + vp = mp->vtob[hc]; + while (vp && (m_addr_t) vp->vaddr != a) + vp = vp->next; + } + if (!vp) + panic("sym: VTOBUS FAILED!\n"); + return (u32)(vp ? vp->baddr + (((m_addr_t) m) - a) : 0); +} + +#endif /* SYM_OPT_BUS_DMA_ABSTRACTION */ diff --git a/drivers/scsi/sym53c8xx_2/sym_misc.c b/drivers/scsi/sym53c8xx_2/sym_misc.c new file mode 100644 index 000000000000..8ab37a5ec54b --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_misc.c @@ -0,0 +1,336 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __FreeBSD__ +#include <dev/sym/sym_glue.h> +#else +#include "sym_glue.h" +#endif + +#ifdef SYM_OPT_HANDLE_IO_TIMEOUT +/* + * Optional CCB timeout handling. + * + * This code is useful for O/Ses that allow or expect + * SIMs (low-level drivers) to handle SCSI IO timeouts. + * It uses a power-of-two based algorithm of my own:) + * that avoids scanning of lists, provided that: + * + * - The IO does complete in less than half the associated + * timeout value. + * - The greatest delay between the queuing of the IO and + * its completion is less than + * (1<<(SYM_CONF_TIMEOUT_ORDER_MAX-1))/2 ticks. 
+ * + * For example, if tick is 1 second and the max order is 8, + * any IO that is completed within less than 64 seconds will + * just be put into some list at queuing and be removed + * at completion without any additionnal overhead. + */ + +/* + * Set a timeout condition on a CCB. + */ +void sym_timeout_ccb(hcb_p np, ccb_p cp, u_int ticks) +{ + sym_remque(&cp->tmo_linkq); + cp->tmo_clock = np->tmo_clock + ticks; + if (!ticks) { + sym_insque_head(&cp->tmo_linkq, &np->tmo0_ccbq); + } + else { + int i = SYM_CONF_TIMEOUT_ORDER_MAX - 1; + while (i > 0) { + if (ticks >= (1<<(i+1))) + break; + --i; + } + if (!(np->tmo_actq & (1<<i))) + i += SYM_CONF_TIMEOUT_ORDER_MAX; + sym_insque_head(&cp->tmo_linkq, &np->tmo_ccbq[i]); + } +} + +/* + * Walk a list of CCB and handle timeout conditions. + * Should never be called in normal situations. + */ +static void sym_walk_ccb_tmo_list(hcb_p np, SYM_QUEHEAD *tmoq) +{ + SYM_QUEHEAD qtmp, *qp; + ccb_p cp; + + sym_que_move(tmoq, &qtmp); + while ((qp = sym_remque_head(&qtmp)) != 0) { + sym_insque_head(qp, &np->tmo0_ccbq); + cp = sym_que_entry(qp, struct sym_ccb, tmo_linkq); + if (cp->tmo_clock != np->tmo_clock && + cp->tmo_clock + 1 != np->tmo_clock) + sym_timeout_ccb(np, cp, cp->tmo_clock - np->tmo_clock); + else + sym_abort_ccb(np, cp, 1); + } +} + +/* + * Our clock handler called from the O/S specific side. + */ +void sym_clock(hcb_p np) +{ + int i, j; + u_int tmp; + + tmp = np->tmo_clock; + tmp ^= (++np->tmo_clock); + + for (i = 0; i < SYM_CONF_TIMEOUT_ORDER_MAX; i++, tmp >>= 1) { + if (!(tmp & 1)) + continue; + j = i; + if (np->tmo_actq & (1<<i)) + j += SYM_CONF_TIMEOUT_ORDER_MAX; + + if (!sym_que_empty(&np->tmo_ccbq[j])) { + sym_walk_ccb_tmo_list(np, &np->tmo_ccbq[j]); + } + np->tmo_actq ^= (1<<i); + } +} +#endif /* SYM_OPT_HANDLE_IO_TIMEOUT */ + + +#ifdef SYM_OPT_ANNOUNCE_TRANSFER_RATE +/* + * Announce transfer rate if anything changed since last announcement. 
+ */ +void sym_announce_transfer_rate(hcb_p np, int target) +{ + tcb_p tp = &np->target[target]; + +#define __tprev tp->tinfo.prev +#define __tcurr tp->tinfo.curr + + if (__tprev.options == __tcurr.options && + __tprev.width == __tcurr.width && + __tprev.offset == __tcurr.offset && + !(__tprev.offset && __tprev.period != __tcurr.period)) + return; + + __tprev.options = __tcurr.options; + __tprev.width = __tcurr.width; + __tprev.offset = __tcurr.offset; + __tprev.period = __tcurr.period; + + if (__tcurr.offset && __tcurr.period) { + u_int period, f10, mb10; + char *scsi; + + period = f10 = mb10 = 0; + scsi = "FAST-5"; + + if (__tcurr.period <= 9) { + scsi = "FAST-80"; + period = 125; + mb10 = 1600; + } + else { + if (__tcurr.period <= 11) { + scsi = "FAST-40"; + period = 250; + if (__tcurr.period == 11) + period = 303; + } + else if (__tcurr.period < 25) { + scsi = "FAST-20"; + if (__tcurr.period == 12) + period = 500; + } + else if (__tcurr.period <= 50) { + scsi = "FAST-10"; + } + if (!period) + period = 40 * __tcurr.period; + f10 = 100000 << (__tcurr.width ? 1 : 0); + mb10 = (f10 + period/2) / period; + } + printf_info ( + "%s:%d: %s %sSCSI %d.%d MB/s %s (%d.%d ns, offset %d)\n", + sym_name(np), target, scsi, __tcurr.width? "WIDE " : "", + mb10/10, mb10%10, + (__tcurr.options & PPR_OPT_DT) ? "DT" : "ST", + period/10, period%10, __tcurr.offset); + } + else + printf_info ("%s:%d: %sasynchronous.\n", + sym_name(np), target, __tcurr.width? "wide " : ""); +} +#undef __tprev +#undef __tcurr +#endif /* SYM_OPT_ANNOUNCE_TRANSFER_RATE */ + + +#ifdef SYM_OPT_SNIFF_INQUIRY +/* + * Update transfer settings according to user settings + * and bits sniffed out from INQUIRY response. 
+ */ +void sym_update_trans_settings(hcb_p np, tcb_p tp) +{ + bcopy(&tp->tinfo.user, &tp->tinfo.goal, sizeof(tp->tinfo.goal)); + + if (tp->inq_version >= 4) { + switch(tp->inq_byte56 & INQ56_CLOCKING) { + case INQ56_ST_ONLY: + tp->tinfo.goal.options = 0; + break; + case INQ56_DT_ONLY: + case INQ56_ST_DT: + default: + break; + } + } + + if (!((tp->inq_byte7 & tp->inq_byte7_valid) & INQ7_WIDE16)) { + tp->tinfo.goal.width = 0; + tp->tinfo.goal.options = 0; + } + + if (!((tp->inq_byte7 & tp->inq_byte7_valid) & INQ7_SYNC)) { + tp->tinfo.goal.offset = 0; + tp->tinfo.goal.options = 0; + } + + if (tp->tinfo.goal.options & PPR_OPT_DT) { + if (tp->tinfo.goal.offset > np->maxoffs_dt) + tp->tinfo.goal.offset = np->maxoffs_dt; + } + else { + if (tp->tinfo.goal.offset > np->maxoffs) + tp->tinfo.goal.offset = np->maxoffs; + } +} + +/* + * Snoop target capabilities from INQUIRY response. + * We only believe device versions >= SCSI-2 that use + * appropriate response data format (2). But it seems + * that some CCS devices also support SYNC (?). + */ +int +__sym_sniff_inquiry(hcb_p np, u_char tn, u_char ln, + u_char *inq_data, int inq_len) +{ + tcb_p tp = &np->target[tn]; + u_char inq_version; + u_char inq_byte7; + u_char inq_byte56; + + if (!inq_data || inq_len < 2) + return -1; + + /* + * Check device type and qualifier. + */ + if ((inq_data[0] & 0xe0) == 0x60) + return -1; + + /* + * Get SPC version. + */ + if (inq_len <= 2) + return -1; + inq_version = inq_data[2] & 0x7; + + /* + * Get SYNC/WIDE16 capabilities. + */ + inq_byte7 = tp->inq_byte7; + if (inq_version >= 2 && (inq_data[3] & 0xf) == 2) { + if (inq_len > 7) + inq_byte7 = inq_data[7]; + } + else if (inq_version == 1 && (inq_data[3] & 0xf) == 1) + inq_byte7 = INQ7_SYNC; + + /* + * Get Tagged Command Queuing capability. + */ + if (inq_byte7 & INQ7_CMDQ) + sym_set_bit(tp->cmdq_map, ln); + else + sym_clr_bit(tp->cmdq_map, ln); + inq_byte7 &= ~INQ7_CMDQ; + + /* + * Get CLOCKING capability. 
+ */ + inq_byte56 = tp->inq_byte56; + if (inq_version >= 4 && inq_len > 56) + tp->inq_byte56 = inq_data[56]; +#if 0 +printf("XXXXXX [%d] inq_version=%x inq_byte7=%x inq_byte56=%x XXXXX\n", + inq_len, inq_version, inq_byte7, inq_byte56); +#endif + /* + * Trigger a negotiation if needed. + */ + if (tp->inq_version != inq_version || + tp->inq_byte7 != inq_byte7 || + tp->inq_byte56 != inq_byte56) { + tp->inq_version = inq_version; + tp->inq_byte7 = inq_byte7; + tp->inq_byte56 = inq_byte56; + return 1; + } + return 0; +} +#endif /* SYM_OPT_SNIFF_INQUIRY */ diff --git a/drivers/scsi/sym53c8xx_2/sym_misc.h b/drivers/scsi/sym53c8xx_2/sym_misc.h new file mode 100644 index 000000000000..3a4b41f13907 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_misc.h @@ -0,0 +1,311 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SYM_MISC_H +#define SYM_MISC_H + +/* + * A 'read barrier' flushes any data that have been prefetched + * by the processor due to out of order execution. Such a barrier + * must notably be inserted prior to looking at data that have + * been DMAed, assuming that program does memory READs in proper + * order and that the device ensured proper ordering of WRITEs. + * + * A 'write barrier' prevents any previous WRITEs to pass further + * WRITEs. Such barriers must be inserted each time another agent + * relies on ordering of WRITEs. 
+ * + * Note that, due to posting of PCI memory writes, we also must + * insert dummy PCI read transactions when some ordering involving + * both directions over the PCI does matter. PCI transactions are + * fully ordered in each direction. + * + * IA32 processors insert implicit barriers when the processor + * accesses unchacheable either for reading or writing, and + * donnot reorder WRITEs. As a result, some 'read barriers' can + * be avoided (following access to uncacheable), and 'write + * barriers' should be useless (preventing compiler optimizations + * should be enough). + */ + +#if defined __i386__ +#define __READ_BARRIER() \ + __asm__ volatile("lock; addl $0,0(%%esp)": : :"memory") +#define __WRITE_BARRIER() __asm__ volatile ("": : :"memory") +#elif defined __powerpc__ +#define __READ_BARRIER() __asm__ volatile("eieio; sync" : : : "memory") +#define __WRITE_BARRIER() __asm__ volatile("eieio; sync" : : : "memory") +#elif defined __ia64__ +#define __READ_BARRIER() __asm__ volatile("mf.a; mf" : : : "memory") +#define __WRITE_BARRIER() __asm__ volatile("mf.a; mf" : : : "memory") +#elif defined __alpha__ +#define __READ_BARRIER() __asm__ volatile("mb": : :"memory") +#define __WRITE_BARRIER() __asm__ volatile("mb": : :"memory") +#else +#define __READ_BARRIER() mb() +#define __WRITE_BARRIER() mb() +#endif + +#ifndef MEMORY_READ_BARRIER +#define MEMORY_READ_BARRIER() __READ_BARRIER() +#endif +#ifndef MEMORY_WRITE_BARRIER +#define MEMORY_WRITE_BARRIER() __WRITE_BARRIER() +#endif + + +/* + * A la VMS/CAM-3 queue management. + */ +typedef struct sym_quehead { + struct sym_quehead *flink; /* Forward pointer */ + struct sym_quehead *blink; /* Backward pointer */ +} SYM_QUEHEAD; + +#define sym_que_init(ptr) do { \ + (ptr)->flink = (ptr); (ptr)->blink = (ptr); \ +} while (0) + +static __inline struct sym_quehead *sym_que_first(struct sym_quehead *head) +{ + return (head->flink == head) ? 
0 : head->flink; +} + +static __inline struct sym_quehead *sym_que_last(struct sym_quehead *head) +{ + return (head->blink == head) ? 0 : head->blink; +} + +static __inline void __sym_que_add(struct sym_quehead * new, + struct sym_quehead * blink, + struct sym_quehead * flink) +{ + flink->blink = new; + new->flink = flink; + new->blink = blink; + blink->flink = new; +} + +static __inline void __sym_que_del(struct sym_quehead * blink, + struct sym_quehead * flink) +{ + flink->blink = blink; + blink->flink = flink; +} + +static __inline int sym_que_empty(struct sym_quehead *head) +{ + return head->flink == head; +} + +static __inline void sym_que_splice(struct sym_quehead *list, + struct sym_quehead *head) +{ + struct sym_quehead *first = list->flink; + + if (first != list) { + struct sym_quehead *last = list->blink; + struct sym_quehead *at = head->flink; + + first->blink = head; + head->flink = first; + + last->flink = at; + at->blink = last; + } +} + +static __inline void sym_que_move(struct sym_quehead *orig, + struct sym_quehead *dest) +{ + struct sym_quehead *first, *last; + + first = orig->flink; + if (first != orig) { + first->blink = dest; + dest->flink = first; + last = orig->blink; + last->flink = dest; + dest->blink = last; + orig->flink = orig; + orig->blink = orig; + } else { + dest->flink = dest; + dest->blink = dest; + } +} + +#define sym_que_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned int)(&((type *)0)->member))) + + +#define sym_insque(new, pos) __sym_que_add(new, pos, (pos)->flink) + +#define sym_remque(el) __sym_que_del((el)->blink, (el)->flink) + +#define sym_insque_head(new, head) __sym_que_add(new, head, (head)->flink) + +static __inline struct sym_quehead *sym_remque_head(struct sym_quehead *head) +{ + struct sym_quehead *elem = head->flink; + + if (elem != head) + __sym_que_del(head, elem->flink); + else + elem = 0; + return elem; +} + +#define sym_insque_tail(new, head) __sym_que_add(new, (head)->blink, head) + +static 
__inline struct sym_quehead *sym_remque_tail(struct sym_quehead *head) +{ + struct sym_quehead *elem = head->blink; + + if (elem != head) + __sym_que_del(elem->blink, head); + else + elem = 0; + return elem; +} + +/* + * This one may be useful. + */ +#define FOR_EACH_QUEUED_ELEMENT(head, qp) \ + for (qp = (head)->flink; qp != (head); qp = qp->flink) +/* + * FreeBSD does not offer our kind of queue in the CAM CCB. + * So, we have to cast. + */ +#define sym_qptr(p) ((struct sym_quehead *) (p)) + +/* + * Simple bitmap operations. + */ +#define sym_set_bit(p, n) (((u32 *)(p))[(n)>>5] |= (1<<((n)&0x1f))) +#define sym_clr_bit(p, n) (((u32 *)(p))[(n)>>5] &= ~(1<<((n)&0x1f))) +#define sym_is_bit(p, n) (((u32 *)(p))[(n)>>5] & (1<<((n)&0x1f))) + +/* + * Portable but silly implemented byte order primitives. + */ +#if BYTE_ORDER == BIG_ENDIAN + +#define __revb16(x) ( (((u16)(x) & (u16)0x00ffU) << 8) | \ + (((u16)(x) & (u16)0xff00U) >> 8) ) +#define __revb32(x) ( (((u32)(x) & 0x000000ffU) << 24) | \ + (((u32)(x) & 0x0000ff00U) << 8) | \ + (((u32)(x) & 0x00ff0000U) >> 8) | \ + (((u32)(x) & 0xff000000U) >> 24) ) + +#define __htole16(v) __revb16(v) +#define __htole32(v) __revb32(v) +#define __le16toh(v) __htole16(v) +#define __le32toh(v) __htole32(v) + +static __inline u16 _htole16(u16 v) { return __htole16(v); } +static __inline u32 _htole32(u32 v) { return __htole32(v); } +#define _le16toh _htole16 +#define _le32toh _htole32 + +#else /* LITTLE ENDIAN */ + +#define __htole16(v) (v) +#define __htole32(v) (v) +#define __le16toh(v) (v) +#define __le32toh(v) (v) + +#define _htole16(v) (v) +#define _htole32(v) (v) +#define _le16toh(v) (v) +#define _le32toh(v) (v) + +#endif /* BYTE_ORDER */ + +/* + * The below round up/down macros are to be used with a constant + * as argument (sizeof(...) for example), for the compiler to + * optimize the whole thing. + */ +#define _U_(a,m) (a)<=(1<<m)?m: +#define _D_(a,m) (a)<(1<<(m+1))?m: + +/* + * Round up logarithm to base 2 of a 16 bit constant. 
+ */ +#define _LGRU16_(a) \ +( \ + _U_(a, 0)_U_(a, 1)_U_(a, 2)_U_(a, 3)_U_(a, 4)_U_(a, 5)_U_(a, 6)_U_(a, 7) \ + _U_(a, 8)_U_(a, 9)_U_(a,10)_U_(a,11)_U_(a,12)_U_(a,13)_U_(a,14)_U_(a,15) \ + 16) + +/* + * Round down logarithm to base 2 of a 16 bit constant. + */ +#define _LGRD16_(a) \ +( \ + _D_(a, 0)_D_(a, 1)_D_(a, 2)_D_(a, 3)_D_(a, 4)_D_(a, 5)_D_(a, 6)_D_(a, 7) \ + _D_(a, 8)_D_(a, 9)_D_(a,10)_D_(a,11)_D_(a,12)_D_(a,13)_D_(a,14)_D_(a,15) \ + 16) + +/* + * Round up a 16 bit constant to the nearest power of 2. + */ +#define _SZRU16_(a) ((a)==0?0:(1<<_LGRU16_(a))) + +/* + * Round down a 16 bit constant to the nearest power of 2. + */ +#define _SZRD16_(a) ((a)==0?0:(1<<_LGRD16_(a))) + +#endif /* SYM_MISC_H */ diff --git a/drivers/scsi/sym53c8xx_2/sym_nvram.c b/drivers/scsi/sym53c8xx_2/sym_nvram.c new file mode 100644 index 000000000000..66ce54ffe657 --- /dev/null +++ b/drivers/scsi/sym53c8xx_2/sym_nvram.c @@ -0,0 +1,730 @@ +/* + * Device driver for the SYMBIOS/LSILOGIC 53C8XX and 53C1010 family + * of PCI-SCSI IO processors. + * + * Copyright (C) 1999-2001 Gerard Roudier <groudier@free.fr> + * + * This driver is derived from the Linux sym53c8xx driver. + * Copyright (C) 1998-2000 Gerard Roudier + * + * The sym53c8xx driver is derived from the ncr53c8xx driver that had been + * a port of the FreeBSD ncr driver to Linux-1.2.13. + * + * The original ncr driver has been written for 386bsd and FreeBSD by + * Wolfgang Stanglmeier <wolf@cologne.de> + * Stefan Esser <se@mi.Uni-Koeln.de> + * Copyright (C) 1994 Wolfgang Stanglmeier + * + * Other major contributions: + * + * NVRAM detection and reading. + * Copyright (C) 1997 Richard Waltham <dormouse@farsrobt.demon.co.uk> + * + *----------------------------------------------------------------------------- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Where this Software is combined with software released under the terms of + * the GNU Public License ("GPL") and the terms of the GPL would require the + * combined work to also be released under the terms of the GPL, the terms + * and conditions of this License will apply in addition to those of the + * GPL with the exception of any terms or conditions of this License that + * conflict with, or are expressly prohibited by, the GPL. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __FreeBSD__ +#include <dev/sym/sym_glue.h> +#else +#include "sym_glue.h" +#endif + +/* + * Some poor and bogus sync table that refers to Tekram NVRAM layout. + */ +#if SYM_CONF_NVRAM_SUPPORT +static u_char Tekram_sync[16] = + {25,31,37,43, 50,62,75,125, 12,15,18,21, 6,7,9,10}; +#ifdef SYM_CONF_DEBUG_NVRAM +static u_char Tekram_boot_delay[7] = {3, 5, 10, 20, 30, 60, 120}; +#endif +#endif + +/* + * Get host setup from NVRAM. 
+ */ +void sym_nvram_setup_host (hcb_p np, struct sym_nvram *nvram) +{ +#if SYM_CONF_NVRAM_SUPPORT + /* + * Get parity checking, host ID, verbose mode + * and miscellaneous host flags from NVRAM. + */ + switch(nvram->type) { + case SYM_SYMBIOS_NVRAM: + if (!(nvram->data.Symbios.flags & SYMBIOS_PARITY_ENABLE)) + np->rv_scntl0 &= ~0x0a; + np->myaddr = nvram->data.Symbios.host_id & 0x0f; + if (nvram->data.Symbios.flags & SYMBIOS_VERBOSE_MSGS) + np->verbose += 1; + if (nvram->data.Symbios.flags1 & SYMBIOS_SCAN_HI_LO) + np->usrflags |= SYM_SCAN_TARGETS_HILO; + if (nvram->data.Symbios.flags2 & SYMBIOS_AVOID_BUS_RESET) + np->usrflags |= SYM_AVOID_BUS_RESET; + break; + case SYM_TEKRAM_NVRAM: + np->myaddr = nvram->data.Tekram.host_id & 0x0f; + break; + default: + break; + } +#endif +} + +/* + * Get target setup from NVRAM. + */ +#if SYM_CONF_NVRAM_SUPPORT +static void sym_Symbios_setup_target(hcb_p np,int target, Symbios_nvram *nvram); +static void sym_Tekram_setup_target(hcb_p np,int target, Tekram_nvram *nvram); +#endif + +void sym_nvram_setup_target (hcb_p np, int target, struct sym_nvram *nvp) +{ +#if SYM_CONF_NVRAM_SUPPORT + switch(nvp->type) { + case SYM_SYMBIOS_NVRAM: + sym_Symbios_setup_target (np, target, &nvp->data.Symbios); + break; + case SYM_TEKRAM_NVRAM: + sym_Tekram_setup_target (np, target, &nvp->data.Tekram); + break; + default: + break; + } +#endif +} + +#if SYM_CONF_NVRAM_SUPPORT +/* + * Get target set-up from Symbios format NVRAM. + */ +static void +sym_Symbios_setup_target(hcb_p np, int target, Symbios_nvram *nvram) +{ + tcb_p tp = &np->target[target]; + Symbios_target *tn = &nvram->target[target]; + + tp->tinfo.user.period = tn->sync_period ? (tn->sync_period + 3) / 4 : 0; + tp->tinfo.user.width = tn->bus_width == 0x10 ? BUS_16_BIT : BUS_8_BIT; + tp->usrtags = + (tn->flags & SYMBIOS_QUEUE_TAGS_ENABLED)? 
SYM_SETUP_MAX_TAG : 0; + + if (!(tn->flags & SYMBIOS_DISCONNECT_ENABLE)) + tp->usrflags &= ~SYM_DISC_ENABLED; + if (!(tn->flags & SYMBIOS_SCAN_AT_BOOT_TIME)) + tp->usrflags |= SYM_SCAN_BOOT_DISABLED; + if (!(tn->flags & SYMBIOS_SCAN_LUNS)) + tp->usrflags |= SYM_SCAN_LUNS_DISABLED; +} + +/* + * Get target set-up from Tekram format NVRAM. + */ +static void +sym_Tekram_setup_target(hcb_p np, int target, Tekram_nvram *nvram) +{ + tcb_p tp = &np->target[target]; + struct Tekram_target *tn = &nvram->target[target]; + int i; + + if (tn->flags & TEKRAM_SYNC_NEGO) { + i = tn->sync_index & 0xf; + tp->tinfo.user.period = Tekram_sync[i]; + } + + tp->tinfo.user.width = + (tn->flags & TEKRAM_WIDE_NEGO) ? BUS_16_BIT : BUS_8_BIT; + + if (tn->flags & TEKRAM_TAGGED_COMMANDS) { + tp->usrtags = 2 << nvram->max_tags_index; + } + + if (tn->flags & TEKRAM_DISCONNECT_ENABLE) + tp->usrflags |= SYM_DISC_ENABLED; + + /* If any device does not support parity, we will not use this option */ + if (!(tn->flags & TEKRAM_PARITY_CHECK)) + np->rv_scntl0 &= ~0x0a; /* SCSI parity checking disabled */ +} + +#ifdef SYM_CONF_DEBUG_NVRAM +/* + * Dump Symbios format NVRAM for debugging purpose. + */ +static void sym_display_Symbios_nvram(sdev_p np, Symbios_nvram *nvram) +{ + int i; + + /* display Symbios nvram host data */ + printf("%s: HOST ID=%d%s%s%s%s%s%s\n", + sym_name(np), nvram->host_id & 0x0f, + (nvram->flags & SYMBIOS_SCAM_ENABLE) ? " SCAM" :"", + (nvram->flags & SYMBIOS_PARITY_ENABLE) ? " PARITY" :"", + (nvram->flags & SYMBIOS_VERBOSE_MSGS) ? " VERBOSE" :"", + (nvram->flags & SYMBIOS_CHS_MAPPING) ? " CHS_ALT" :"", + (nvram->flags2 & SYMBIOS_AVOID_BUS_RESET)?" NO_RESET" :"", + (nvram->flags1 & SYMBIOS_SCAN_HI_LO) ? " HI_LO" :""); + + /* display Symbios nvram drive data */ + for (i = 0 ; i < 15 ; i++) { + struct Symbios_target *tn = &nvram->target[i]; + printf("%s-%d:%s%s%s%s WIDTH=%d SYNC=%d TMO=%d\n", + sym_name(np), i, + (tn->flags & SYMBIOS_DISCONNECT_ENABLE) ? 
" DISC" : "", + (tn->flags & SYMBIOS_SCAN_AT_BOOT_TIME) ? " SCAN_BOOT" : "", + (tn->flags & SYMBIOS_SCAN_LUNS) ? " SCAN_LUNS" : "", + (tn->flags & SYMBIOS_QUEUE_TAGS_ENABLED)? " TCQ" : "", + tn->bus_width, + tn->sync_period / 4, + tn->timeout); + } +} + +/* + * Dump TEKRAM format NVRAM for debugging purpose. + */ +static void sym_display_Tekram_nvram(sdev_p np, Tekram_nvram *nvram) +{ + int i, tags, boot_delay; + char *rem; + + /* display Tekram nvram host data */ + tags = 2 << nvram->max_tags_index; + boot_delay = 0; + if (nvram->boot_delay_index < 6) + boot_delay = Tekram_boot_delay[nvram->boot_delay_index]; + switch((nvram->flags & TEKRAM_REMOVABLE_FLAGS) >> 6) { + default: + case 0: rem = ""; break; + case 1: rem = " REMOVABLE=boot device"; break; + case 2: rem = " REMOVABLE=all"; break; + } + + printf("%s: HOST ID=%d%s%s%s%s%s%s%s%s%s BOOT DELAY=%d tags=%d\n", + sym_name(np), nvram->host_id & 0x0f, + (nvram->flags1 & SYMBIOS_SCAM_ENABLE) ? " SCAM" :"", + (nvram->flags & TEKRAM_MORE_THAN_2_DRIVES) ? " >2DRIVES":"", + (nvram->flags & TEKRAM_DRIVES_SUP_1GB) ? " >1GB" :"", + (nvram->flags & TEKRAM_RESET_ON_POWER_ON) ? " RESET" :"", + (nvram->flags & TEKRAM_ACTIVE_NEGATION) ? " ACT_NEG" :"", + (nvram->flags & TEKRAM_IMMEDIATE_SEEK) ? " IMM_SEEK" :"", + (nvram->flags & TEKRAM_SCAN_LUNS) ? " SCAN_LUNS" :"", + (nvram->flags1 & TEKRAM_F2_F6_ENABLED) ? " F2_F6" :"", + rem, boot_delay, tags); + + /* display Tekram nvram drive data */ + for (i = 0; i <= 15; i++) { + int sync, j; + struct Tekram_target *tn = &nvram->target[i]; + j = tn->sync_index & 0xf; + sync = Tekram_sync[j]; + printf("%s-%d:%s%s%s%s%s%s PERIOD=%d\n", + sym_name(np), i, + (tn->flags & TEKRAM_PARITY_CHECK) ? " PARITY" : "", + (tn->flags & TEKRAM_SYNC_NEGO) ? " SYNC" : "", + (tn->flags & TEKRAM_DISCONNECT_ENABLE) ? " DISC" : "", + (tn->flags & TEKRAM_START_CMD) ? " START" : "", + (tn->flags & TEKRAM_TAGGED_COMMANDS) ? " TCQ" : "", + (tn->flags & TEKRAM_WIDE_NEGO) ? 
" WIDE" : "", + sync); + } +} +#endif /* SYM_CONF_DEBUG_NVRAM */ +#endif /* SYM_CONF_NVRAM_SUPPORT */ + + +/* + * Try reading Symbios or Tekram NVRAM + */ +#if SYM_CONF_NVRAM_SUPPORT +static int sym_read_Symbios_nvram (sdev_p np, Symbios_nvram *nvram); +static int sym_read_Tekram_nvram (sdev_p np, Tekram_nvram *nvram); +#endif + +int sym_read_nvram (sdev_p np, struct sym_nvram *nvp) +{ +#if SYM_CONF_NVRAM_SUPPORT + /* + * Try to read SYMBIOS nvram. + * Try to read TEKRAM nvram if Symbios nvram not found. + */ + if (SYM_SETUP_SYMBIOS_NVRAM && + !sym_read_Symbios_nvram (np, &nvp->data.Symbios)) { + nvp->type = SYM_SYMBIOS_NVRAM; +#ifdef SYM_CONF_DEBUG_NVRAM + sym_display_Symbios_nvram(np, &nvp->data.Symbios); +#endif + } + else if (SYM_SETUP_TEKRAM_NVRAM && + !sym_read_Tekram_nvram (np, &nvp->data.Tekram)) { + nvp->type = SYM_TEKRAM_NVRAM; +#ifdef SYM_CONF_DEBUG_NVRAM + sym_display_Tekram_nvram(np, &nvp->data.Tekram); +#endif + } + else + nvp->type = 0; +#else + nvp->type = 0; +#endif + return nvp->type; +} + + +#if SYM_CONF_NVRAM_SUPPORT +/* + * 24C16 EEPROM reading. + * + * GPOI0 - data in/data out + * GPIO1 - clock + * Symbios NVRAM wiring now also used by Tekram. + */ + +#define SET_BIT 0 +#define CLR_BIT 1 +#define SET_CLK 2 +#define CLR_CLK 3 + +/* + * Set/clear data/clock bit in GPIO0 + */ +static void S24C16_set_bit(sdev_p np, u_char write_bit, u_char *gpreg, + int bit_mode) +{ + UDELAY (5); + switch (bit_mode){ + case SET_BIT: + *gpreg |= write_bit; + break; + case CLR_BIT: + *gpreg &= 0xfe; + break; + case SET_CLK: + *gpreg |= 0x02; + break; + case CLR_CLK: + *gpreg &= 0xfd; + break; + + } + OUTB (nc_gpreg, *gpreg); + UDELAY (5); +} + +/* + * Send START condition to NVRAM to wake it up. 
+ */ +static void S24C16_start(sdev_p np, u_char *gpreg) +{ + S24C16_set_bit(np, 1, gpreg, SET_BIT); + S24C16_set_bit(np, 0, gpreg, SET_CLK); + S24C16_set_bit(np, 0, gpreg, CLR_BIT); + S24C16_set_bit(np, 0, gpreg, CLR_CLK); +} + +/* + * Send STOP condition to NVRAM - puts NVRAM to sleep... ZZzzzz!! + */ +static void S24C16_stop(sdev_p np, u_char *gpreg) +{ + S24C16_set_bit(np, 0, gpreg, SET_CLK); + S24C16_set_bit(np, 1, gpreg, SET_BIT); +} + +/* + * Read or write a bit to the NVRAM, + * read if GPIO0 input else write if GPIO0 output + */ +static void S24C16_do_bit(sdev_p np, u_char *read_bit, u_char write_bit, + u_char *gpreg) +{ + S24C16_set_bit(np, write_bit, gpreg, SET_BIT); + S24C16_set_bit(np, 0, gpreg, SET_CLK); + if (read_bit) + *read_bit = INB (nc_gpreg); + S24C16_set_bit(np, 0, gpreg, CLR_CLK); + S24C16_set_bit(np, 0, gpreg, CLR_BIT); +} + +/* + * Output an ACK to the NVRAM after reading, + * change GPIO0 to output and when done back to an input + */ +static void S24C16_write_ack(sdev_p np, u_char write_bit, u_char *gpreg, + u_char *gpcntl) +{ + OUTB (nc_gpcntl, *gpcntl & 0xfe); + S24C16_do_bit(np, 0, write_bit, gpreg); + OUTB (nc_gpcntl, *gpcntl); +} + +/* + * Input an ACK from NVRAM after writing, + * change GPIO0 to input and when done back to an output + */ +static void S24C16_read_ack(sdev_p np, u_char *read_bit, u_char *gpreg, + u_char *gpcntl) +{ + OUTB (nc_gpcntl, *gpcntl | 0x01); + S24C16_do_bit(np, read_bit, 1, gpreg); + OUTB (nc_gpcntl, *gpcntl); +} + +/* + * WRITE a byte to the NVRAM and then get an ACK to see it was accepted OK, + * GPIO0 must already be set as an output + */ +static void S24C16_write_byte(sdev_p np, u_char *ack_data, u_char write_data, + u_char *gpreg, u_char *gpcntl) +{ + int x; + + for (x = 0; x < 8; x++) + S24C16_do_bit(np, 0, (write_data >> (7 - x)) & 0x01, gpreg); + + S24C16_read_ack(np, ack_data, gpreg, gpcntl); +} + +/* + * READ a byte from the NVRAM and then send an ACK to say we have got it, + * GPIO0 must already be 
set as an input + */ +static void S24C16_read_byte(sdev_p np, u_char *read_data, u_char ack_data, + u_char *gpreg, u_char *gpcntl) +{ + int x; + u_char read_bit; + + *read_data = 0; + for (x = 0; x < 8; x++) { + S24C16_do_bit(np, &read_bit, 1, gpreg); + *read_data |= ((read_bit & 0x01) << (7 - x)); + } + + S24C16_write_ack(np, ack_data, gpreg, gpcntl); +} + +/* + * Read 'len' bytes starting at 'offset'. + */ +static int sym_read_S24C16_nvram (sdev_p np, int offset, u_char *data, int len) +{ + u_char gpcntl, gpreg; + u_char old_gpcntl, old_gpreg; + u_char ack_data; + int retv = 1; + int x; + + /* save current state of GPCNTL and GPREG */ + old_gpreg = INB (nc_gpreg); + old_gpcntl = INB (nc_gpcntl); + gpcntl = old_gpcntl & 0x1c; + + /* set up GPREG & GPCNTL to set GPIO0 and GPIO1 in to known state */ + OUTB (nc_gpreg, old_gpreg); + OUTB (nc_gpcntl, gpcntl); + + /* this is to set NVRAM into a known state with GPIO0/1 both low */ + gpreg = old_gpreg; + S24C16_set_bit(np, 0, &gpreg, CLR_CLK); + S24C16_set_bit(np, 0, &gpreg, CLR_BIT); + + /* now set NVRAM inactive with GPIO0/1 both high */ + S24C16_stop(np, &gpreg); + + /* activate NVRAM */ + S24C16_start(np, &gpreg); + + /* write device code and random address MSB */ + S24C16_write_byte(np, &ack_data, + 0xa0 | ((offset >> 7) & 0x0e), &gpreg, &gpcntl); + if (ack_data & 0x01) + goto out; + + /* write random address LSB */ + S24C16_write_byte(np, &ack_data, + offset & 0xff, &gpreg, &gpcntl); + if (ack_data & 0x01) + goto out; + + /* regenerate START state to set up for reading */ + S24C16_start(np, &gpreg); + + /* rewrite device code and address MSB with read bit set (lsb = 0x01) */ + S24C16_write_byte(np, &ack_data, + 0xa1 | ((offset >> 7) & 0x0e), &gpreg, &gpcntl); + if (ack_data & 0x01) + goto out; + + /* now set up GPIO0 for inputting data */ + gpcntl |= 0x01; + OUTB (nc_gpcntl, gpcntl); + + /* input all requested data - only part of total NVRAM */ + for (x = 0; x < len; x++) + S24C16_read_byte(np, &data[x], (x == 
(len-1)), &gpreg, &gpcntl); + + /* finally put NVRAM back in inactive mode */ + gpcntl &= 0xfe; + OUTB (nc_gpcntl, gpcntl); + S24C16_stop(np, &gpreg); + retv = 0; +out: + /* return GPIO0/1 to original states after having accessed NVRAM */ + OUTB (nc_gpcntl, old_gpcntl); + OUTB (nc_gpreg, old_gpreg); + + return retv; +} + +#undef SET_BIT 0 +#undef CLR_BIT 1 +#undef SET_CLK 2 +#undef CLR_CLK 3 + +/* + * Try reading Symbios NVRAM. + * Return 0 if OK. + */ +static int sym_read_Symbios_nvram (sdev_p np, Symbios_nvram *nvram) +{ + static u_char Symbios_trailer[6] = {0xfe, 0xfe, 0, 0, 0, 0}; + u_char *data = (u_char *) nvram; + int len = sizeof(*nvram); + u_short csum; + int x; + + /* probe the 24c16 and read the SYMBIOS 24c16 area */ + if (sym_read_S24C16_nvram (np, SYMBIOS_NVRAM_ADDRESS, data, len)) + return 1; + + /* check valid NVRAM signature, verify byte count and checksum */ + if (nvram->type != 0 || + bcmp(nvram->trailer, Symbios_trailer, 6) || + nvram->byte_count != len - 12) + return 1; + + /* verify checksum */ + for (x = 6, csum = 0; x < len - 6; x++) + csum += data[x]; + if (csum != nvram->checksum) + return 1; + + return 0; +} + +/* + * 93C46 EEPROM reading. + * + * GPOI0 - data in + * GPIO1 - data out + * GPIO2 - clock + * GPIO4 - chip select + * + * Used by Tekram. + */ + +/* + * Pulse clock bit in GPIO0 + */ +static void T93C46_Clk(sdev_p np, u_char *gpreg) +{ + OUTB (nc_gpreg, *gpreg | 0x04); + UDELAY (2); + OUTB (nc_gpreg, *gpreg); +} + +/* + * Read bit from NVRAM + */ +static void T93C46_Read_Bit(sdev_p np, u_char *read_bit, u_char *gpreg) +{ + UDELAY (2); + T93C46_Clk(np, gpreg); + *read_bit = INB (nc_gpreg); +} + +/* + * Write bit to GPIO0 + */ +static void T93C46_Write_Bit(sdev_p np, u_char write_bit, u_char *gpreg) +{ + if (write_bit & 0x01) + *gpreg |= 0x02; + else + *gpreg &= 0xfd; + + *gpreg |= 0x10; + + OUTB (nc_gpreg, *gpreg); + UDELAY (2); + + T93C46_Clk(np, gpreg); +} + +/* + * Send STOP condition to NVRAM - puts NVRAM to sleep... ZZZzzz!! 
+ */ +static void T93C46_Stop(sdev_p np, u_char *gpreg) +{ + *gpreg &= 0xef; + OUTB (nc_gpreg, *gpreg); + UDELAY (2); + + T93C46_Clk(np, gpreg); +} + +/* + * Send read command and address to NVRAM + */ +static void T93C46_Send_Command(sdev_p np, u_short write_data, + u_char *read_bit, u_char *gpreg) +{ + int x; + + /* send 9 bits, start bit (1), command (2), address (6) */ + for (x = 0; x < 9; x++) + T93C46_Write_Bit(np, (u_char) (write_data >> (8 - x)), gpreg); + + *read_bit = INB (nc_gpreg); +} + +/* + * READ 2 bytes from the NVRAM + */ +static void T93C46_Read_Word(sdev_p np, u_short *nvram_data, u_char *gpreg) +{ + int x; + u_char read_bit; + + *nvram_data = 0; + for (x = 0; x < 16; x++) { + T93C46_Read_Bit(np, &read_bit, gpreg); + + if (read_bit & 0x01) + *nvram_data |= (0x01 << (15 - x)); + else + *nvram_data &= ~(0x01 << (15 - x)); + } +} + +/* + * Read Tekram NvRAM data. + */ +static int T93C46_Read_Data(sdev_p np, u_short *data,int len,u_char *gpreg) +{ + u_char read_bit; + int x; + + for (x = 0; x < len; x++) { + + /* output read command and address */ + T93C46_Send_Command(np, 0x180 | x, &read_bit, gpreg); + if (read_bit & 0x01) + return 1; /* Bad */ + T93C46_Read_Word(np, &data[x], gpreg); + T93C46_Stop(np, gpreg); + } + + return 0; +} + +/* + * Try reading 93C46 Tekram NVRAM. 
+ */ +static int sym_read_T93C46_nvram (sdev_p np, Tekram_nvram *nvram) +{ + u_char gpcntl, gpreg; + u_char old_gpcntl, old_gpreg; + int retv = 1; + + /* save current state of GPCNTL and GPREG */ + old_gpreg = INB (nc_gpreg); + old_gpcntl = INB (nc_gpcntl); + + /* set up GPREG & GPCNTL to set GPIO0/1/2/4 in to known state, 0 in, + 1/2/4 out */ + gpreg = old_gpreg & 0xe9; + OUTB (nc_gpreg, gpreg); + gpcntl = (old_gpcntl & 0xe9) | 0x09; + OUTB (nc_gpcntl, gpcntl); + + /* input all of NVRAM, 64 words */ + retv = T93C46_Read_Data(np, (u_short *) nvram, + sizeof(*nvram) / sizeof(short), &gpreg); + + /* return GPIO0/1/2/4 to original states after having accessed NVRAM */ + OUTB (nc_gpcntl, old_gpcntl); + OUTB (nc_gpreg, old_gpreg); + + return retv; +} + +/* + * Try reading Tekram NVRAM. + * Return 0 if OK. + */ +static int sym_read_Tekram_nvram (sdev_p np, Tekram_nvram *nvram) +{ + u_char *data = (u_char *) nvram; + int len = sizeof(*nvram); + u_short csum; + int x; + + switch (np->device_id) { + case PCI_ID_SYM53C885: + case PCI_ID_SYM53C895: + case PCI_ID_SYM53C896: + x = sym_read_S24C16_nvram(np, TEKRAM_24C16_NVRAM_ADDRESS, + data, len); + break; + case PCI_ID_SYM53C875: + x = sym_read_S24C16_nvram(np, TEKRAM_24C16_NVRAM_ADDRESS, + data, len); + if (!x) + break; + default: + x = sym_read_T93C46_nvram(np, nvram); + break; + } + if (x) + return 1; + + /* verify checksum */ + for (x = 0, csum = 0; x < len - 1; x += 2) + csum += data[x] + (data[x+1] << 8); + if (csum != 0x1234) + return 1; + + return 0; +} + +#endif /* SYM_CONF_NVRAM_SUPPORT */ diff --git a/drivers/sound/ad1816.c b/drivers/sound/ad1816.c index 501bc1bbe6be..8e86be1b6762 100644 --- a/drivers/sound/ad1816.c +++ b/drivers/sound/ad1816.c @@ -28,6 +28,8 @@ * Christoph Hellwig: Adapted to module_init/module_exit. 
2000/03/03 * * Christoph Hellwig: Added isapnp support 2000/03/15 + * + * Arnaldo Carvalho de Melo: get rid of check_region 2001/10/07 */ #include <linux/config.h> @@ -48,7 +50,7 @@ timeout--; \ } \ if (timeout==0) {\ - printk("ad1816: Check for power failed in %s line: %d\n",__FILE__,__LINE__); \ + printk(KERN_WARNING "ad1816: Check for power failed in %s line: %d\n",__FILE__,__LINE__); \ } \ } @@ -78,9 +80,9 @@ typedef struct } ad1816_info; -static int nr_ad1816_devs = 0; -static int ad1816_clockfreq=33000; -static int options=0; +static int nr_ad1816_devs; +static int ad1816_clockfreq = 33000; +static int options; /* for backward mapping of irq to sound device */ @@ -558,14 +560,15 @@ static void ad1816_interrupt (int irq, void *dev_id, struct pt_regs *dummy) if (irq < 0 || irq > 15) { - printk ("ad1816: Got bogus interrupt %d\n", irq); + printk(KERN_WARNING "ad1816: Got bogus interrupt %d\n", irq); return; } dev = irq2dev[irq]; if (dev < 0 || dev >= num_audiodevs) { - printk ("ad1816: IRQ2AD1816-mapping failed for irq %d device %d\n", irq,dev); + printk(KERN_WARNING "ad1816: IRQ2AD1816-mapping failed for " + "irq %d device %d\n", irq,dev); return; } @@ -1000,8 +1003,10 @@ static int __init probe_ad1816 ( struct address_info *hw_config ) int *osp=hw_config->osp; int tmp; - printk("ad1816: AD1816 sounddriver Copyright (C) 1998 by Thorsten Knabe\n"); - printk("ad1816: io=0x%x, irq=%d, dma=%d, dma2=%d, clockfreq=%d, options=%d isadmabug=%d\n", + printk(KERN_INFO "ad1816: AD1816 sounddriver " + "Copyright (C) 1998 by Thorsten Knabe\n"); + printk(KERN_INFO "ad1816: io=0x%x, irq=%d, dma=%d, dma2=%d, " + "clockfreq=%d, options=%d isadmabug=%d\n", hw_config->io_base, hw_config->irq, hw_config->dma, @@ -1010,16 +1015,17 @@ static int __init probe_ad1816 ( struct address_info *hw_config ) options, isa_dma_bridge_buggy); - if (check_region (io_base, 16)) { - printk ("ad1816: I/O port 0x%03x not free\n", io_base); - return 0; + if (!request_region(io_base, 16, "AD1816 
Sound")) { + printk(KERN_WARNING "ad1816: I/O port 0x%03x not free\n", + io_base); + goto err; } DEBUGLOG(printk ("ad1816: detect(%x)\n", io_base)); if (nr_ad1816_devs >= MAX_AUDIO_DEV) { - printk ("ad1816: detect error - step 0\n"); - return 0; + printk(KERN_WARNING "ad1816: detect error - step 0\n"); + goto out_release_region; } devc->base = io_base; @@ -1032,7 +1038,7 @@ static int __init probe_ad1816 ( struct address_info *hw_config ) tmp=inb(devc->base); if ( (tmp&0x80)==0 || tmp==255 ) { DEBUGLOG (printk ("ad1816: Chip is not an AD1816 or chip is not active (Test 0)\n")); - return(0); + goto out_release_region; } @@ -1040,14 +1046,14 @@ static int __init probe_ad1816 ( struct address_info *hw_config ) ad_write(devc,8,12345); if (ad_read(devc,9)!=12345) { DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 1)\n")); - return(0); + goto out_release_region; } /* writes to ireg 8 are copied to ireg 9 */ ad_write(devc,8,54321); if (ad_read(devc,9)!=54321) { DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 2)\n")); - return(0); + goto out_release_region; } @@ -1055,14 +1061,14 @@ static int __init probe_ad1816 ( struct address_info *hw_config ) ad_write(devc,10,54321); if (ad_read(devc,11)!=54321) { DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 3)\n")); - return(0); + goto out_release_region; } /* writes to ireg 10 are copied to ireg 11 */ ad_write(devc,10,12345); if (ad_read(devc,11)!=12345) { DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 4)\n")); - return(0); + goto out_release_region; } /* bit in base +1 cannot be set to 1 */ @@ -1070,15 +1076,19 @@ static int __init probe_ad1816 ( struct address_info *hw_config ) outb(0xff,devc->base+1); if (inb(devc->base+1)!=tmp) { DEBUGLOG (printk ("ad1816: Chip is not an AD1816 (Test 5)\n")); - return(0); + goto out_release_region; } DEBUGLOG (printk ("ad1816: detect() - Detected OK\n")); DEBUGLOG (printk ("ad1816: AD1816 Version: %d\n",ad_read(devc,45))); - /* detection was successful */ + /* 
detection was successful */ return 1; +out_release_region: + release_region(io_base, 16); + /* detection was NOT successful */ +err: return 0; } @@ -1092,10 +1102,7 @@ static void __init attach_ad1816 (struct address_info *hw_config) int my_dev; char dev_name[100]; ad1816_info *devc = &dev_info[nr_ad1816_devs]; - - /* allocate i/o ports */ - request_region (hw_config->io_base, 16, "AD1816 Sound"); devc->base = hw_config->io_base; /* disable all interrupts */ @@ -1105,35 +1112,29 @@ static void __init attach_ad1816 (struct address_info *hw_config) outb (0, devc->base+1); /* allocate irq */ - if (hw_config->irq < 0 || hw_config->irq > 15) { - release_region(hw_config->io_base, 16); - return; - } + if (hw_config->irq < 0 || hw_config->irq > 15) + goto out_release_region; if (request_irq(hw_config->irq, ad1816_interrupt,0, - "SoundPort", - hw_config->osp) < 0) { - printk ("ad1816: IRQ in use\n"); - release_region(hw_config->io_base, 16); - return; + "SoundPort", hw_config->osp) < 0) { + printk(KERN_WARNING "ad1816: IRQ in use\n"); + goto out_release_region; } devc->irq=hw_config->irq; /* DMA stuff */ if (sound_alloc_dma (hw_config->dma, "Sound System")) { - printk ("ad1816: Can't allocate DMA%d\n", hw_config->dma); - free_irq(hw_config->irq,hw_config->osp); - release_region(hw_config->io_base, 16); - return; + printk(KERN_WARNING "ad1816: Can't allocate DMA%d\n", + hw_config->dma); + goto out_free_irq; } devc->dma_playback=hw_config->dma; if ( hw_config->dma2 != -1 && hw_config->dma2 != hw_config->dma) { - if (sound_alloc_dma (hw_config->dma2, "Sound System (capture)")) { - printk ("ad1816: Can't allocate DMA%d\n", hw_config->dma2); - sound_free_dma(hw_config->dma); - free_irq(hw_config->irq,hw_config->osp); - release_region(hw_config->io_base, 16); - return; + if (sound_alloc_dma(hw_config->dma2, + "Sound System (capture)")) { + printk(KERN_WARNING "ad1816: Can't allocate DMA%d\n", + hw_config->dma2); + goto out_free_dma; } devc->dma_capture=hw_config->dma2; 
devc->audio_mode=DMA_AUTOMODE|DMA_DUPLEX; @@ -1157,15 +1158,8 @@ static void __init attach_ad1816 (struct address_info *hw_config) devc, hw_config->dma, hw_config->dma2)) < 0) { - printk ("ad1816: Can't install sound driver\n"); - if (devc->dma_capture>=0) { - sound_free_dma(hw_config->dma2); - } - sound_free_dma(hw_config->dma); - free_irq(hw_config->irq,hw_config->osp); - release_region(hw_config->io_base, 16); - return; - + printk(KERN_WARNING "ad1816: Can't install sound driver\n"); + goto out_free_dma_2; } /* fill rest of structure with reasonable default values */ @@ -1211,6 +1205,17 @@ static void __init attach_ad1816 (struct address_info *hw_config) devc)) >= 0) { audio_devs[my_dev]->min_fragment = 0; } +out: return; +out_free_dma_2: + if (devc->dma_capture >= 0) + sound_free_dma(hw_config->dma2); +out_free_dma: + sound_free_dma(hw_config->dma); +out_free_irq: + free_irq(hw_config->irq,hw_config->osp); +out_release_region: + release_region(hw_config->io_base, 16); + goto out; } static void __exit unload_card(ad1816_info *devc) @@ -1242,9 +1247,8 @@ static void __exit unload_card(ad1816_info *devc) DEBUGLOG (printk("ad1816: Unloading card at base=%x was successful\n",devc->base)); - } else { - printk ("ad1816: no device/card specified\n"); - } + } else + printk(KERN_WARNING "ad1816: no device/card specified\n"); } static struct address_info cfg; diff --git a/drivers/usb/hid-input.c b/drivers/usb/hid-input.c index 1747095edc56..2533c3e031d8 100644 --- a/drivers/usb/hid-input.c +++ b/drivers/usb/hid-input.c @@ -131,7 +131,8 @@ static void hidinput_configure_usage(struct hid_device *device, struct hid_field if (usage->hid == HID_GD_HATSWITCH) { usage->code = ABS_HAT0X; - usage->hat = 1 + (field->logical_maximum == 4); + usage->hat_min = field->logical_minimum; + usage->hat_max = field->logical_maximum; } break; @@ -285,7 +286,7 @@ static void hidinput_configure_usage(struct hid_device *device, struct hid_field input->absflat[usage->code] = (b - a) >> 4; } - if 
(usage->hat) { + if (usage->hat_min != usage->hat_max) { int i; for (i = usage->code; i < usage->code + 2 && i <= max; i++) { input->absmax[i] = 1; @@ -302,9 +303,9 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct struct input_dev *input = &hid->input; int *quirks = &hid->quirks; - if (usage->hat) { - if (usage->hat == 2) value = value * 2; - if (value > 8) value = 8; + if (usage->hat_min != usage->hat_max) { + value = (value - usage->hat_min) * 8 / (usage->hat_max - usage->hat_min + 1) + 1; + if (value < 0 || value > 8) value = 0; input_event(input, usage->type, usage->code , hid_hat_to_axis[value].x); input_event(input, usage->type, usage->code + 1, hid_hat_to_axis[value].y); return; diff --git a/drivers/usb/hid.h b/drivers/usb/hid.h index 45d69a0fbc61..cfc21092600d 100644 --- a/drivers/usb/hid.h +++ b/drivers/usb/hid.h @@ -234,7 +234,8 @@ struct hid_usage { unsigned hid; /* hid usage code */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ - __u8 hat; /* hat switch fun */ + __s8 hat_min; /* hat switch fun */ + __s8 hat_max; /* ditto */ }; struct hid_field { diff --git a/drivers/usb/storage/freecom.c b/drivers/usb/storage/freecom.c index 2ab9a0917135..6c94e167e145 100644 --- a/drivers/usb/storage/freecom.c +++ b/drivers/usb/storage/freecom.c @@ -1,6 +1,6 @@ /* Driver for Freecom USB/IDE adaptor * - * $Id: freecom.c,v 1.18 2001/11/04 13:01:17 mdharm Exp $ + * $Id: freecom.c,v 1.19 2001/11/11 05:42:34 mdharm Exp $ * * Freecom v0.1: * @@ -81,27 +81,28 @@ struct freecom_status { /* Freecom stuffs the interrupt status in the INDEX_STAT bit of the ide * register. */ -#define FCM_INT_STATUS INDEX_STAT +#define FCM_INT_STATUS 0x02 /* INDEX_STAT */ +#define FCM_STATUS_BUSY 0x80 /* These are the packet types. The low bit indicates that this command * should wait for an interrupt. 
*/ -#define FCM_PACKET_ATAPI 0x21 -#define FCM_PACKET_STATUS 0x20 +#define FCM_PACKET_ATAPI 0x21 +#define FCM_PACKET_STATUS 0x20 /* Receive data from the IDE interface. The ATAPI packet has already * waited, so the data should be immediately available. */ -#define FCM_PACKET_INPUT 0x81 +#define FCM_PACKET_INPUT 0x81 /* Send data to the IDE interface. */ -#define FCM_PACKET_OUTPUT 0x01 +#define FCM_PACKET_OUTPUT 0x01 /* Write a value to an ide register. Or the ide register to write after * munging the address a bit. */ -#define FCM_PACKET_IDE_WRITE 0x40 -#define FCM_PACKET_IDE_READ 0xC0 +#define FCM_PACKET_IDE_WRITE 0x40 +#define FCM_PACKET_IDE_READ 0xC0 /* All packets (except for status) are 64 bytes long. */ -#define FCM_PACKET_LENGTH 64 +#define FCM_PACKET_LENGTH 64 /* * Transfer an entire SCSI command's worth of data payload over the bulk @@ -205,6 +206,7 @@ freecom_ide_write (struct us_data *us, int reg, int value) return USB_STOR_TRANSPORT_GOOD; } +#endif /* Read a value from an ide register. */ static int @@ -229,6 +231,8 @@ freecom_ide_read (struct us_data *us, int reg, int *value) else reg = 0x0e; + US_DEBUGP("IDE in request for register 0x%02x\n", reg); + idein->Type = FCM_PACKET_IDE_READ | reg; memset (idein->Pad, 0, sizeof (idein->Pad)); @@ -253,17 +257,17 @@ freecom_ide_read (struct us_data *us, int reg, int *value) else return USB_STOR_TRANSPORT_ERROR; } + US_DEBUGP("IDE in partial is %d\n", partial); if (desired_length == 1) *value = buffer[0]; else *value = le16_to_cpu (*(__u16 *) buffer); - US_DEBUGP("IDE in 0x%02x -> 0x%02x\n", reg, *value); + US_DEBUGP("IDE in 0x%02x -> 0x%02x\n", reg, *value); return USB_STOR_TRANSPORT_GOOD; } -#endif static int freecom_readdata (Scsi_Cmnd *srb, struct us_data *us, @@ -373,13 +377,6 @@ int freecom_transport(Scsi_Cmnd *srb, struct us_data *us) opipe = usb_sndbulkpipe (us->pusb_dev, us->ep_out); ipipe = usb_rcvbulkpipe (us->pusb_dev, us->ep_in); -#if 0 - /* Yuck, let's see if this helps us. 
Artificially increase the - * length on this. */ - if (srb->cmnd[0] == 0x03 && srb->cmnd[4] == 0x12) - srb->cmnd[4] = 0x0E; -#endif - /* The ATAPI Command always goes out first. */ fcb->Type = FCM_PACKET_ATAPI | 0x00; fcb->Timeout = 0; @@ -421,17 +418,25 @@ int freecom_transport(Scsi_Cmnd *srb, struct us_data *us) US_DEBUG(pdump ((void *) fst, partial)); - /* while we haven't received the IRQ */ - while (!(fst->Status & 0x2)) { - /* send a command to re-fetch the status */ - US_DEBUGP("Re-attempting to get status...\n"); + /* The firmware will time-out commands after 20 seconds. Some commands + * can legitimately take longer than this, so we use a different + * command that only waits for the interrupt and then sends status, + * without having to send a new ATAPI command to the device. + * + * NOTE: There is some indication that a data transfer after a timeout + * may not work, but that is a condition that should never happen. + */ + while (fst->Status & FCM_STATUS_BUSY) { + US_DEBUGP("20 second USB/ATAPI bridge TIMEOUT occured!\n"); + US_DEBUGP("fst->Status is %x\n", fst->Status); + /* Get the status again */ fcb->Type = FCM_PACKET_STATUS; fcb->Timeout = 0; - memset (fcb->Atapi, 0, 12); + memset (fcb->Atapi, 0, sizeof(fcb->Filler)); memset (fcb->Filler, 0, sizeof (fcb->Filler)); - /* Send it out. */ + /* Send it out. 
*/ result = usb_stor_bulk_msg (us, fcb, opipe, FCM_PACKET_LENGTH, &partial); @@ -452,10 +457,12 @@ int freecom_transport(Scsi_Cmnd *srb, struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } - /* actually get the status info */ - result = usb_stor_bulk_msg (us, fst, ipipe, + /* get the data */ + result = usb_stor_bulk_msg (us, fst, ipipe, FCM_PACKET_LENGTH, &partial); + US_DEBUGP("bar Status result %d %d\n", result, partial); + /* -ENOENT -- we canceled this transfer */ if (result == -ENOENT) { US_DEBUGP("freecom_transport(): transfer aborted\n"); diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 75d7a8ff3064..88e654ec7df4 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -1,7 +1,7 @@ /* Driver for USB Mass Storage compliant devices * SCSI layer glue code * - * $Id: scsiglue.c,v 1.23 2001/10/15 07:02:32 mdharm Exp $ + * $Id: scsiglue.c,v 1.24 2001/11/11 03:33:58 mdharm Exp $ * * Current development and maintenance by: * (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) @@ -346,7 +346,7 @@ static int proc_info (char *buffer, char **start, off_t offset, int length, /* show the GUID of the device */ SPRINTF(" GUID: " GUID_FORMAT "\n", GUID_ARGS(us->guid)); - SPRINTF(" Attached: %d\n", us->pusb_dev != NULL); + SPRINTF(" Attached: %s\n", us->pusb_dev ? "Yes" : "No"); /* * Calculate start of next buffer, and return value. diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index dc6a5ca72766..72447a713e51 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -1,6 +1,6 @@ /* Driver for USB Mass Storage compliant devices * - * $Id: usb.c,v 1.68 2001/10/15 07:02:33 mdharm Exp $ + * $Id: usb.c,v 1.69 2001/11/11 03:33:03 mdharm Exp $ * * Current development and maintenance by: * (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) @@ -262,16 +262,28 @@ void fill_inquiry_response(struct us_data *us, unsigned char *data, if (data_len<36) // You lose. 
return; - memcpy(data+8, us->unusual_dev->vendorName, - strlen(us->unusual_dev->vendorName) > 8 ? 8 : - strlen(us->unusual_dev->vendorName)); - memcpy(data+16, us->unusual_dev->productName, - strlen(us->unusual_dev->productName) > 16 ? 16 : - strlen(us->unusual_dev->productName)); - data[32] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice>>12) & 0x0F); - data[33] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice>>8) & 0x0F); - data[34] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice>>4) & 0x0F); - data[35] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice) & 0x0F); + if(data[0]&0x20) { /* USB device currently not connected. Return + peripheral qualifier 001b ("...however, the + physical device is not currently connected + to this logical unit") and leave vendor and + product identification empty. ("If the target + does store some of the INQUIRY data on the + device, it may return zeros or ASCII spaces + (20h) in those fields until the data is + available from the device."). */ + memset(data+8,0,28); + } else { + memcpy(data+8, us->unusual_dev->vendorName, + strlen(us->unusual_dev->vendorName) > 8 ? 8 : + strlen(us->unusual_dev->vendorName)); + memcpy(data+16, us->unusual_dev->productName, + strlen(us->unusual_dev->productName) > 16 ? 
16 : + strlen(us->unusual_dev->productName)); + data[32] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice>>12) & 0x0F); + data[33] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice>>8) & 0x0F); + data[34] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice>>4) & 0x0F); + data[35] = 0x30 + ((us->pusb_dev->descriptor.bcdDevice) & 0x0F); + } if (us->srb->use_sg) { sg = (struct scatterlist *)us->srb->request_buffer; @@ -389,24 +401,6 @@ static int usb_stor_control_thread(void * __us) break; } - /* Handle those devices which need us to fake their - * inquiry data */ - if ((us->srb->cmnd[0] == INQUIRY) && - (us->flags & US_FL_FIX_INQUIRY)) { - unsigned char data_ptr[36] = { - 0x00, 0x80, 0x02, 0x02, - 0x1F, 0x00, 0x00, 0x00}; - - US_DEBUGP("Faking INQUIRY command\n"); - fill_inquiry_response(us, data_ptr, 36); - us->srb->result = GOOD << 1; - - set_current_state(TASK_INTERRUPTIBLE); - us->srb->scsi_done(us->srb); - us->srb = NULL; - break; - } - /* lock the device pointers */ down(&(us->dev_semaphore)); @@ -422,6 +416,13 @@ static int usb_stor_control_thread(void * __us) usb_stor_sense_notready, sizeof(usb_stor_sense_notready)); us->srb->result = GOOD << 1; + } else if(us->srb->cmnd[0] == INQUIRY) { + unsigned char data_ptr[36] = { + 0x20, 0x80, 0x02, 0x02, + 0x1F, 0x00, 0x00, 0x00}; + US_DEBUGP("Faking INQUIRY command for disconnected device\n"); + fill_inquiry_response(us, data_ptr, 36); + us->srb->result = GOOD << 1; } else { memcpy(us->srb->sense_buffer, usb_stor_sense_notready, @@ -429,9 +430,23 @@ static int usb_stor_control_thread(void * __us) us->srb->result = CHECK_CONDITION << 1; } } else { /* !us->pusb_dev */ - /* we've got a command, let's do it! 
*/ - US_DEBUG(usb_stor_show_command(us->srb)); - us->proto_handler(us->srb, us); + + /* Handle those devices which need us to fake + * their inquiry data */ + if ((us->srb->cmnd[0] == INQUIRY) && + (us->flags & US_FL_FIX_INQUIRY)) { + unsigned char data_ptr[36] = { + 0x00, 0x80, 0x02, 0x02, + 0x1F, 0x00, 0x00, 0x00}; + + US_DEBUGP("Faking INQUIRY command\n"); + fill_inquiry_response(us, data_ptr, 36); + us->srb->result = GOOD << 1; + } else { + /* we've got a command, let's do it! */ + US_DEBUG(usb_stor_show_command(us->srb)); + us->proto_handler(us->srb, us); + } } /* unlock the device pointers */ diff --git a/drivers/usb/usbnet.c b/drivers/usb/usbnet.c index ef92aa979f84..724defed3d82 100644 --- a/drivers/usb/usbnet.c +++ b/drivers/usb/usbnet.c @@ -19,6 +19,8 @@ * - "Linux Devices" (like iPaq and similar SA-1100 based PDAs) * - NetChip 1080 (interoperates with NetChip Win32 drivers) * - Prolific PL-2301/2302 (replaces "plusb" driver) + * - GeneSys GL620USB-A + * * USB devices can implement their side of this protocol at the cost * of two bulk endpoints; it's not restricted to "cable" applications. @@ -76,6 +78,9 @@ * 17-oct-2001 Handle "Advance USBNET" product, like Belkin/eTEK devices, * from Ioannis Mavroukakis <i.mavroukakis@btinternet.com>; * rx unlinks somehow weren't async; minor cleanup. 
+ * 25-oct-2001 Merged GeneSys driver, using code from + * Jiun-Jie Huang <huangjj@genesyslogic.com.tw> + * by Stanislav Brabec <utx@penguin.cz> * *-------------------------------------------------------------------------*/ @@ -104,6 +109,7 @@ #define CONFIG_USB_LINUXDEV #define CONFIG_USB_NET1080 #define CONFIG_USB_PL2301 +#define CONFIG_USB_GENELINK /*-------------------------------------------------------------------------*/ @@ -165,6 +171,9 @@ struct usbnet { struct sk_buff_head done; struct tasklet_struct bh; struct tq_struct ctrl_task; + + // various data structure may be needed + void *priv_data; }; // device-specific info used by the driver @@ -173,6 +182,7 @@ struct driver_info { int flags; #define FLAG_FRAMING_NC 0x0001 /* guard against device dropouts */ +#define FLAG_GENELINK 0x0002 /* genelink flag */ #define FLAG_NO_SETINT 0x0010 /* device can't set_interface() */ /* reset device ... can sleep */ @@ -181,6 +191,12 @@ struct driver_info { /* see if peer is connected ... can sleep */ int (*check_connect)(struct usbnet *); + /* allocate and initialize the private resources per device */ + int (*initialize_private)(struct usbnet *); + + /* free the private resources per device */ + int (*release_private)(struct usbnet *); + // FIXME -- also an interrupt mechanism /* framework currently "knows" bulk EPs talk packets */ @@ -725,6 +741,199 @@ static const struct driver_info prolific_info = { +#ifdef CONFIG_USB_GENELINK + +/*------------------------------------------------------------------------- + * + * GeneSys GL620USB-A (www.genesyslogic.com.tw) + * + *-------------------------------------------------------------------------*/ + +// control msg write command +#define GENELINK_CONNECT_WRITE 0xF0 +// interrupt pipe index +#define GENELINK_INTERRUPT_PIPE 0x03 +// interrupt read buffer size +#define INTERRUPT_BUFSIZE 0x08 +// interrupt pipe interval value +#define GENELINK_INTERRUPT_INTERVAL 0x10 +// max transmit packet number per transmit +#define 
GL_MAX_TRANSMIT_PACKETS 32 +// max packet length +#define GL_MAX_PACKET_LEN 1514 +// max receive buffer size +#define GL_RCV_BUF_SIZE (((GL_MAX_PACKET_LEN + 4) * GL_MAX_TRANSMIT_PACKETS) + 4) + +struct gl_priv +{ + struct urb *irq_urb; + char irq_buf[INTERRUPT_BUFSIZE]; +}; + +struct gl_packet +{ + u32 packet_length; + char packet_data[1]; +}; + +struct gl_header +{ + u32 packet_count; + + struct gl_packet packets; +}; + +static inline int gl_control_write(struct usbnet *dev, u8 request, u16 value) +{ + int retval; + + retval = usb_control_msg (dev->udev, + usb_sndctrlpipe (dev->udev, 0), + request, + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, + value, + 0, // index + 0, // data buffer + 0, // size + CONTROL_TIMEOUT_JIFFIES); + + return retval; +} + +static void gl_interrupt_complete (struct urb *urb) +{ + int status = urb->status; + + if (status) + dbg("gl_interrupt_complete fail - %X\n", status); + else + dbg("gl_interrupt_complete success...\n"); +} + +static inline int gl_interrupt_read(struct usbnet *dev) +{ + struct gl_priv *priv = dev->priv_data; + int retval; + + // issue usb interrupt read + if (priv && priv->irq_urb) { + // submit urb + if ((retval = usb_submit_urb (priv->irq_urb)) != 0) + dbg("gl_interrupt_read: submit interrupt read urb fail - %X...\n", retval); + else + dbg("gl_interrupt_read: submit interrupt read urb success...\n"); + } + + return 0; +} + +// check whether another side is connected +static int genelink_check_connect (struct usbnet *dev) +{ + dbg ("%s: assuming peer is connected", dev->net.name); + return 0; + + /* + // FIXME Uncomment this code after genelink_check_connect + // control hanshaking will be implemented + + int retval; + + dbg("genelink_check_connect...\n"); + + // issue a usb control write command to detect whether another side is connected + if ((retval = gl_control_write(dev, GENELINK_CONNECT_WRITE, 0)) != 0) { + dbg ("%s: genelink_check_connect control write fail - %X\n", dev->net.name, retval); + return 
retval; + } else { + dbg("%s: genelink_check_conntect control write success\n",dev->net.name); + + // issue a usb interrupt read command to ack another side + + if ((retval = gl_interrupt_read(dev)) != 0) { + dbg("%s: genelink_check_connect interrupt read fail - %X\n", dev->net.name, retval); + return retval; + } else { + dbg("%s: genelink_check_connect interrupt read success\n", dev->net.name); + } + + } + */ + + return 0; +} + +// allocate and initialize the private data for genelink +static int genelink_init_priv(struct usbnet *dev) +{ + struct gl_priv *priv; + + // allocate the private data structure + if ((priv = kmalloc (sizeof *priv, GFP_KERNEL)) == 0) { + dbg("%s: cannot allocate private data per device", dev->net.name); + return -ENOMEM; + } + + // allocate irq urb + if ((priv->irq_urb = usb_alloc_urb(0)) == 0) { + dbg("%s: cannot allocate private irq urb per device", dev->net.name); + kfree(priv); + return -ENOMEM; + } + + // fill irq urb + FILL_INT_URB(priv->irq_urb, dev->udev, usb_rcvintpipe(dev->udev, GENELINK_INTERRUPT_PIPE), + priv->irq_buf, INTERRUPT_BUFSIZE, gl_interrupt_complete, 0, GENELINK_INTERRUPT_INTERVAL); + + // set private data pointer + dev->priv_data = priv; + + return 0; +} + +// release the private data +static int genelink_release_priv(struct usbnet *dev) +{ + struct gl_priv *priv = dev->priv_data; + + if (!priv) + return 0; + + // cancel irq urb first + usb_unlink_urb(priv->irq_urb); + + // free irq urb + usb_free_urb(priv->irq_urb); + + // free the private data structure + kfree(priv); + + return 0; +} + +// reset the device status +static int genelink_reset (struct usbnet *dev) +{ + // we don't need to reset, just return 0 + return 0; +} + +static const struct driver_info genelink_info = { + description: "Genesys GeneLink", + flags: FLAG_GENELINK | FLAG_NO_SETINT, + reset: genelink_reset, + check_connect: genelink_check_connect, + initialize_private: genelink_init_priv, + release_private: genelink_release_priv, + + in: 1, out: 2, 
// direction distinguishes these + epsize: 64, +}; + +#endif /* CONFIG_USB_GENELINK */ + + + /*------------------------------------------------------------------------- * * Network Device Driver (peer link to "Host Device", from USB host) @@ -785,6 +994,11 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, int flags) unsigned long lockflags; size_t size; +#ifdef CONFIG_USB_GENELINK + if (dev->driver_info->flags & FLAG_GENELINK) + size = GL_RCV_BUF_SIZE; + else +#endif if (dev->driver_info->flags & FLAG_FRAMING_NC) size = FRAMED_SIZE (dev->net.mtu); else @@ -908,9 +1122,114 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb) // the extra byte we may have appended } +#ifdef CONFIG_USB_GENELINK + if (dev->driver_info->flags & FLAG_GENELINK) { + struct gl_header *header; + struct gl_packet *current_packet; + struct sk_buff *gl_skb; + int status; + u32 size; + + header = (struct gl_header *)skb->data; + + // get the packet count of the received skb + le32_to_cpus(&header->packet_count); + +// dbg("receive packet count = %d", header->packet_count); + + if ((header->packet_count > GL_MAX_TRANSMIT_PACKETS) || + (header->packet_count < 0)) { + dbg("genelink: illegal received packet count %d", header->packet_count); + goto error; + } + + // set the current packet pointer to the first packet + current_packet = &(header->packets); + + // decrement the length for the packet count size 4 bytes + skb_pull(skb, 4); + + while (header->packet_count > 1) { + // get the packet length + size = current_packet->packet_length; + + // this may be a broken packet + if (size > GL_MAX_PACKET_LEN) { + dbg("genelink: illegal received packet length %d, maybe a broken packet", size); + goto error; + } + + // allocate the skb for the individual packet + gl_skb = alloc_skb (size, in_interrupt () ? 
GFP_ATOMIC : GFP_KERNEL); + + if (gl_skb == 0) + goto error; + + // copy the packet data to the new skb + memcpy(gl_skb->data,current_packet->packet_data,size); + + // set skb data size + gl_skb->len = size; +/* + dbg("rx_process one gl_packet, size = %d...", size); + + dbg("%02X %02X %02X %02X %02X %02X", + (u8)gl_skb->data[0],(u8)gl_skb->data[1],(u8)gl_skb->data[2], + (u8)gl_skb->data[3],(u8)gl_skb->data[4],(u8)gl_skb->data[5]); + dbg("%02X %02X %02X %02X %02X %02X\n", + (u8)gl_skb->data[6],(u8)gl_skb->data[7],(u8)gl_skb->data[8], + (u8)gl_skb->data[9],(u8)gl_skb->data[10],(u8)gl_skb->data[11]); +*/ + gl_skb->dev = &dev->net; + + // determine the packet's protocol ID + gl_skb->protocol = eth_type_trans(gl_skb, &dev->net); + + // update the status + dev->stats.rx_packets++; + dev->stats.rx_bytes += size; + + // notify os of the received packet + status = netif_rx (gl_skb); + +// dev_kfree_skb (gl_skb); // just for debug purpose, delete this line for normal operation + + // advance to the next packet + current_packet = (struct gl_packet *)(current_packet->packet_data + size); + + header->packet_count --; + + // shift the data pointer to the next gl_packet + skb_pull(skb, size + 4); + } // while (header->packet_count > 1) + + // skip the packet length field 4 bytes + skb_pull(skb, 4); + } +#endif + if (skb->len) { int status; +#ifdef CONFIG_USB_GENELINK +/* + dbg("rx_process one packet, size = %d", skb->len); + + dbg("%02X %02X %02X %02X %02X %02X", + (u8)skb->data[0],(u8)skb->data[1],(u8)skb->data[2], + (u8)skb->data[3],(u8)skb->data[4],(u8)skb->data[5]); + dbg("%02X %02X %02X %02X %02X %02X\n", + (u8)skb->data[6],(u8)skb->data[7],(u8)skb->data[8], + (u8)skb->data[9],(u8)skb->data[10],(u8)skb->data[11]); +*/ + + if ((dev->driver_info->flags & FLAG_GENELINK) && + (skb->len > GL_MAX_PACKET_LEN)) { + dbg("genelink: illegal received packet length %d, maybe a broken packet", skb->len); + goto error; + } +#endif + // FIXME: eth_copy_and_csum "small" packets to new SKB 
(small < ~200 bytes) ? skb->dev = &dev->net; @@ -1063,6 +1382,9 @@ static int usbnet_stop (struct net_device *net) dev->wait = 0; remove_wait_queue (&unlink_wakeup, &wait); + if (dev->driver_info->release_private) + dev->driver_info->release_private(dev); + mutex_unlock (&dev->mutex); return 0; } @@ -1090,6 +1412,14 @@ static int usbnet_open (struct net_device *net) goto done; } + // initialize the private resources + if (info->initialize_private) { + if ((retval = info->initialize_private(dev)) < 0) { + dbg("%s: open initialize private fail", dev->net.name); + goto done; + } + } + // insist peer be connected if (info->check_connect && (retval = info->check_connect (dev)) < 0) { devdbg (dev, "can't open; %d", retval); @@ -1196,6 +1526,44 @@ static inline struct sk_buff *fixup_skb (struct sk_buff *skb, int flags) /*-------------------------------------------------------------------------*/ +#ifdef CONFIG_USB_GENELINK +static struct sk_buff *gl_build_skb (struct sk_buff *skb) +{ + struct sk_buff *skb2; + int padlen; + + int headroom = skb_headroom (skb); + int tailroom = skb_tailroom (skb); + +// dbg("headroom = %d, tailroom = %d", headroom, tailroom); + + padlen = ((skb->len + (4 + 4*1)) % 64) ? 0 : 1; + + if ((!skb_cloned (skb)) && ((headroom + tailroom) >= (padlen + (4 + 4*1)))) { + if ((headroom < (4 + 4*1)) || (tailroom < padlen)) { + skb->data = memmove (skb->head + (4 + 4*1), + skb->data, skb->len); + skb->tail = skb->data + skb->len; + } + skb2 = skb; + } else { + skb2 = skb_copy_expand (skb, (4 + 4*1) , padlen, in_interrupt () ? 
GFP_ATOMIC : GFP_KERNEL); + + if (!skb2) { + dbg("genelink: skb_copy_expand fail"); + return 0; + } + + // free the original skb + dev_kfree_skb_any (skb); + } + + return skb2; +} +#endif + +/*-------------------------------------------------------------------------*/ + static int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) { struct usbnet *dev = (struct usbnet *) net->priv; @@ -1220,6 +1588,13 @@ static int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) skb = skb2; } +#ifdef CONFIG_USB_GENELINK + if ((info->flags & FLAG_GENELINK) && (skb = gl_build_skb(skb)) == 0) { + dbg("can't build skb for genelink transmit"); + goto drop; + } +#endif + if (!(urb = usb_alloc_urb (0))) { dbg ("no urb"); goto drop; @@ -1238,7 +1613,27 @@ static int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) if (!((skb->len + sizeof *trailer) & 0x01)) *skb_put (skb, 1) = PAD_BYTE; trailer = (struct nc_trailer *) skb_put (skb, sizeof *trailer); - } else if ((length % EP_SIZE (dev)) == 0) { + } +#ifdef CONFIG_USB_GENELINK + else if (info->flags & FLAG_GENELINK) { + u32 *packet_count, *packet_len; + + // attach the packet count to the header + packet_count = (u32 *)skb_push(skb, (4 + 4*1)); + packet_len = packet_count + 1; + + // set packet to 1 + *packet_count = 1; + + // set packet length + *packet_len = length; + + // add padding byte + if ((skb->len % EP_SIZE(dev)) == 0) + skb_put(skb, 1); + } +#endif + else if ((length % EP_SIZE (dev)) == 0) { // not all hardware behaves with USB_ZERO_PACKET, // so we add an extra one-byte packet if (skb_shared (skb)) { @@ -1516,10 +1911,6 @@ static const struct usb_device_id products [] = { }, #endif -// GeneSys GL620USB (www.genesyslogic.com.tw) -// (patch exists against an older driver version) - - #ifdef CONFIG_USB_LINUXDEV /* * for example, this can be a host side talk-to-PDA driver. 
@@ -1553,6 +1944,13 @@ static const struct usb_device_id products [] = { }, #endif +#ifdef CONFIG_USB_GENELINK +{ + USB_DEVICE (0x05e3, 0x0502), // GL620USB-A + driver_info: (unsigned long) &genelink_info, +}, +#endif + { }, // END }; MODULE_DEVICE_TABLE (usb, products); diff --git a/drivers/video/aty128fb.c b/drivers/video/aty128fb.c index 849cbd83b389..7168d0dff1f1 100644 --- a/drivers/video/aty128fb.c +++ b/drivers/video/aty128fb.c @@ -7,6 +7,9 @@ * Ani Joshi / Jeff Garzik * - Code cleanup * + * Andreas Hundt <andi@convergence.de> + * - FB_ACTIVATE fixes + * * Based off of Geert's atyfb.c and vfb.c. * * TODO: @@ -143,7 +146,7 @@ enum { }; /* supported Rage128 chipsets */ -static const struct aty128_chip_info aty128_pci_probe_list[] __initdata = +static struct aty128_chip_info aty128_pci_probe_list[] __initdata = { {"Rage128 RE (PCI)", PCI_DEVICE_ID_ATI_RAGE128_RE, rage_128}, {"Rage128 RF (AGP)", PCI_DEVICE_ID_ATI_RAGE128_RF, rage_128}, @@ -217,7 +220,7 @@ static char *font __initdata = NULL; static char *mode __initdata = NULL; static int nomtrr __initdata = 0; -static const char *mode_option __initdata = NULL; +static char *mode_option __initdata = NULL; #ifdef CONFIG_PPC static int default_vmode __initdata = VMODE_1024_768_60; @@ -880,7 +883,11 @@ aty128_var_to_crtc(const struct fb_var_screeninfo *var, crtc->pitch = vxres >> 3; crtc->offset = 0; - crtc->offset_cntl = 0; + + if ((var->activate & FB_ACTIVATE_MASK) == FB_ACTIVATE_NOW) + crtc->offset_cntl = 0x00010000; + else + crtc->offset_cntl = 0; crtc->vxres = vxres; crtc->vyres = vyres; @@ -1363,7 +1370,7 @@ aty128fb_set_var(struct fb_var_screeninfo *var, int con, struct fb_info *fb) aty128_encode_var(var, &par, info); - if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) + if ((var->activate & FB_ACTIVATE_MASK) == FB_ACTIVATE_TEST) return 0; oldxres = display->var.xres; @@ -2591,6 +2598,7 @@ static struct display_switch fbcon_aty128_32 = { #ifdef MODULE MODULE_AUTHOR("(c)1999-2000 Brad Douglas 
<brad@neruo.com>"); MODULE_DESCRIPTION("FBDev driver for ATI Rage128 / Pro cards"); +MODULE_LICENSE("GPL"); MODULE_PARM(noaccel, "i"); MODULE_PARM_DESC(noaccel, "Disable hardware acceleration (0 or 1=disabled) (default=0)"); MODULE_PARM(font, "s"); diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c index 0ff02e09926e..ff08bdc0b7bc 100644 --- a/drivers/video/vesafb.c +++ b/drivers/video/vesafb.c @@ -520,10 +520,11 @@ int __init vesafb_init(void) FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; if (!request_mem_region(video_base, video_size, "vesafb")) { - printk(KERN_ERR + printk(KERN_WARNING "vesafb: abort, cannot reserve video memory at 0x%lx\n", video_base); - return -EBUSY; + /* We cannot make this fatal. Sometimes this comes from magic + spaces our resource handlers simply don't know about */ } video_vbase = ioremap(video_base, video_size); @@ -635,7 +636,12 @@ int __init vesafb_init(void) if (mtrr) { int temp_size = video_size; - while (mtrr_add(video_base, temp_size, MTRR_TYPE_WRCOMB, 1)==-EINVAL) { + /* Find the largest power-of-two */ + while (temp_size & (temp_size - 1)) + temp_size &= (temp_size - 1); + + /* Try and find a power of two to add */ + while (temp_size && mtrr_add(video_base, temp_size, MTRR_TYPE_WRCOMB, 1)==-EINVAL) { temp_size >>= 1; } } @@ -666,3 +672,5 @@ int __init vesafb_init(void) * c-basic-offset: 8 * End: */ + +MODULE_LICENSE("GPL"); diff --git a/fs/Config.in b/fs/Config.in index 1aea0d4f8d21..7459c16cf842 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -39,9 +39,9 @@ if [ "$CONFIG_JFFS_FS" = "y" -o "$CONFIG_JFFS_FS" = "m" ] ; then int 'JFFS debugging verbosity (0 = quiet, 3 = noisy)' CONFIG_JFFS_FS_VERBOSE 0 bool 'JFFS stats available in /proc filesystem' CONFIG_JFFS_PROC_FS fi -dep_tristate 'Journalling Flash File System v2 (JFFS2) support (EXPERIMENTAL)' CONFIG_JFFS2_FS $CONFIG_EXPERIMENTAL $CONFIG_MTD -if [ "$CONFIG_JFFS2_FS" != "n" ] ; then - int 'JFFS2 debugging verbosity (0 = quiet, 3 = noisy)' CONFIG_JFFS2_FS_DEBUG 0 
+dep_tristate 'Journalling Flash File System v2 (JFFS2) support' CONFIG_JFFS2_FS $CONFIG_MTD +if [ "$CONFIG_JFFS2_FS" = "y" -o "$CONFIG_JFFS2_FS" = "m" ] ; then + int 'JFFS2 debugging verbosity (0 = quiet, 2 = noisy)' CONFIG_JFFS2_FS_DEBUG 0 fi tristate 'Compressed ROM file system support' CONFIG_CRAMFS bool 'Virtual memory file system support (former shm fs)' CONFIG_TMPFS diff --git a/fs/Makefile b/fs/Makefile index 4db3a84d55ee..d97aa64dbb23 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -14,7 +14,7 @@ obj-y := open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ - filesystems.o namespace.o + filesystems.o namespace.o seq_file.o ifeq ($(CONFIG_QUOTA),y) obj-y += dquot.o diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index c42ea9f55f6a..bcc088a374c7 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -39,37 +39,27 @@ * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. * - * Return >=0 on success or a -ve error code. + * Return buffer_head of bitmap on success or NULL. 
*/ -static int read_inode_bitmap (struct super_block * sb, - unsigned long block_group, - unsigned int bitmap_nr) +static struct buffer_head *read_inode_bitmap (struct super_block * sb, + unsigned long block_group) { - struct ext2_group_desc * gdp; - struct buffer_head * bh = NULL; - int retval = 0; + struct ext2_group_desc *desc; + struct buffer_head *bh = NULL; - gdp = ext2_get_group_desc (sb, block_group, NULL); - if (!gdp) { - retval = -EIO; + desc = ext2_get_group_desc(sb, block_group, NULL); + if (!desc) goto error_out; - } - bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_inode_bitmap), sb->s_blocksize); - if (!bh) { + + bh = bread(sb->s_dev, le32_to_cpu(desc->bg_inode_bitmap), + sb->s_blocksize); + if (!bh) ext2_error (sb, "read_inode_bitmap", "Cannot read inode bitmap - " "block_group = %lu, inode_bitmap = %lu", - block_group, (unsigned long) gdp->bg_inode_bitmap); - retval = -EIO; - } - /* - * On IO error, just leave a zero in the superblock's block pointer for - * this group. The IO will be retried next time. - */ + block_group, (unsigned long) desc->bg_inode_bitmap); error_out: - sb->u.ext2_sb.s_inode_bitmap_number[bitmap_nr] = block_group; - sb->u.ext2_sb.s_inode_bitmap[bitmap_nr] = bh; - return retval; + return bh; } /* @@ -83,79 +73,62 @@ error_out: * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, * this function reads the bitmap without maintaining a LRU cache. * - * Return the slot used to store the bitmap, or a -ve error code. 
+ * Return the buffer_head of the bitmap or the ERR_PTR(error) */ -static int load_inode_bitmap (struct super_block * sb, - unsigned int block_group) +static struct buffer_head *load_inode_bitmap (struct super_block * sb, + unsigned int block_group) { - int i, j, retval = 0; - unsigned long inode_bitmap_number; - struct buffer_head * inode_bitmap; + int i, slot = 0; + struct ext2_sb_info *sbi = &sb->u.ext2_sb; + struct buffer_head *bh = sbi->s_inode_bitmap[0]; - if (block_group >= sb->u.ext2_sb.s_groups_count) + if (block_group >= sbi->s_groups_count) ext2_panic (sb, "load_inode_bitmap", "block_group >= groups_count - " "block_group = %d, groups_count = %lu", - block_group, sb->u.ext2_sb.s_groups_count); - if (sb->u.ext2_sb.s_loaded_inode_bitmaps > 0 && - sb->u.ext2_sb.s_inode_bitmap_number[0] == block_group && - sb->u.ext2_sb.s_inode_bitmap[0] != NULL) - return 0; - if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { - if (sb->u.ext2_sb.s_inode_bitmap[block_group]) { - if (sb->u.ext2_sb.s_inode_bitmap_number[block_group] != block_group) - ext2_panic (sb, "load_inode_bitmap", - "block_group != inode_bitmap_number"); - else - return block_group; - } else { - retval = read_inode_bitmap (sb, block_group, - block_group); - if (retval < 0) - return retval; - return block_group; - } + block_group, sbi->s_groups_count); + + if (sbi->s_loaded_inode_bitmaps > 0 && + sbi->s_inode_bitmap_number[0] == block_group && bh) + goto found; + + if (sbi->s_groups_count <= EXT2_MAX_GROUP_LOADED) { + slot = block_group; + bh = sbi->s_inode_bitmap[slot]; + if (!bh) + goto read_it; + if (sbi->s_inode_bitmap_number[slot] == slot) + goto found; + ext2_panic (sb, "load_inode_bitmap", + "block_group != inode_bitmap_number"); } - for (i = 0; i < sb->u.ext2_sb.s_loaded_inode_bitmaps && - sb->u.ext2_sb.s_inode_bitmap_number[i] != block_group; + bh = NULL; + for (i = 0; i < sbi->s_loaded_inode_bitmaps && + sbi->s_inode_bitmap_number[i] != block_group; i++) ; - if (i < 
sb->u.ext2_sb.s_loaded_inode_bitmaps && - sb->u.ext2_sb.s_inode_bitmap_number[i] == block_group) { - inode_bitmap_number = sb->u.ext2_sb.s_inode_bitmap_number[i]; - inode_bitmap = sb->u.ext2_sb.s_inode_bitmap[i]; - for (j = i; j > 0; j--) { - sb->u.ext2_sb.s_inode_bitmap_number[j] = - sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; - sb->u.ext2_sb.s_inode_bitmap[j] = - sb->u.ext2_sb.s_inode_bitmap[j - 1]; - } - sb->u.ext2_sb.s_inode_bitmap_number[0] = inode_bitmap_number; - sb->u.ext2_sb.s_inode_bitmap[0] = inode_bitmap; - - /* - * There's still one special case here --- if inode_bitmap == 0 - * then our last attempt to read the bitmap failed and we have - * just ended up caching that failure. Try again to read it. - */ - if (!inode_bitmap) - retval = read_inode_bitmap (sb, block_group, 0); - - } else { - if (sb->u.ext2_sb.s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) - sb->u.ext2_sb.s_loaded_inode_bitmaps++; - else - brelse (sb->u.ext2_sb.s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]); - for (j = sb->u.ext2_sb.s_loaded_inode_bitmaps - 1; j > 0; j--) { - sb->u.ext2_sb.s_inode_bitmap_number[j] = - sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; - sb->u.ext2_sb.s_inode_bitmap[j] = - sb->u.ext2_sb.s_inode_bitmap[j - 1]; - } - retval = read_inode_bitmap (sb, block_group, 0); + if (i < sbi->s_loaded_inode_bitmaps) + bh = sbi->s_inode_bitmap[i]; + else if (sbi->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) + sbi->s_loaded_inode_bitmaps++; + else + brelse (sbi->s_inode_bitmap[--i]); + + while (i--) { + sbi->s_inode_bitmap_number[i+1] = sbi->s_inode_bitmap_number[i]; + sbi->s_inode_bitmap[i+1] = sbi->s_inode_bitmap[i]; } - return retval; + +read_it: + if (!bh) + bh = read_inode_bitmap (sb, block_group); + sbi->s_inode_bitmap_number[slot] = block_group; + sbi->s_inode_bitmap[slot] = bh; + if (!bh) + return ERR_PTR(-EIO); +found: + return bh; } /* @@ -183,8 +156,7 @@ void ext2_free_inode (struct inode * inode) struct buffer_head * bh2; unsigned long block_group; unsigned long bit; - 
int bitmap_nr; - struct ext2_group_desc * gdp; + struct ext2_group_desc * desc; struct ext2_super_block * es; ino = inode->i_ino; @@ -215,24 +187,22 @@ void ext2_free_inode (struct inode * inode) } block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); - bitmap_nr = load_inode_bitmap (sb, block_group); - if (bitmap_nr < 0) + bh = load_inode_bitmap (sb, block_group); + if (IS_ERR(bh)) goto error_return; - bh = sb->u.ext2_sb.s_inode_bitmap[bitmap_nr]; - /* Ok, now we can actually update the inode bitmaps.. */ if (!ext2_clear_bit (bit, bh->b_data)) ext2_error (sb, "ext2_free_inode", "bit already cleared for inode %lu", ino); else { - gdp = ext2_get_group_desc (sb, block_group, &bh2); - if (gdp) { - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) + 1); + desc = ext2_get_group_desc (sb, block_group, &bh2); + if (desc) { + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); if (is_directory) - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) - 1); + desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); } mark_buffer_dirty(bh2); es->s_free_inodes_count = @@ -259,23 +229,101 @@ error_return: * For other inodes, search forward from the parent directory\'s block * group to find a free inode. 
*/ + +static int find_group_dir(struct super_block *sb, int parent_group) +{ + struct ext2_super_block * es = sb->u.ext2_sb.s_es; + int ngroups = sb->u.ext2_sb.s_groups_count; + int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups; + struct ext2_group_desc *desc, *best_desc = NULL; + struct buffer_head *bh, *best_bh = NULL; + int group, best_group = -1; + + for (group = 0; group < ngroups; group++) { + desc = ext2_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (!best_desc || + (le16_to_cpu(desc->bg_free_blocks_count) > + le16_to_cpu(best_desc->bg_free_blocks_count))) { + best_group = group; + best_desc = desc; + best_bh = bh; + } + } + if (!best_desc) + return -1; + best_desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1); + best_desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1); + mark_buffer_dirty(best_bh); + return best_group; +} + +static int find_group_other(struct super_block *sb, int parent_group) +{ + int ngroups = sb->u.ext2_sb.s_groups_count; + struct ext2_group_desc *desc; + struct buffer_head *bh; + int group, i; + + /* + * Try to place the inode in its parent directory + */ + group = parent_group; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (i = 1; i < ngroups; i <<= 1) { + group += i; + if (group >= ngroups) + group -= ngroups; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + } + + /* + * That failed: try linear search for a free inode + */ + group = parent_group + 1; + for (i = 2; i < ngroups; i++) { + if (++group >= ngroups) + group = 0; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && 
le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + } + + return -1; + +found: + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1); + mark_buffer_dirty(bh); + return group; +} + struct inode * ext2_new_inode (const struct inode * dir, int mode) { struct super_block * sb; struct buffer_head * bh; struct buffer_head * bh2; - int i, j, avefreei; + int group, i; + ino_t ino; struct inode * inode; - int bitmap_nr; - struct ext2_group_desc * gdp; - struct ext2_group_desc * tmp; + struct ext2_group_desc * desc; struct ext2_super_block * es; int err; - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) - return ERR_PTR(-EPERM); - sb = dir->i_sb; inode = new_inode(sb); if (!inode) @@ -284,138 +332,41 @@ struct inode * ext2_new_inode (const struct inode * dir, int mode) lock_super (sb); es = sb->u.ext2_sb.s_es; repeat: - gdp = NULL; i=0; - - if (S_ISDIR(mode)) { - avefreei = le32_to_cpu(es->s_free_inodes_count) / - sb->u.ext2_sb.s_groups_count; -/* I am not yet convinced that this next bit is necessary. 
- i = dir->u.ext2_i.i_block_group; - for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { - tmp = ext2_get_group_desc (sb, i, &bh2); - if (tmp && - (le16_to_cpu(tmp->bg_used_dirs_count) << 8) < - le16_to_cpu(tmp->bg_free_inodes_count)) { - gdp = tmp; - break; - } - else - i = ++i % sb->u.ext2_sb.s_groups_count; - } -*/ - if (!gdp) { - for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { - tmp = ext2_get_group_desc (sb, j, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count) && - le16_to_cpu(tmp->bg_free_inodes_count) >= avefreei) { - if (!gdp || - (le16_to_cpu(tmp->bg_free_blocks_count) > - le16_to_cpu(gdp->bg_free_blocks_count))) { - i = j; - gdp = tmp; - } - } - } - } - } + if (S_ISDIR(mode)) + group = find_group_dir(sb, dir->u.ext2_i.i_block_group); else - { - /* - * Try to place the inode in its parent directory - */ - i = dir->u.ext2_i.i_block_group; - tmp = ext2_get_group_desc (sb, i, &bh2); - if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) - gdp = tmp; - else - { - /* - * Use a quadratic hash to find a group with a - * free inode - */ - for (j = 1; j < sb->u.ext2_sb.s_groups_count; j <<= 1) { - i += j; - if (i >= sb->u.ext2_sb.s_groups_count) - i -= sb->u.ext2_sb.s_groups_count; - tmp = ext2_get_group_desc (sb, i, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count)) { - gdp = tmp; - break; - } - } - } - if (!gdp) { - /* - * That failed: try linear search for a free inode - */ - i = dir->u.ext2_i.i_block_group + 1; - for (j = 2; j < sb->u.ext2_sb.s_groups_count; j++) { - if (++i >= sb->u.ext2_sb.s_groups_count) - i = 0; - tmp = ext2_get_group_desc (sb, i, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count)) { - gdp = tmp; - break; - } - } - } - } + group = find_group_other(sb, dir->u.ext2_i.i_block_group); err = -ENOSPC; - if (!gdp) + if (group == -1) goto fail; err = -EIO; - bitmap_nr = load_inode_bitmap (sb, i); - if (bitmap_nr < 0) - goto fail; + bh = load_inode_bitmap (sb, group); + if (IS_ERR(bh)) + goto fail2; - bh = 
sb->u.ext2_sb.s_inode_bitmap[bitmap_nr]; - if ((j = ext2_find_first_zero_bit ((unsigned long *) bh->b_data, - EXT2_INODES_PER_GROUP(sb))) < - EXT2_INODES_PER_GROUP(sb)) { - if (ext2_set_bit (j, bh->b_data)) { - ext2_error (sb, "ext2_new_inode", - "bit already set for inode %d", j); - goto repeat; - } - mark_buffer_dirty(bh); - if (sb->s_flags & MS_SYNCHRONOUS) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } - } else { - if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { - ext2_error (sb, "ext2_new_inode", - "Free inodes count corrupted in group %d", - i); - /* Is it really ENOSPC? */ - err = -ENOSPC; - if (sb->s_flags & MS_RDONLY) - goto fail; - - gdp->bg_free_inodes_count = 0; - mark_buffer_dirty(bh2); - } - goto repeat; + i = ext2_find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_INODES_PER_GROUP(sb)); + if (i >= EXT2_INODES_PER_GROUP(sb)) + goto bad_count; + ext2_set_bit (i, bh->b_data); + + mark_buffer_dirty(bh); + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); } - j += i * EXT2_INODES_PER_GROUP(sb) + 1; - if (j < EXT2_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { + + ino = group * EXT2_INODES_PER_GROUP(sb) + i + 1; + if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext2_error (sb, "ext2_new_inode", "reserved inode or inode > inodes count - " - "block_group = %d,inode=%d", i, j); + "block_group = %d,inode=%ld", group, ino); err = -EIO; - goto fail; + goto fail2; } - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); - if (S_ISDIR(mode)) - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); - mark_buffer_dirty(bh2); + es->s_free_inodes_count = cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); mark_buffer_dirty(sb->u.ext2_sb.s_sbh); @@ -431,22 +382,15 @@ repeat: inode->i_gid = current->fsgid; inode->i_mode = mode; - inode->i_ino = j; + inode->i_ino = ino; inode->i_blksize = PAGE_SIZE; /* This is 
the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->u.ext2_i.i_new_inode = 1; inode->u.ext2_i.i_flags = dir->u.ext2_i.i_flags; if (S_ISLNK(mode)) - inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FL | EXT2_APPEND_FL); - inode->u.ext2_i.i_faddr = 0; - inode->u.ext2_i.i_frag_no = 0; - inode->u.ext2_i.i_frag_size = 0; - inode->u.ext2_i.i_file_acl = 0; - inode->u.ext2_i.i_dir_acl = 0; - inode->u.ext2_i.i_dtime = 0; - inode->u.ext2_i.i_prealloc_count = 0; - inode->u.ext2_i.i_block_group = i; + inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); + inode->u.ext2_i.i_block_group = group; if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; insert_inode_hash(inode); @@ -464,40 +408,59 @@ repeat: ext2_debug ("allocating inode %lu\n", inode->i_ino); return inode; +fail2: + desc = ext2_get_group_desc (sb, group, &bh2); + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); + if (S_ISDIR(mode)) + desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); + mark_buffer_dirty(bh2); fail: unlock_super(sb); make_bad_inode(inode); iput(inode); return ERR_PTR(err); + +bad_count: + ext2_error (sb, "ext2_new_inode", + "Free inodes count corrupted in group %d", + group); + /* Is it really ENOSPC? 
*/ + err = -ENOSPC; + if (sb->s_flags & MS_RDONLY) + goto fail; + + desc = ext2_get_group_desc (sb, group, &bh2); + desc->bg_free_inodes_count = 0; + mark_buffer_dirty(bh2); + goto repeat; } unsigned long ext2_count_free_inodes (struct super_block * sb) { #ifdef EXT2FS_DEBUG struct ext2_super_block * es; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext2_group_desc * gdp; + unsigned long desc_count = 0, bitmap_count = 0; int i; lock_super (sb); es = sb->u.ext2_sb.s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { - gdp = ext2_get_group_desc (sb, i, NULL); - if (!gdp) + struct ext2_group_desc *desc = ext2_get_group_desc (sb, i, NULL); + struct buffer_head *bh; + unsigned x; + + if (!desc) continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); - bitmap_nr = load_inode_bitmap (sb, i); - if (bitmap_nr < 0) + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + bh = load_inode_bitmap (sb, i); + if (IS_ERR(bh)) continue; - x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr], - EXT2_INODES_PER_GROUP(sb) / 8); + x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8); printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); + i, le16_to_cpu(desc->bg_free_inodes_count), x); bitmap_count += x; } printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", @@ -513,39 +476,35 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) /* Called at mount-time, super-block is locked */ void ext2_check_inodes_bitmap (struct super_block * sb) { - struct ext2_super_block * es; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext2_group_desc * gdp; + struct ext2_super_block * es = sb->u.ext2_sb.s_es; + unsigned long desc_count = 0, bitmap_count = 0; int i; - es = sb->u.ext2_sb.s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { - gdp = 
ext2_get_group_desc (sb, i, NULL); - if (!gdp) + struct ext2_group_desc *desc = ext2_get_group_desc(sb, i, NULL); + struct buffer_head *bh; + unsigned x; + + if (!desc) continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); - bitmap_nr = load_inode_bitmap (sb, i); - if (bitmap_nr < 0) + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + bh = load_inode_bitmap (sb, i); + if (IS_ERR(bh)) continue; - x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr], - EXT2_INODES_PER_GROUP(sb) / 8); - if (le16_to_cpu(gdp->bg_free_inodes_count) != x) + x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8); + if (le16_to_cpu(desc->bg_free_inodes_count) != x) ext2_error (sb, "ext2_check_inodes_bitmap", "Wrong free inodes count in group %d, " "stored = %d, counted = %lu", i, - le16_to_cpu(gdp->bg_free_inodes_count), x); + le16_to_cpu(desc->bg_free_inodes_count), x); bitmap_count += x; } if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count) ext2_error (sb, "ext2_check_inodes_bitmap", "Wrong free inodes count in super block, " "stored = %lu, counted = %lu", - (unsigned long) le32_to_cpu(es->s_free_inodes_count), + (unsigned long)le32_to_cpu(es->s_free_inodes_count), bitmap_count); } #endif diff --git a/fs/intermezzo/Makefile b/fs/intermezzo/Makefile new file mode 100644 index 000000000000..b43dd509c9c0 --- /dev/null +++ b/fs/intermezzo/Makefile @@ -0,0 +1,12 @@ +# +# Makefile 1.00 Peter Braam <braam@clusterfs.com> +# + +O_TARGET := intermezzo.o + +obj-y := journal_reiserfs.o cache.o journal.o presto.o vfs.o psdev.o upcall.o methods.o sysctl.o dcache.o dir.o super.o journal_ext2.o journal_ext3.o journal_xfs.o inode.o file.o journal_obdfs.o + + +obj-m := $(O_TARGET) + +include $(TOPDIR)/Rules.make diff --git a/fs/intermezzo/cache.c b/fs/intermezzo/cache.c new file mode 100644 index 000000000000..db9d6948bd36 --- /dev/null +++ b/fs/intermezzo/cache.c @@ -0,0 +1,256 @@ +/* + * + * + * Copyright (C) 2000 Stelias Computing, Inc. 
+ * Copyright (C) 2000 Red Hat, Inc. + * + * + */ + +#define __NO_VERSION__ +#include <linux/module.h> +#include <stdarg.h> +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> + +/* + This file contains the routines associated with managing a + cache of files for InterMezzo. These caches have two reqs: + - need to be found fast so they are hashed by the device, + with an attempt to have collision chains of length 1. + The methods for the cache are set up in methods. +*/ + +/* the intent of this hash is to have collision chains of length 1 */ +#define CACHES_BITS 8 +#define CACHES_SIZE (1 << CACHES_BITS) +#define CACHES_MASK CACHES_SIZE - 1 +static struct list_head presto_caches[CACHES_SIZE]; + +static inline int presto_cache_hash(kdev_t dev) +{ + return (CACHES_MASK) & ((0x000F & (dev)) + ((0x0F00 & (dev)) >>8)); +} + +inline void presto_cache_add(struct presto_cache *cache, kdev_t dev) +{ + list_add(&cache->cache_chain, + &presto_caches[presto_cache_hash(dev)]); + cache->cache_dev = dev; +} + +inline void presto_init_cache_hash(void) +{ + int i; + for ( i = 0; i < CACHES_SIZE; i++ ) { + INIT_LIST_HEAD(&presto_caches[i]); + } +} + +/* map a device to a cache */ +struct presto_cache *presto_find_cache(kdev_t dev) +{ + struct presto_cache *cache; + struct list_head *lh, *tmp; + + lh = tmp = &(presto_caches[presto_cache_hash(dev)]); + while ( (tmp = lh->next) != lh ) { + cache = list_entry(tmp, struct presto_cache, cache_chain); + if ( cache->cache_dev == dev ) { + return cache; + } + } + return NULL; +} + + +/* map an inode to a cache */ +struct 
presto_cache *presto_get_cache(struct inode *inode) +{ + struct presto_cache *cache; + + /* find the correct presto_cache here, based on the device */ + cache = presto_find_cache(inode->i_dev); + if ( !cache ) { + printk("WARNING: no presto cache for dev %x, ino %ld\n", + inode->i_dev, inode->i_ino); + EXIT; + return NULL; + } + return cache; +} + + +/* list cache mount points for ioctl's or /proc/fs/intermezzo/mounts */ +int presto_sprint_mounts(char *buf, int buflen, int minor) +{ + int len = 0; + int i; + struct list_head *head, *tmp; + struct presto_cache *cache; + + buf[0] = '\0'; + for (i=0 ; i<CACHES_SIZE ; i++) { + head = tmp = &presto_caches[i]; + while ( (tmp = tmp->next) != head ) { + cache = list_entry(tmp, struct presto_cache, + cache_chain); + if ( !cache->cache_root_fileset || !cache->cache_mtpt) + continue; + if ((minor != -1) && + (cache->cache_psdev->uc_minor != minor)) + continue; + if ( strlen(cache->cache_root_fileset) + + strlen(cache->cache_mtpt) + + strlen(cache->cache_psdev->uc_devname) + + 4 > buflen - len) + break; + len += sprintf(buf + len, "%s %s %s\n", + cache->cache_root_fileset, + cache->cache_mtpt, + cache->cache_psdev->uc_devname); + } + } + + buf[buflen-1] = '\0'; + CDEBUG(D_SUPER, "%s\n", buf); + return len; +} + +#ifdef CONFIG_KREINT +/* get mount point by volname + Arthur Ma, 2000.12.25 + */ +int presto_get_mount (char *buf, int buflen, char *volname) +{ + int i; + struct list_head *head, *tmp; + struct presto_cache *cache = NULL; + char *path = ""; + + buf[0] = '\0'; + for (i=0 ; i<CACHES_SIZE ; i++) { + head = tmp = &presto_caches[i]; + while ( (tmp = tmp->next) != head ) { + cache = list_entry(tmp, struct presto_cache, + cache_chain); + if ( !cache->cache_root_fileset || !cache->cache_mtpt) + continue; + if ( strcmp(cache->cache_root_fileset, volname) == 0) + break; + } + } + if (cache != NULL) + path = cache->cache_mtpt; + strncpy (buf, path, buflen); + return strlen (buf); +} +#endif + +/* another debugging routine: check 
fs is InterMezzo fs */ +int presto_ispresto(struct inode *inode) +{ + struct presto_cache *cache; + + if ( !inode ) + return 0; + cache = presto_get_cache(inode); + if ( !cache ) + return 0; + return (inode->i_dev == cache->cache_dev); +} + +/* setup a cache structure when we need one */ +struct presto_cache *presto_init_cache(void) +{ + struct presto_cache *cache; + + /* make a presto_cache structure for the hash */ + PRESTO_ALLOC(cache, struct presto_cache *, sizeof(struct presto_cache)); + if ( cache ) { + memset(cache, 0, sizeof(struct presto_cache)); + INIT_LIST_HEAD(&cache->cache_chain); + INIT_LIST_HEAD(&cache->cache_fset_list); + } + cache->cache_lock = SPIN_LOCK_UNLOCKED; + cache->cache_reserved = 0; + return cache; +} + + +/* free a cache structure and all of the memory it is pointing to */ +inline void presto_free_cache(struct presto_cache *cache) +{ + if (!cache) + return; + + list_del(&cache->cache_chain); + if (cache->cache_mtpt) + PRESTO_FREE(cache->cache_mtpt, strlen(cache->cache_mtpt) + 1); + if (cache->cache_type) + PRESTO_FREE(cache->cache_type, strlen(cache->cache_type) + 1); + if (cache->cache_root_fileset) + PRESTO_FREE(cache->cache_root_fileset, strlen(cache->cache_root_fileset) + 1); + + PRESTO_FREE(cache, sizeof(struct presto_cache)); +} + +int presto_reserve_space(struct presto_cache *cache, loff_t req) +{ + struct filter_fs *filter; + loff_t avail; + struct super_block *sb = cache->cache_sb; + filter = cache->cache_filter; + if (!filter ) { + EXIT; + return 0; + } + if (!filter->o_trops ) { + EXIT; + return 0; + } + if (!filter->o_trops->tr_avail ) { + EXIT; + return 0; + } + avail = filter->o_trops->tr_avail(cache, sb); + CDEBUG(D_SUPER, "ESC::%ld +++> %ld \n", (long) cache->cache_reserved, + (long) (cache->cache_reserved + req)); + CDEBUG(D_SUPER, "ESC::Avail::%ld \n", (long) avail); + spin_lock(&cache->cache_lock); + if (req + cache->cache_reserved > avail) { + spin_unlock(&cache->cache_lock); + EXIT; + return -ENOSPC; + } + 
cache->cache_reserved += req; + spin_unlock(&cache->cache_lock); + + return 0; +} + +void presto_release_space(struct presto_cache *cache, loff_t req) +{ + CDEBUG(D_SUPER, "ESC::%ld ---> %ld \n", (long) cache->cache_reserved, + (long) (cache->cache_reserved - req)); + spin_lock(&cache->cache_lock); + cache->cache_reserved -= req; + spin_unlock(&cache->cache_lock); +} diff --git a/fs/intermezzo/dcache.c b/fs/intermezzo/dcache.c new file mode 100644 index 000000000000..87a6d5e6c668 --- /dev/null +++ b/fs/intermezzo/dcache.c @@ -0,0 +1,136 @@ +/* + * Directory operations for InterMezzo filesystem + * Original version: (C) 1996 P. Braam and M. Callahan + * Rewritten for Linux 2.1. (C) 1997 Carnegie Mellon University + * + * Stelias encourages users to contribute improvements to + * the InterMezzo project. Contact Peter Braam (coda@stelias.com). + */ + +#define __NO_VERSION__ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/slab.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> + +#include <linux/intermezzo_fs.h> + +static int presto_dentry_revalidate(struct dentry *de, int ); +static kmem_cache_t * presto_dentry_slab; + +/* called when a cache lookup succeeds */ +static int presto_dentry_revalidate(struct dentry *de, int flag) +{ + struct inode *inode = de->d_inode; + ENTRY; + if (!inode) { + EXIT; + return 1; + } + if (is_bad_inode(inode)) { + EXIT; + return 0; + } + + if ( S_ISDIR(inode->i_mode) ) { + EXIT; + return (presto_chk(de, PRESTO_DATA) && + (presto_chk(de, PRESTO_ATTR))); + } else { + EXIT; + return presto_chk(de, PRESTO_ATTR); + } +} + +static void presto_d_release(struct dentry *dentry) +{ + if (!presto_d2d(dentry)) { + printk("VERY BAD: dentry: %p\n", dentry); + if (dentry->d_inode) + printk(" inode: %ld\n", dentry->d_inode->i_ino); + return; + } + + 
presto_d2d(dentry)->dd_count--; + + if (! presto_d2d(dentry)->dd_count) { + kmem_cache_free(presto_dentry_slab, presto_d2d(dentry)); + dentry->d_fsdata = NULL; + } +} + +struct dentry_operations presto_dentry_ops = +{ + d_revalidate: presto_dentry_revalidate, + d_release: presto_d_release +}; + + +// XXX THIS DEPENDS ON THE KERNEL LOCK! + +void presto_set_dd(struct dentry * dentry) +{ + ENTRY; + if (dentry->d_fsdata) { + printk("VERY BAD: dentry: %p\n", dentry); + if (dentry->d_inode) + printk(" inode: %ld\n", dentry->d_inode->i_ino); + return; + } + + if (! dentry->d_inode) { + dentry->d_fsdata = kmem_cache_alloc(presto_dentry_slab, + SLAB_KERNEL); + memset(dentry->d_fsdata, 0, sizeof(struct presto_dentry_data)); + presto_d2d(dentry)->dd_count = 1; + EXIT; + return; + } + + /* If there's already a dentry for this inode, share the data */ + if (dentry->d_alias.next != &dentry->d_inode->i_dentry || + dentry->d_alias.prev != &dentry->d_inode->i_dentry) { + struct dentry *de; + + if (dentry->d_alias.next != &dentry->d_inode->i_dentry) + de = list_entry(dentry->d_alias.next, struct dentry, + d_alias); + else + de = list_entry(dentry->d_alias.prev, struct dentry, + d_alias); + + dentry->d_fsdata = de->d_fsdata; + presto_d2d(dentry)->dd_count++; + EXIT; + return; + } + + dentry->d_fsdata = kmem_cache_alloc(presto_dentry_slab, SLAB_KERNEL); + memset(dentry->d_fsdata, 0, sizeof(struct presto_dentry_data)); + presto_d2d(dentry)->dd_count = 1; + EXIT; + return; +} + +void presto_init_ddata_cache(void) +{ + ENTRY; + presto_dentry_slab = + kmem_cache_create("presto_cache", + sizeof(struct presto_dentry_data), 0, + SLAB_HWCACHE_ALIGN|SLAB_POISON, NULL, + NULL); + EXIT; +} + +void presto_cleanup_ddata_cache(void) +{ + kmem_cache_destroy(presto_dentry_slab); +} diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c new file mode 100644 index 000000000000..8df4a62623be --- /dev/null +++ b/fs/intermezzo/dir.c @@ -0,0 +1,884 @@ +/* + * + * + * Copyright (C) 2000 Stelias Computing, 
Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Tacitus Systems + * Copyright (C) 2000 Peter J. Braam + * + */ + + +#include <stdarg.h> + +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/smp_lock.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#define __NO_VERSION__ +#include <linux/module.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> + +static inline void presto_relock_sem(struct inode *dir) +{ + /* the lock from sys_mkdir / lookup_create */ + down(&dir->i_sem); + /* the rest is done by the do_{create,mkdir, ...} */ +} + +static inline void presto_relock_other(struct inode *dir) +{ + /* vfs_mkdir locks */ + down(&dir->i_zombie); + lock_kernel(); +} + +static inline void presto_fulllock(struct inode *dir) +{ + /* the lock from sys_mkdir / lookup_create */ + down(&dir->i_sem); + /* vfs_mkdir locks */ + down(&dir->i_zombie); + lock_kernel(); +} + +static inline void presto_unlock(struct inode *dir) +{ + /* vfs_mkdir locks */ + unlock_kernel(); + up(&dir->i_zombie); + /* the lock from sys_mkdir / lookup_create */ + up(&dir->i_sem); +} + + +/* + * these are initialized in super.c + */ +extern int presto_permission(struct inode *inode, int mask); +int presto_ilookup_uid = 0; + +extern int presto_prep(struct dentry *, struct presto_cache **, + struct presto_file_set **); + +static int dentry2id(struct dentry *dentry, ino_t *id, unsigned int *generation) +{ + char *tmpname; + char *next; + int error = 0; + + ENTRY; + if (dentry->d_name.len > EXT2_NAME_LEN) { + EXIT; + return -ENAMETOOLONG; + } + + /* prefix is 7 characters: '...ino:' */ + if ( dentry->d_name.len < 7 || + memcmp(dentry->d_name.name, 
PRESTO_ILOOKUP_MAGIC, 7) != 0 ) { + EXIT; + return 1; + } + + PRESTO_ALLOC(tmpname, char *, dentry->d_name.len - 7 + 1); + if ( !tmpname ) { + EXIT; + return -ENOMEM; + } + + memcpy(tmpname, dentry->d_name.name + 7, dentry->d_name.len - 7); + *(tmpname + dentry->d_name.len) = '\0'; + + /* name is of the form <inode number>:<generation> */ + *id = simple_strtoul(tmpname, &next, 0); + if ( *next == PRESTO_ILOOKUP_SEP ) { + *generation = simple_strtoul(next + 1, 0, 0); + CDEBUG(D_INODE, "INO to find = %s\n", tmpname); + CDEBUG(D_INODE, "Id = %lx (%lu), generation %x (%d)\n", + *id, *id, *generation, *generation); + } else + error = 1; + + PRESTO_FREE(tmpname, dentry->d_name.len - 7 + 1); + EXIT; + return error; +} + +static int presto_opendir_upcall(int minor, struct dentry *de, + struct dentry *root, int async) +{ + int rc; + char *path, *buffer; + int pathlen; + + PRESTO_ALLOC(buffer, char *, PAGE_SIZE); + if ( !buffer ) { + printk("PRESTO: out of memory!\n"); + return ENOMEM; + } + path = presto_path(de, root, buffer, PAGE_SIZE); + pathlen = MYPATHLEN(buffer, path); + CDEBUG(D_INODE, "path: %*s, len %d\n", pathlen, path, pathlen); + rc = lento_opendir(minor, pathlen, path, async); + PRESTO_FREE(buffer, PAGE_SIZE); + return rc; +} + +inline int presto_can_ilookup(void) +{ + return (current->euid == presto_ilookup_uid || + capable(CAP_DAC_READ_SEARCH)); +} + +struct dentry *presto_ilookup(struct inode *dir, struct dentry *dentry, + ino_t ino, unsigned int generation) +{ + struct inode *inode; + int error; + + ENTRY; + + /* if we can't ilookup, forbid anything with this name to + * avoid any security issues/name clashes. + */ + if ( !presto_can_ilookup() ) { + CDEBUG(D_CACHE, "ilookup denied: euid %u, ilookup_uid %u\n", + current->euid, presto_ilookup_uid); + EXIT; + return ERR_PTR(-EPERM); + } + inode = iget(dir->i_sb, ino); + if (!inode || !inode->i_nlink || is_bad_inode(inode)) { + CDEBUG(D_PIOCTL, "fatal: invalid inode %ld (%s).\n", + ino, inode ? inode->i_nlink ? 
"bad inode" : + "no links" : "NULL"); + error = -ENOENT; + EXIT; + goto cleanup_iput; + } + + /* We need to make sure we have the right inode (by checking the + * generation) so we don't write into the wrong file (old inode was + * deleted and then a new one was created with the same number). + */ + if (inode->i_generation != generation) { + CDEBUG(D_PIOCTL, "fatal: bad generation %u (want %u)\n", + inode->i_generation, generation); + error = -ENOENT; + EXIT; + goto cleanup_iput; + } + + d_instantiate(dentry, inode); + EXIT; + return NULL; + +cleanup_iput: + if (inode) + iput(inode); + return ERR_PTR(error); +} + + +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry) +{ + int rc = 0; + struct dentry *de; + struct presto_cache *cache; + struct presto_file_set *fset; + int error; + int minor; + ino_t ino; + unsigned int generation; + + ENTRY; + CDEBUG(D_CACHE, "calling presto_prep on dentry %p\n", dentry); + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return ERR_PTR(error); + } + minor = presto_c2m(cache); + + CDEBUG(D_CACHE, "dir ino: %ld, name: %*s\n", + dir->i_ino, dentry->d_name.len, dentry->d_name.name); + if ( ISLENTO(minor) ) + CDEBUG(D_CACHE, "We are lento\n"); + + rc = dentry2id(dentry, &ino, &generation); + CDEBUG(D_CACHE, "dentry2id returned %d\n", rc); + if ( rc < 0 ) { + EXIT; + goto exit; + } + + if ( rc == 0 ) { + de = presto_ilookup(dir, dentry, ino, generation); + } else { + struct inode_operations *iops = filter_c2cdiops(cache->cache_filter); + rc = 0; + /* recursively do a cache lookup in dir */ + if (iops && iops->lookup) + de = iops->lookup(dir, dentry); + else { + printk("filesystem has no lookup\n"); + EXIT; + goto exit; + } + } + /* XXX this needs some work to handle returning de if we get it */ + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + if ( IS_ERR(de) ) { + rc = PTR_ERR(de); + 
CDEBUG(D_CACHE, "dentry lookup error %d\n", rc); + EXIT; + goto exit; + } + + presto_set_dd(dentry); + + /* some file systems set the methods in lookup, not in + read_inode, as a result we should set the methods here + as well as in read_inode + */ + if (dentry->d_inode) { + presto_set_ops(dentry->d_inode, cache->cache_filter); + } + EXIT; +exit: + return ERR_PTR(rc); +} + +int presto_setattr(struct dentry *de, struct iattr *iattr) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct lento_vfs_context info = { 0, 0, 0 }; + + ENTRY; + error = presto_prep(de, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + if (!iattr->ia_valid) + CDEBUG(D_INODE, "presto_setattr: iattr is not valid\n"); + + CDEBUG(D_INODE, "valid %#x, mode %#o, uid %u, gid %u, size %Lu, " + "atime %lu mtime %lu ctime %lu flags %d\n", + iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, iattr->ia_gid, + iattr->ia_size, iattr->ia_atime, iattr->ia_mtime, + iattr->ia_ctime, iattr->ia_attr_flags); + + if ( presto_get_permit(de->d_inode) < 0 ) { + EXIT; + return -EROFS; + } + + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_setattr(fset, de, iattr, &info); + presto_put_permit(de->d_inode); + return error; +} + +/* + * Now the meat: the fs operations that require journaling + * + * + * XXX: some of these need modifications for hierarchical filesets + */ + +int presto_prep(struct dentry *dentry, struct presto_cache **cache, + struct presto_file_set **fset) +{ + *fset = presto_fset(dentry); + if ( !*fset ) { + CDEBUG(D_INODE, "No file set for dentry at %p\n", dentry); + return -EROFS; + } + + *cache = (*fset)->fset_cache; + if ( !*cache ) { + printk("PRESTO: BAD, BAD: cannot find cache\n"); + return -EBADF; + } + + CDEBUG(D_PIOCTL, "---> cache flags %x, fset flags %x\n", + (*cache)->cache_flags, (*fset)->fset_flags); + if( presto_is_read_only(*fset) ) { + printk("PRESTO: cannot modify 
read-only fileset, minor %d.\n", + presto_c2m(*cache)); + return -EROFS; + } + return 0; +} + +static int presto_create(struct inode * dir, struct dentry * dentry, int mode) +{ + int error; + struct presto_cache *cache; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + struct presto_file_set *fset; + + ENTRY; + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + presto_unlock(dir); + + /* Does blocking and non-blocking behavious need to be + checked for. Without blocking (return 1), the permit + was acquired without reintegration + */ + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_create(fset, parent, dentry, mode, &info); + presto_relock_other(dir); + presto_put_permit(dir); + EXIT; + return error; +} + +static int presto_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + int error; + struct presto_cache *cache, *new_cache; + struct presto_file_set *fset, *new_fset; + struct dentry *parent = new_dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + error = presto_prep(old_dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(new_dentry->d_parent, &new_cache, &new_fset); + if ( error ) { + EXIT; + return error; + } + + if (fset != new_fset) { + EXIT; + return -EXDEV; + } + + presto_unlock(dir); + if ( presto_get_permit(old_dentry->d_inode) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = new_dentry->d_parent; + + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + 
info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_link(fset, old_dentry, parent, + new_dentry, &info); + presto_relock_other(dir); + presto_put_permit(dir); + presto_put_permit(old_dentry->d_inode); + return error; +} + +static int presto_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int error; + struct presto_file_set *fset; + struct presto_cache *cache; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + + presto_relock_sem(dir); + parent = dentry->d_parent; + error = presto_do_mkdir(fset, parent, dentry, mode, &info); + presto_relock_other(dir); + presto_put_permit(dir); + return error; +} + + +static int presto_symlink(struct inode *dir, struct dentry *dentry, + const char *name) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_symlink(fset, parent, dentry, name, &info); + presto_relock_other(dir); + presto_put_permit(dir); + return error; +} + +int presto_unlink(struct inode *dir, struct dentry *dentry) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = 
dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_unlink(fset, parent, dentry, &info); + presto_relock_other(dir); + presto_put_permit(dir); + return error; +} + +static int presto_rmdir(struct inode *dir, struct dentry *dentry) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + CDEBUG(D_FILE, "prepping presto\n"); + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + CDEBUG(D_FILE, "unlocking\n"); + /* We need to dget() before the dput in double_unlock, to ensure we + * still have dentry references. double_lock doesn't do dget for us. 
+ */ + unlock_kernel(); + if (d_unhashed(dentry)) + d_rehash(dentry); + double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); + double_up(&dir->i_sem, &dentry->d_inode->i_sem); + + CDEBUG(D_FILE, "getting permit\n"); + if ( presto_get_permit(parent->d_inode) < 0 ) { + EXIT; + double_down(&dir->i_sem, &dentry->d_inode->i_sem); + double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); + + lock_kernel(); + return -EROFS; + } + CDEBUG(D_FILE, "locking\n"); + + double_down(&dir->i_sem, &dentry->d_inode->i_sem); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_rmdir(fset, parent, dentry, &info); + presto_put_permit(parent->d_inode); + lock_kernel(); + EXIT; + return error; +} + +static int presto_mknod(struct inode * dir, struct dentry * dentry, int mode, int rdev) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_mknod(fset, parent, dentry, mode, rdev, &info); + presto_relock_other(dir); + presto_put_permit(dir); + EXIT; + return error; +} + +inline void presto_triple_unlock(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, int triple) +{ + /* rename_dir case */ + if (S_ISDIR(old_dentry->d_inode->i_mode)) { + if (triple) { + triple_up(&old_dir->i_zombie, + &new_dir->i_zombie, + &new_dentry->d_inode->i_zombie); + } else { + 
double_up(&old_dir->i_zombie, + &new_dir->i_zombie); + } + up(&old_dir->i_sb->s_vfs_rename_sem); + } else /* this case is rename_other */ + double_up(&old_dir->i_zombie, &new_dir->i_zombie); + /* done by do_rename */ + unlock_kernel(); + double_up(&old_dir->i_sem, &new_dir->i_sem); +} + +inline void presto_triple_fulllock(struct inode *old_dir, + struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, int triple) +{ + /* done by do_rename */ + double_down(&old_dir->i_sem, &new_dir->i_sem); + lock_kernel(); + /* rename_dir case */ + if (S_ISDIR(old_dentry->d_inode->i_mode)) { + down(&old_dir->i_sb->s_vfs_rename_sem); + if (triple) { + triple_down(&old_dir->i_zombie, + &new_dir->i_zombie, + &new_dentry->d_inode->i_zombie); + } else { + double_down(&old_dir->i_zombie, + &new_dir->i_zombie); + } + } else /* this case is rename_other */ + double_down(&old_dir->i_zombie, &new_dir->i_zombie); +} + +inline void presto_triple_relock_sem(struct inode *old_dir, + struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, int triple) +{ + /* done by do_rename */ + double_down(&old_dir->i_sem, &new_dir->i_sem); + lock_kernel(); +} + +inline void presto_triple_relock_other(struct inode *old_dir, + struct inode *new_dir, + struct dentry *old_dentry, + struct dentry *new_dentry, int triple) +{ + /* rename_dir case */ + if (S_ISDIR(old_dentry->d_inode->i_mode)) { + down(&old_dir->i_sb->s_vfs_rename_sem); + if (triple) { + triple_down(&old_dir->i_zombie, + &new_dir->i_zombie, + &new_dentry->d_inode->i_zombie); + } else { + double_down(&old_dir->i_zombie, + &new_dir->i_zombie); + } + } else /* this case is rename_other */ + double_down(&old_dir->i_zombie, &new_dir->i_zombie); +} + + +// XXX this can be optimized: renamtes across filesets only require +// multiple KML records, but can locally be executed normally. 
+int presto_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int error; + struct presto_cache *cache, *new_cache; + struct presto_file_set *fset, *new_fset; + struct lento_vfs_context info; + struct dentry *old_parent = old_dentry->d_parent; + struct dentry *new_parent = new_dentry->d_parent; + int triple; + + ENTRY; + error = presto_prep(old_dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + error = presto_prep(new_parent, &new_cache, &new_fset); + if ( error ) { + EXIT; + return error; + } + + if ( fset != new_fset ) { + EXIT; + return -EXDEV; + } + + /* We need to do dget before the dput in double_unlock, to ensure we + * still have dentry references. double_lock doesn't do dget for us. + */ + + triple = (S_ISDIR(old_dentry->d_inode->i_mode) && new_dentry->d_inode)? + 1:0; + + presto_triple_unlock(old_dir, new_dir, old_dentry, new_dentry, triple); + + if ( presto_get_permit(old_dir) < 0 ) { + EXIT; + presto_triple_fulllock(old_dir, new_dir, old_dentry, new_dentry, triple); + return -EROFS; + } + if ( presto_get_permit(new_dir) < 0 ) { + EXIT; + presto_triple_fulllock(old_dir, new_dir, old_dentry, new_dentry, triple); + return -EROFS; + } + + presto_triple_relock_sem(old_dir, new_dir, old_dentry, new_dentry, triple); + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_rename(fset, old_parent, old_dentry, new_parent, + new_dentry, &info); + presto_triple_relock_other(old_dir, new_dir, old_dentry, new_dentry, triple); + + presto_put_permit(new_dir); + presto_put_permit(old_dir); + return error; +} + +/* basically this allows the ilookup processes access to all files for + * reading, while not making ilookup totally insecure. This could all + * go away if we could set the CAP_DAC_READ_SEARCH capability for the client. 
+ */ +/* If posix acls are available, the underlying cache fs will export the + * appropriate permission function. Thus we do not worry here about ACLs + * or EAs. -SHP + */ +int presto_permission(struct inode *inode, int mask) +{ + unsigned short mode = inode->i_mode; + struct presto_cache *cache; + int rc; + + ENTRY; + if ( presto_can_ilookup() && !(mask & S_IWOTH)) { + CDEBUG(D_CACHE, "ilookup on %ld OK\n", inode->i_ino); + EXIT; + return 0; + } + + cache = presto_get_cache(inode); + + if ( cache ) { + /* we only override the file/dir permission operations */ + struct inode_operations *fiops = filter_c2cfiops(cache->cache_filter); + struct inode_operations *diops = filter_c2cdiops(cache->cache_filter); + + if ( S_ISREG(mode) && fiops && fiops->permission ) { + EXIT; + return fiops->permission(inode, mask); + } + if ( S_ISDIR(mode) && diops && diops->permission ) { + EXIT; + return diops->permission(inode, mask); + } + } + + /* The cache filesystem doesn't have its own permission function, + * but we don't want to duplicate the VFS code here. In order + * to avoid looping from permission calling this function again, + * we temporarily override the permission operation while we call + * the VFS permission function. 
+ */ + inode->i_op->permission = NULL; + rc = permission(inode, mask); + inode->i_op->permission = &presto_permission; + + EXIT; + return rc; +} + + +static int presto_dir_open(struct inode *inode, struct file *file) +{ + int rc = 0; + struct dentry *de = file->f_dentry; + struct file_operations *fops; + struct presto_cache *cache; + struct presto_file_set *fset; + int minor; + int error; + + ENTRY; + + error = presto_prep(file->f_dentry, &cache, &fset); + if ( error ) { + EXIT; + make_bad_inode(inode); + return error; + } + minor = presto_c2m(cache); + + CDEBUG(D_CACHE, "minor %d, DATA_OK: %d, ino: %ld\n", + minor, presto_chk(de, PRESTO_DATA), inode->i_ino); + + if ( ISLENTO(minor) ) + goto cache; + + if ( !presto_chk(de, PRESTO_DATA) ) { + CDEBUG(D_CACHE, "doing lento_opendir\n"); + rc = presto_opendir_upcall(minor, file->f_dentry, fset->fset_mtpt, SYNCHRONOUS); + } + + if ( rc ) { + printk("presto_dir_open: DATA_OK: %d, ino: %ld, error %d\n", + presto_chk(de, PRESTO_DATA), inode->i_ino, rc); + return rc ; + } + + cache: + fops = filter_c2cdfops(cache->cache_filter); + if ( fops->open ) { + rc = fops->open(inode, file); + } + presto_set(de, PRESTO_DATA | PRESTO_ATTR); + CDEBUG(D_CACHE, "returns %d, data %d, attr %d\n", rc, + presto_chk(de, PRESTO_DATA), presto_chk(de, PRESTO_ATTR)); + return 0; +} + +struct file_operations presto_dir_fops = { + open: presto_dir_open +}; + +struct inode_operations presto_dir_iops = { + create: presto_create, + lookup: presto_lookup, + link: presto_link, + unlink: presto_unlink, + symlink: presto_symlink, + mkdir: presto_mkdir, + rmdir: presto_rmdir, + mknod: presto_mknod, + rename: presto_rename, + permission: presto_permission, + setattr: presto_setattr, +#ifdef CONFIG_FS_EXT_ATTR + set_ext_attr: presto_set_ext_attr, +#endif + +}; diff --git a/fs/intermezzo/ext_attr.c b/fs/intermezzo/ext_attr.c new file mode 100644 index 000000000000..b2f4e1af0bf4 --- /dev/null +++ b/fs/intermezzo/ext_attr.c @@ -0,0 +1,196 @@ +/* + * Extended 
attribute handling for presto. + * + * Copyright (C) 2001. All rights reserved. + * Shirish H. Phatak + * Tacit Networks, Inc. + * + */ + +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/unistd.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/string.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/segment.h> +#include <linux/smp_lock.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + + +#ifdef CONFIG_FS_EXT_ATTR +#include <linux/ext_attr.h> + +extern inline void presto_debug_fail_blkdev(struct presto_file_set *fset, + unsigned long value); + +extern int presto_prep(struct dentry *, struct presto_cache **, + struct presto_file_set **); + + +/* VFS interface */ +/* XXX! Fixme test for user defined attributes */ +int presto_set_ext_attr(struct inode *inode, + const char *name, void *buffer, + size_t buffer_len, int flags) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct lento_vfs_context info; + struct dentry *dentry; + int minor = presto_i2m(inode); + char *buf = NULL; + + ENTRY; + if (minor < 0) { + EXIT; + return -1; + } + + if ( ISLENTO(minor) ) { + EXIT; + return -EINVAL; + } + + /* BAD...vfs should really pass down the dentry to use, especially + * since every other operation in iops does. 
But for now + * we do a reverse mapping from inode to the first dentry + */ + if (list_empty(&inode->i_dentry)) { + printk("No alias for inode %d\n", (int) inode->i_ino); + EXIT; + return -EINVAL; + } + + dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + + error = presto_prep(dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + if ((buffer != NULL) && (buffer_len != 0)) { + /* If buffer is a user space pointer copy it to kernel space + * and reset the flag. We do this since the journal functions need + * access to the contents of the buffer, and the file system + * does not care. When we actually invoke the function, we remove + * the EXT_ATTR_FLAG_USER flag. + * + * XXX:Check if the "fs does not care" assertion is always true -SHP + * (works for ext3) + */ + if (flags & EXT_ATTR_FLAG_USER) { + PRESTO_ALLOC(buf, char *, buffer_len); + if (!buf) { + printk("InterMezzo: out of memory!!!\n"); + return -ENOMEM; + } + error = copy_from_user(buf, buffer, buffer_len); + if (error) + return error; + } else + buf = buffer; + } else + buf = buffer; + + if ( presto_get_permit(inode) < 0 ) { + EXIT; + if (buffer_len && (flags & EXT_ATTR_FLAG_USER)) + PRESTO_FREE(buf, buffer_len); + return -EROFS; + } + + /* Simulate presto_setup_info */ + memset(&info, 0, sizeof(info)); + /* For now redundant..but we keep it around just in case */ + info.flags = LENTO_FL_IGNORE_TIME; + if (!ISLENTO(cache->cache_psdev->uc_minor)) + info.flags |= LENTO_FL_KML; + + /* We pass in the kernel space pointer and reset the + * EXT_ATTR_FLAG_USER flag. + * See comments above. + */ + /* Note that mode is already set by VFS so we send in a NULL */ + error = presto_do_set_ext_attr(fset, dentry, name, buf, + buffer_len, flags & ~EXT_ATTR_FLAG_USER, + NULL, &info); + presto_put_permit(inode); + + if (buffer_len && (flags & EXT_ATTR_FLAG_USER)) + PRESTO_FREE(buf, buffer_len); + EXIT; + return error; +} + +/* Lento Interface */ +/* XXX: ignore flags? 
We should be forcing these operations through? -SHP*/ +int lento_set_ext_attr(const char *path, const char *name, + void *buffer, size_t buffer_len, int flags, mode_t mode, + struct lento_vfs_context *info) +{ + int error; + char * pathname; + struct nameidata nd; + struct dentry *dentry; + struct presto_file_set *fset; + + ENTRY; + lock_kernel(); + + pathname=getname(path); + error = PTR_ERR(pathname); + if (IS_ERR(pathname)) { + EXIT; + goto exit; + } + + /* Note that ext_attrs apply to both files and directories..*/ + error=presto_walk(pathname,&nd); + if (error) + goto exit; + dentry = nd.dentry; + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit_dentry; + } + + if (buffer==NULL) buffer_len=0; + + error = presto_do_set_ext_attr(fset, dentry, name, buffer, + buffer_len, flags, &mode, info); +exit_dentry: + path_release(&nd); +exit_path: + putname(pathname); +exit: + unlock_kernel(); + return error; +} + +#endif /*CONFIG_FS_EXT_ATTR*/ diff --git a/fs/intermezzo/file.c b/fs/intermezzo/file.c new file mode 100644 index 000000000000..1a9eae8db572 --- /dev/null +++ b/fs/intermezzo/file.c @@ -0,0 +1,426 @@ +/* + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory. + * Copyright (C) 2000 Tacitus Systems + * Copyright (C) 2000 Peter J. Braam + * Copyright (C) 2001 Mountain View Data, Inc. + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + */ + + +#include <stdarg.h> + +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#define __NO_VERSION__ +#include <linux/module.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_kml.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/fsfilter.h> +/* + * these are initialized in super.c + */ +extern int presto_permission(struct inode *inode, int mask); +extern int presto_opendir_upcall(int minor, struct dentry *de, int async); + +extern int presto_prep(struct dentry *, struct presto_cache **, + struct presto_file_set **); + + +#if 0 +static int presto_open_upcall(int minor, struct dentry *de) +{ + int rc; + char *path, *buffer; + int pathlen; + + PRESTO_ALLOC(buffer, char *, PAGE_SIZE); + if ( !buffer ) { + printk("PRESTO: out of memory!\n"); + return ENOMEM; + } + path = presto_path(de, buffer, PAGE_SIZE); + pathlen = MYPATHLEN(buffer, path); + rc = lento_open(minor, pathlen, path); + PRESTO_FREE(buffer, PAGE_SIZE); + return rc; +} +#endif + + +static int presto_file_open(struct inode *inode, struct file *file) +{ + int rc = 0; + struct file_operations *fops; + struct presto_cache *cache; + struct presto_file_data *fdata; + int writable = (file->f_flags & (O_RDWR | O_WRONLY)); + int minor; + int i; + + ENTRY; + + cache = presto_get_cache(inode); + if ( !cache ) { + printk("PRESTO: BAD, BAD: cannot find cache\n"); + EXIT; + return -EBADF; + } + + minor = presto_c2m(cache); + + CDEBUG(D_CACHE, "presto_file_open: DATA_OK: %d, ino: %ld\n", + presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino); + + if ( ISLENTO(minor) ) + goto cache; + + if ( file->f_flags & O_RDWR 
|| file->f_flags & O_WRONLY) { + CDEBUG(D_CACHE, "presto_file_open: calling presto_get_permit\n"); + /* lock needed to protect permit_count manipulations -SHP */ + if ( presto_get_permit(inode) < 0 ) { + EXIT; + return -EROFS; + } + presto_put_permit(inode); + } + + /* XXX name space synchronization here for data/streaming on demand?*/ + /* XXX Lento can make us wait here for backfetches to complete */ +#if 0 + if ( !presto_chk(file->f_dentry, PRESTO_DATA) || + !presto_has_all_data(file->f_dentry->d_inode) ) { + CDEBUG(D_CACHE, "presto_file_open: presto_open_upcall\n"); + rc = presto_open_upcall(minor, file->f_dentry); + } + +#endif + rc = 0; + cache: + fops = filter_c2cffops(cache->cache_filter); + if ( fops->open ) { + CDEBUG(D_CACHE, "presto_file_open: calling fs open\n"); + rc = fops->open(inode, file); + } + if (rc) { + EXIT; + return rc; + } + + CDEBUG(D_CACHE, "presto_file_open: setting DATA, ATTR\n"); + if( ISLENTO(minor) ) + presto_set(file->f_dentry, PRESTO_ATTR ); + else + presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA); + + if (writable) { + PRESTO_ALLOC(fdata, struct presto_file_data *, sizeof(*fdata)); + if (!fdata) { + EXIT; + return -ENOMEM; + } + /* we believe that on open the kernel lock + assures that only one process will do this allocation */ + fdata->fd_do_lml = 0; + fdata->fd_fsuid = current->fsuid; + fdata->fd_fsgid = current->fsgid; + fdata->fd_mode = file->f_dentry->d_inode->i_mode; + fdata->fd_ngroups = current->ngroups; + for (i=0 ; i<current->ngroups ; i++) + fdata->fd_groups[i] = current->groups[i]; + fdata->fd_bytes_written = 0; /*when open,written data is zero*/ + file->private_data = fdata; + } else { + file->private_data = NULL; + } + + return 0; +} + +static int presto_file_release(struct inode *inode, struct file *file) +{ + struct rec_info rec; + int rc; + int writable = (file->f_flags & (O_RDWR | O_WRONLY)); + struct file_operations *fops; + struct presto_cache *cache; + struct presto_file_set *fset; + void *handle; + 
struct presto_file_data *fdata = + (struct presto_file_data *)file->private_data; + + ENTRY; + rc = presto_prep(file->f_dentry, &cache, &fset); + if ( rc ) { + EXIT; + return rc; + } + + fops = filter_c2cffops(cache->cache_filter); + rc = fops->release(inode, file); + + CDEBUG(D_CACHE, "islento = %d (minor %d), writable = %d, rc %d, data %p\n", + ISLENTO(cache->cache_psdev->uc_minor), + cache->cache_psdev->uc_minor, + writable, rc, fdata); + + if (fdata && fdata->fd_do_lml) { + CDEBUG(D_CACHE, "LML at %lld\n", fdata->fd_lml_offset); + } + + /* don't journal close if file couldn't have been written to */ + /* if (!ISLENTO(cache->cache_prestominor) && !rc && writable) {*/ + if (fdata && fdata->fd_do_lml && + !rc && writable && (! ISLENTO(cache->cache_psdev->uc_minor))) { + struct presto_version new_ver; + + presto_getversion(&new_ver, inode); + + /* XXX: remove when lento gets file granularity cd */ + /* Lock needed to protect permit_count manipulations -SHP */ + if ( presto_get_permit(inode) < 0 ) { + EXIT; + return -EROFS; + } + CDEBUG(D_CACHE, "presto_file_release: writing journal\n"); + + rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (rc) { + presto_put_permit(inode); + EXIT; + return rc; + } + handle = presto_trans_start(fset, file->f_dentry->d_inode, + PRESTO_OP_RELEASE); + if ( IS_ERR(handle) ) { + printk("presto_release: no space for transaction\n"); + presto_put_permit(inode); + return -ENOSPC; + } + rc = presto_journal_close(&rec, fset, file, file->f_dentry, + &new_ver); + if (rc) { + printk("presto_close: cannot journal close\n"); + /* XXX oops here to get this bug */ + *(int *)0 = 1; + presto_put_permit(inode); + return -ENOSPC; + } + presto_trans_commit(fset, handle); + + /* cancel the LML record */ + handle = presto_trans_start + (fset, inode, PRESTO_OP_WRITE); + if ( IS_ERR(handle) ) { + printk("presto_release: no space for clear\n"); + presto_put_permit(inode); + return -ENOSPC; + } + rc = presto_clear_lml_close(fset, + 
fdata->fd_lml_offset); + if (rc < 0 ) { + /* XXX oops here to get this bug */ + *(int *)0 = 1; + presto_put_permit(inode); + printk("presto_close: cannot journal close\n"); + return -ENOSPC; + } + presto_trans_commit(fset, handle); + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + + presto_truncate_lml(fset); + + presto_put_permit(inode); + } + + if (!rc && fdata) { + PRESTO_FREE(fdata, sizeof(*fdata)); + } + file->private_data = NULL; + + EXIT; + return rc; +} + + + +static void presto_apply_write_policy(struct file *file, struct presto_file_set *fset, loff_t res) +{ + struct presto_file_data *fdata = (struct presto_file_data *)file->private_data; + struct presto_cache *cache = fset->fset_cache; + struct presto_version new_file_ver; + int error; + struct rec_info rec; + + /* Here we do a journal close after a fixed or a specified + amount of KBytes, currently a global parameter set with + sysctl. If files are open for a long time, this gives added + protection. (XXX todo: per cache, add ioctl, handle + journaling in a thread, add more options etc.) + */ + + if ( (fset->fset_flags & FSET_JCLOSE_ON_WRITE) + && (!ISLENTO(cache->cache_psdev->uc_minor))) { + fdata->fd_bytes_written += res; + + if (fdata->fd_bytes_written >= fset->fset_file_maxio) { + presto_getversion(&new_file_ver, file->f_dentry->d_inode); + /* This is really heavy weight and should be fixed + ASAP. At most we should be recording the number + of bytes written and not locking the kernel, + wait for permits, etc, on the write path. 
SHP + */ + lock_kernel(); + if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) { + EXIT; + /* we must be disconnected, not to worry */ + return; + } + error = presto_journal_close + (&rec, fset, file, file->f_dentry, &new_file_ver); + presto_put_permit(file->f_dentry->d_inode); + unlock_kernel(); + if ( error ) { + printk("presto_close: cannot journal close\n"); + /* XXX these errors are really bad */ + /* panic(); */ + return; + } + fdata->fd_bytes_written = 0; + } + } +} + +static ssize_t presto_file_write(struct file *file, const char *buf, size_t size, + loff_t *off) +{ + struct rec_info rec; + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct file_operations *fops; + ssize_t res; + int do_lml_here; + void *handle = NULL; + unsigned long blocks; + struct presto_file_data *fdata; + loff_t res_size; + + error = presto_prep(file->f_dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1; + /* XXX 3 is for ext2 indirect blocks ... */ + res_size = 2 * PRESTO_REQHIGH + ((blocks+3) + << file->f_dentry->d_inode->i_sb->s_blocksize_bits); + + error = presto_reserve_space(fset->fset_cache, res_size); + CDEBUG(D_INODE, "Reserved %Ld for %d\n", res_size, size); + if ( error ) { + EXIT; + return -ENOSPC; + } + + /* XXX lock something here */ + CDEBUG(D_INODE, "islento %d, minor: %d\n", ISLENTO(cache->cache_psdev->uc_minor), + cache->cache_psdev->uc_minor); + read_lock(&fset->fset_lml.fd_lock); + fdata = (struct presto_file_data *)file->private_data; + do_lml_here = (!ISLENTO(cache->cache_psdev->uc_minor)) && + size && (fdata->fd_do_lml == 0); + + if (do_lml_here) + fdata->fd_do_lml = 1; + read_unlock(&fset->fset_lml.fd_lock); + + /* XXX we have two choices: + - we do the transaction for the LML record BEFORE any write + transaction starts - that has the benefit that no other + short write can complete without the record being there. 
+ The disadvantage is that even if no write happens we get + the LML record. + - we bundle the transaction with this write. In that case + we may not have an LML record is a short write goes through + before this one (can that actually happen?). + */ + res = 0; + if (do_lml_here) { + /* handle different space reqs from file system below! */ + handle = presto_trans_start(fset, file->f_dentry->d_inode, + PRESTO_OP_WRITE); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, res_size); + printk("presto_write: no space for transaction\n"); + return -ENOSPC; + } + res = presto_journal_write(&rec, fset, file); + fdata->fd_lml_offset = rec.offset; + if ( res ) { + /* XXX oops here to get this bug */ + /* *(int *)0 = 1; */ + EXIT; + goto exit_write; + } + + presto_trans_commit(fset, handle); + } + + fops = filter_c2cffops(cache->cache_filter); + res = fops->write(file, buf, size, off); + if ( res != size ) { + CDEBUG(D_FILE, "file write returns short write: size %d, res %d\n", size, res); + } + + if ( (res > 0) && fdata ) + presto_apply_write_policy(file, fset, res); + + exit_write: + presto_release_space(fset->fset_cache, res_size); + return res; +} + +struct file_operations presto_file_fops = { + write: presto_file_write, + open: presto_file_open, + release: presto_file_release +}; + +struct inode_operations presto_file_iops = { + permission: presto_permission, + setattr: presto_setattr, +#ifdef CONFIG_FS_EXT_ATTR + set_ext_attr: presto_set_ext_attr, +#endif +}; + + + diff --git a/fs/intermezzo/inode.c b/fs/intermezzo/inode.c new file mode 100644 index 000000000000..c4816a363825 --- /dev/null +++ b/fs/intermezzo/inode.c @@ -0,0 +1,166 @@ +/* + * Super block/filesystem wide operations + * + * Copryright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and + * Michael Callahan <callahan@maths.ox.ac.uk> + * + * Rewritten for Linux 2.1. 
Peter Braam <braam@cs.cmu.edu> + * Copyright (C) Carnegie Mellon University + */ + +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/unistd.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/string.h> +#include <asm/uaccess.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/segment.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> + +extern int presto_remount(struct super_block *, int *, char *); + +int presto_excluded_gid = PRESTO_EXCL_GID; + +extern int presto_prep(struct dentry *, struct presto_cache **, + struct presto_file_set **); +extern void presto_free_cache(struct presto_cache *); + + +void presto_set_ops(struct inode *inode, struct filter_fs *filter) +{ + ENTRY; + if (inode->i_gid == presto_excluded_gid ) { + EXIT; + CDEBUG(D_INODE, "excluded methods for %ld at %p, %p\n", + inode->i_ino, inode->i_op, inode->i_fop); + return; + } + if (S_ISREG(inode->i_mode)) { + if ( !filter_c2cfiops(filter) ) { + filter_setup_file_ops(filter, + inode, &presto_file_iops, + &presto_file_fops); + } + inode->i_op = filter_c2ufiops(filter); + inode->i_fop = filter_c2uffops(filter); + CDEBUG(D_INODE, "set file methods for %ld to %p\n", + inode->i_ino, inode->i_op); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = filter_c2udiops(filter); + inode->i_fop = filter_c2udfops(filter); + CDEBUG(D_INODE, "set dir methods for %ld to %p lookup %p\n", + inode->i_ino, inode->i_op, inode->i_op->lookup); + } else if (S_ISLNK(inode->i_mode)) { + if ( !filter_c2csiops(filter)) { + filter_setup_symlink_ops(filter, + inode, + &presto_sym_iops, + &presto_sym_fops); + } + inode->i_op = filter_c2usiops(filter); + 
inode->i_fop = filter_c2usfops(filter); + CDEBUG(D_INODE, "set link methods for %ld to %p\n", + inode->i_ino, inode->i_op); + } + EXIT; +} + +void presto_read_inode(struct inode *inode) +{ + struct presto_cache *cache; + + cache = presto_get_cache(inode); + if ( !cache ) { + printk("PRESTO: BAD, BAD: cannot find cache\n"); + make_bad_inode(inode); + return ; + } + + filter_c2csops(cache->cache_filter)->read_inode(inode); + + CDEBUG(D_INODE, "presto_read_inode: ino %ld, gid %d\n", + inode->i_ino, inode->i_gid); + + // if (inode->i_gid == presto_excluded_gid) + // return; + + presto_set_ops(inode, cache->cache_filter); + /* XXX handle special inodes here or not - probably not? */ +} + +void presto_put_super(struct super_block *sb) +{ + struct presto_cache *cache; + struct upc_comm *psdev; + struct super_operations *sops; + struct list_head *lh; + + ENTRY; + cache = presto_find_cache(sb->s_dev); + if (!cache) { + EXIT; + goto exit; + } + psdev = &upc_comms[presto_c2m(cache)]; + + sops = filter_c2csops(cache->cache_filter); + if (sops->put_super) + sops->put_super(sb); + + /* free any remaining async upcalls when the filesystem is unmounted */ + lh = psdev->uc_pending.next; + while ( lh != &psdev->uc_pending) { + struct upc_req *req; + req = list_entry(lh, struct upc_req, rq_chain); + + /* assignment must be here: we are about to free &lh */ + lh = lh->next; + if ( ! 
(req->rq_flags & REQ_ASYNC) ) + continue; + list_del(&(req->rq_chain)); + PRESTO_FREE(req->rq_data, req->rq_bufsize); + PRESTO_FREE(req, sizeof(struct upc_req)); + } + + presto_free_cache(cache); + +exit: + CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + MOD_DEC_USE_COUNT; + return ; +} + + +/* symlinks can be chowned */ +struct inode_operations presto_sym_iops = { + setattr: presto_setattr +}; + +/* NULL for now */ +struct file_operations presto_sym_fops; + +struct super_operations presto_super_ops = { + read_inode: presto_read_inode, + put_super: presto_put_super, + remount_fs: presto_remount +}; +MODULE_LICENSE("GPL"); diff --git a/fs/intermezzo/journal.c b/fs/intermezzo/journal.c new file mode 100644 index 000000000000..7ccdb51f7203 --- /dev/null +++ b/fs/intermezzo/journal.c @@ -0,0 +1,2058 @@ +/* + * Intermezzo. (C) 1998 Peter J. Braam + * + * Support for journalling extended attributes + * (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. 
+ */ + + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +static int presto_log(struct presto_file_set *fset, struct rec_info *rec, + const char *buf, size_t size, + const char *string1, int len1, + const char *string2, int len2, + const char *string3, int len3); + +/* + * reserve record space and/or atomically request state of the log + * rec will hold the location reserved record upon return + * this reservation will be placed in the queue + */ +static void presto_reserve_record(struct presto_file_set *fset, + struct presto_log_fd *fd, + struct rec_info *rec, + struct presto_reservation_data *rd) +{ + int chunked_record = 0; + ENTRY; + + write_lock(&fd->fd_lock); + if ( rec->is_kml ) { + int chunk = 1 << fset->fset_chunkbits; + int chunk_mask = ~(chunk -1); + loff_t boundary; + + boundary = (fd->fd_offset + chunk - 1) & chunk_mask; + if ( fd->fd_offset + rec->size >= boundary ) { + chunked_record = 1; + fd->fd_offset = boundary; + } + } + + fd->fd_recno++; + + /* this move the fd_offset back after truncation */ + if ( list_empty(&fd->fd_reservations) && + !chunked_record) { + fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size; + } + + rec->offset = fd->fd_offset; + rec->recno = fd->fd_recno; + + fd->fd_offset += rec->size; + + /* add the reservation data to the end of the list */ + list_add(&rd->ri_list, fd->fd_reservations.prev); + rd->ri_offset = rec->offset; + rd->ri_size = rec->size; + rd->ri_recno = rec->recno; + + write_unlock(&fd->fd_lock); + + EXIT; +} + +static inline void presto_release_record(struct presto_log_fd *fd, + struct 
presto_reservation_data *rd) +{ + write_lock(&fd->fd_lock); + list_del(&rd->ri_list); + write_unlock(&fd->fd_lock); +} + +static int presto_do_truncate(struct presto_file_set *fset, + struct dentry *dentry, loff_t length, + loff_t size_check) +{ + struct inode *inode = dentry->d_inode; + struct inode_operations *op; + int error; + struct iattr newattrs; + + ENTRY; + + /* Not pretty: "inode->i_size" shouldn't really be "loff_t". */ + if ((off_t) length < 0) + return -EINVAL; + + fs_down(&inode->i_sem); + lock_kernel(); + + if (size_check != inode->i_size) { + fs_up(&inode->i_sem); + EXIT; + return -EALREADY; + } + + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + op = filter_c2cfiops(fset->fset_cache->cache_filter); + + if (op != NULL && op->setattr != NULL) + error = op->setattr(dentry, &newattrs); + else { + inode_setattr(dentry->d_inode, &newattrs); + /* Some filesystems, e.g. ext2 and older versions of ext3 + legitimately do not have a <fs>_setattr method. -SHP + */ + /* + printk ("Warning:: int presto_do_truncate(xxx), op->setattr == NULL"); + error = -EOPNOTSUPP; + */ + error = 0; + } + unlock_kernel(); + fs_up(&inode->i_sem); + EXIT; + return error; +} + + +void *presto_trans_start(struct presto_file_set *fset, struct inode *inode, + int op) +{ + ENTRY; + if ( !fset->fset_cache->cache_filter->o_trops ) + return NULL; + EXIT; + return fset->fset_cache->cache_filter->o_trops->tr_start + (fset, inode, op); +} + +void presto_trans_commit(struct presto_file_set *fset, void *handle) +{ + ENTRY; + if (!fset->fset_cache->cache_filter->o_trops ) + return; + EXIT; + return fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle); + +} + +inline int presto_no_journal(struct presto_file_set *fset) +{ + int minor = fset->fset_cache->cache_psdev->uc_minor; + return upc_comms[minor].uc_no_journal; +} + +#define size_round(x) (((x)+3) & ~0x3) + +#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE) +#define BUFF_ALLOC(newbuf, oldbuf) \ + 
PRESTO_ALLOC(newbuf, char *, PAGE_SIZE); \ + if ( !newbuf ) { \ + if (oldbuf) \ + BUFF_FREE(oldbuf); \ + return -ENOMEM; \ + } + +/* + * "buflen" should be PAGE_SIZE or more. + * Give relative path wrt to a fsetroot + */ +char * presto_path(struct dentry *dentry, struct dentry *root, + char *buffer, int buflen) +{ + char * end = buffer+buflen; + char * retval; + + *--end = '\0'; + buflen--; + if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) { + buflen -= 10; + end -= 10; + memcpy(end, " (deleted)", 10); + } + + /* Get '/' right */ + retval = end-1; + *retval = '/'; + + for (;;) { + struct dentry * parent; + int namelen; + + if (dentry == root) + break; + parent = dentry->d_parent; + if (dentry == parent) + break; + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + break; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + retval = end; + dentry = parent; + } + return retval; +} + +static inline char *logit(char *buf, const void *value, int size) +{ + char *ptr = (char *)value; + + memcpy(buf, ptr, size); + buf += size; + return buf; +} + + +static inline char * +journal_log_prefix_with_groups_and_ids(char *buf, int opcode, + struct rec_info *rec, + __u32 ngroups, gid_t *groups, + __u32 fsuid, __u32 fsgid) +{ + struct big_journal_prefix p; + int i; + + p.len = cpu_to_le32(rec->size); + p.version = PRESTO_KML_MAJOR_VERSION | PRESTO_KML_MINOR_VERSION; + p.pid = cpu_to_le32(current->pid); + p.uid = cpu_to_le32(current->uid); + p.fsuid = cpu_to_le32(fsuid); + p.fsgid = cpu_to_le32(fsgid); + p.ngroups = cpu_to_le32(ngroups); + p.opcode = cpu_to_le32(opcode); + for (i=0 ; i < ngroups ; i++) + p.groups[i] = cpu_to_le32((__u32) groups[i]); + + buf = logit(buf, &p, sizeof(struct journal_prefix) + + sizeof(__u32) * ngroups); + return buf; +} + +static inline char * +journal_log_prefix(char *buf, int opcode, struct rec_info *rec) +{ + __u32 groups[NGROUPS_MAX]; + int i; + + /* convert 16 bit gid's to 32 bit 
gid's */ + for (i=0; i<current->ngroups; i++) + groups[i] = (__u32) current->groups[i]; + + return journal_log_prefix_with_groups_and_ids(buf, opcode, rec, + (__u32)current->ngroups, + groups, + (__u32)current->fsuid, + (__u32)current->fsgid); +} + +static inline char * +journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec, + __u32 ngroups, gid_t *groups) +{ + return journal_log_prefix_with_groups_and_ids(buf, opcode, rec, + ngroups, groups, + (__u32)current->fsuid, + (__u32)current->fsgid); +} + +static inline char *log_version(char *buf, struct dentry *dentry) +{ + struct presto_version version; + + presto_getversion(&version, dentry->d_inode); + + return logit(buf, &version, sizeof(version)); +} + +static inline char *journal_log_suffix(char *buf, char *log, + struct presto_file_set *fset, + struct dentry *dentry, + struct rec_info *rec) +{ + struct journal_suffix s; + struct journal_prefix *p = (struct journal_prefix *)log; + +#if 0 + /* XXX needs to be done after reservation, + disable ths until version 1.2 */ + if ( dentry ) { + s.prevrec = cpu_to_le32(rec->offset - + presto_d2d(dentry)->dd_kml_offset); + presto_d2d(dentry)->dd_kml_offset = rec->offset; + } else { + s.prevrec = -1; + } +#endif + s.prevrec = 0; + + /* record number needs to be filled in after reservation + s.recno = cpu_to_le32(rec->recno); */ + s.time = cpu_to_le32(CURRENT_TIME); + s.len = cpu_to_le32(p->len); + return logit(buf, &s, sizeof(s)); +} + +int presto_close_journal_file(struct presto_file_set *fset) +{ + int rc = 0; + int rc2 = 0; + int rc3 = 0; + + ENTRY; + if ( fset->fset_kml.fd_file) { + rc =filp_close(fset->fset_kml.fd_file, 0); + fset->fset_kml.fd_file = NULL; + } else { + printk("hehehehe no filp\n"); + } + if ( rc ) { + printk("presto: close files: kml filp won't close %d\n", rc); + } + + if ( fset->fset_last_rcvd) { + rc2 = filp_close(fset->fset_last_rcvd, 0); + fset->fset_last_rcvd = NULL; + } else { + printk("hehehehe no filp\n"); + } + + if ( rc2 ) 
{ + if ( !rc ) + rc = rc2; + printk("presto: close files: last_rcvd filp won't close %d\n", rc2); + } + + if ( fset->fset_lml.fd_file) { + rc3 = filp_close(fset->fset_lml.fd_file, 0); + fset->fset_lml.fd_file = NULL; + } else { + printk("hehehehe no filp\n"); + } + if ( rc3 ) { + if ( (!rc) && (!rc2) ) + rc = rc3; + printk("presto: close files: lml filp won't close %d\n", rc3); + } + return rc; +} + +int presto_fwrite(struct file *file, const char *str, int len, loff_t *off) +{ + int rc; + mm_segment_t old_fs; + ENTRY; + + rc = -EINVAL; + if ( !off ) { + EXIT; + return rc; + } + + if ( ! file ) { + EXIT; + return rc; + } + + if ( ! file->f_op ) { + EXIT; + return rc; + } + + if ( ! file->f_op->write ) { + EXIT; + return rc; + } + + old_fs = get_fs(); + set_fs(get_ds()); + rc = file->f_op->write(file, str, len, off); + if (rc != len) { + printk("presto_fwrite: wrote %d bytes instead of " + "%d at %ld\n", rc, len, (long)*off); + rc = -EIO; + } + set_fs(old_fs); + EXIT; + return rc; +} + +int presto_fread(struct file *file, char *str, int len, loff_t *off) +{ + int rc; + mm_segment_t old_fs; + ENTRY; + + if ( len > 512 ) { + printk("presto_fread: read at %Ld for %d bytes, ino %ld\n", + *off, len, file->f_dentry->d_inode->i_ino); + } + + rc = -EINVAL; + if ( !off ) { + EXIT; + return rc; + } + + if ( ! file ) { + EXIT; + return rc; + } + + if ( ! file->f_op ) { + EXIT; + return rc; + } + + if ( ! file->f_op->read ) { + EXIT; + return rc; + } + + old_fs = get_fs(); + set_fs(get_ds()); + rc = file->f_op->read(file, str, len, off); + if (rc != len) { + printk("presto_fread: read %d bytes instead of " + "%d at %ld\n", rc, len, (long)*off); + rc = -EIO; + } + set_fs(old_fs); + return rc; +} + + +static int presto_kml_dispatch(struct presto_file_set *fset) +{ + int rc = 0; + unsigned int kml_recno; + struct presto_log_fd *fd = &fset->fset_kml; + loff_t offset; + ENTRY; + + write_lock(&fd->fd_lock); + + /* Determine the largest valid offset, i.e. 
up until the first + * reservation held on the file. */ + if ( !list_empty(&fd->fd_reservations) ) { + struct presto_reservation_data *rd; + rd = list_entry(fd->fd_reservations.next, + struct presto_reservation_data, + ri_list); + offset = rd->ri_offset; + kml_recno = rd->ri_recno; + } else { + offset = fd->fd_file->f_dentry->d_inode->i_size; + kml_recno = fset->fset_kml.fd_recno; + } + + if ( kml_recno < fset->fset_lento_recno ) { + printk("presto_kml_dispatch: smoke is coming\n"); + write_unlock(&fd->fd_lock); + return 0; + } else if ( kml_recno == fset->fset_lento_recno ) { + write_unlock(&fd->fd_lock); + EXIT; + return 0; + } + CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name); + rc = lento_kml(fset->fset_cache->cache_psdev->uc_minor, + fset->fset_lento_off, fset->fset_lento_recno, + offset, kml_recno, strlen(fset->fset_name), + fset->fset_name); + + if ( rc ) { + write_unlock(&fd->fd_lock); + EXIT; + return rc; + } + + fset->fset_lento_off = offset; + fset->fset_lento_recno = kml_recno; + write_unlock(&fd->fd_lock); + EXIT; + return 0; +} + + +/* structure of an extended log record: + + buf-prefix buf-body [string1 [string2 [string3]]] buf-suffix + + note: moves offset forward +*/ +static inline int presto_write_record(struct file *f, loff_t *off, + const char *buf, size_t size, + const char *string1, int len1, + const char *string2, int len2, + const char *string3, int len3) +{ + size_t prefix_size; + int rc; + + prefix_size = size - sizeof(struct journal_suffix); + rc = presto_fwrite(f, buf, prefix_size, off); + if ( rc != prefix_size ) { + printk("Write error!\n"); + EXIT; + return -EIO; + } + + if ( string1 && len1 ) { + rc = presto_fwrite(f, string1, len1, off); + if ( rc != len1 ) { + printk("Write error!\n"); + EXIT; + return -EIO; + } + } + + if ( string2 && len2 ) { + rc = presto_fwrite(f, string2, len2, off); + if ( rc != len2 ) { + printk("Write error!\n"); + EXIT; + return -EIO; + } + } + + if ( string3 && len3 ) { + rc = presto_fwrite(f, string3, 
len3, off); + if ( rc != len3 ) { + printk("Write error!\n"); + EXIT; + return -EIO; + } + } + + rc = presto_fwrite(f, buf + prefix_size, + sizeof(struct journal_suffix), off); + if ( rc != sizeof(struct journal_suffix) ) { + printk("Write error!\n"); + EXIT; + return -EIO; + } + return 0; +} + + +/* + * rec->size must be valid prior to calling this function. + */ +static int presto_log(struct presto_file_set *fset, struct rec_info *rec, + const char *buf, size_t size, + const char *string1, int len1, + const char *string2, int len2, + const char *string3, int len3) +{ + int rc; + struct presto_reservation_data rd; + loff_t offset; + struct presto_log_fd *fd; + struct journal_suffix *s; + int prefix_size; + + ENTRY; + + /* buf is NULL when no_journal is in effect */ + if (!buf) { + EXIT; + return -EINVAL; + } + + if (rec->is_kml) { + fd = &fset->fset_kml; + } else { + fd = &fset->fset_lml; + } + + presto_reserve_record(fset, fd, rec, &rd); + offset = rec->offset; + + /* now we know the record number */ + prefix_size = size - sizeof(struct journal_suffix); + s = (struct journal_suffix *) (buf + prefix_size); + s->recno = cpu_to_le32(rec->recno); + + rc = presto_write_record(fd->fd_file, &offset, buf, size, + string1, len1, string2, len2, string3, len3); + if (rc) { + printk("presto: error writing record to %s\n", + rec->is_kml ? 
"KML" : "LML"); + return rc; + } + presto_release_record(fd, &rd); + + rc = presto_kml_dispatch(fset); + + EXIT; + return rc; +} + +/* read from the record at tail */ +static int presto_last_record(struct presto_log_fd *fd, loff_t *size, + loff_t *tail_offset, __u32 *recno, loff_t tail) +{ + struct journal_suffix suffix; + int rc; + loff_t zeroes; + + *recno = 0; + *tail_offset = 0; + *size = 0; + + if (tail < sizeof(struct journal_prefix) + sizeof(suffix)) { + EXIT; + return 0; + } + + zeroes = tail - sizeof(int); + while ( zeroes >= 0 ) { + int data; + rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data), + &zeroes); + if ( rc != sizeof(data) ) { + rc = -EIO; + return rc; + } + if (data) + break; + zeroes -= 2 * sizeof(data); + } + + /* zeroes at the begining of file. this is needed to prevent + presto_fread errors -SHP + */ + if (zeroes <= 0) return 0; + + zeroes -= sizeof(suffix); + rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes); + if ( rc != sizeof(suffix) ) { + EXIT; + return rc; + } + if ( suffix.len > 500 ) { + printk("PRESTO: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n", + (long) zeroes, (long)*tail_offset, suffix.len); + } + + *recno = suffix.recno; + *size = suffix.len; + *tail_offset = zeroes; + return 0; +} + +static int presto_kml_last_recno(struct presto_file_set *fset) +{ + int rc; + loff_t size; + loff_t tail_offset; + int recno; + loff_t tail = fset->fset_kml.fd_file->f_dentry->d_inode->i_size; + + if ((rc = presto_last_record(&fset->fset_kml, &size, + &tail_offset, &recno, tail)) ) { + EXIT; + return rc; + } + + fset->fset_kml.fd_offset = tail_offset; + fset->fset_kml.fd_recno = recno; + CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset %Ld\n", + recno, tail_offset); + EXIT; + return 0; +} + +static struct file *presto_log_open(struct presto_file_set *fset, char *name, int flags) +{ + struct presto_cache *cache = fset->fset_cache; + struct file *f; + int error; + int mtpt_len, 
path_len; + char *path; + ENTRY; + + mtpt_len = strlen(cache->cache_mtpt); + path_len = mtpt_len + strlen("/.intermezzo/") + + strlen(fset->fset_name) + strlen(name); + + error = -ENOMEM; + PRESTO_ALLOC(path, char *, path_len + 1); + if ( !path ) { + EXIT; + return ERR_PTR(-ENOMEM); + } + + sprintf(path, "%s/.intermezzo/%s/%s", cache->cache_mtpt, + fset->fset_name, name); + CDEBUG(D_INODE, "opening file %s\n", path); + + f = filp_open(path, flags, 0); + error = PTR_ERR(f); + if (IS_ERR(f)) { + CDEBUG(D_INODE, "Error %d\n", error); + EXIT; + goto out_free; + } + + error = -EINVAL; + if ( cache != presto_get_cache(f->f_dentry->d_inode) ) { + printk("PRESTO: %s cache does not match fset cache!\n", name); + fset->fset_kml.fd_file = NULL; + filp_close(f, NULL); + goto out_free; + } + + if (cache->cache_filter && cache->cache_filter->o_trops && + cache->cache_filter->o_trops->tr_journal_data) { + CDEBUG(D_INODE, "\n"); + cache->cache_filter->o_trops->tr_journal_data + (f->f_dentry->d_inode); + } else { + printk("WARNING: InterMezzo no file data logging!\n"); + } + + out_free: + PRESTO_FREE(path, path_len + 1); + + EXIT; + return f; +} + +int presto_init_kml_file(struct presto_file_set *fset) +{ + int error = 0; + struct file *f; + + ENTRY; + if (fset->fset_kml.fd_file) { + CDEBUG(D_INODE, "fset already has KML open\n"); + EXIT; + return 0; + } + + fset->fset_kml.fd_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&fset->fset_kml.fd_reservations); + f = presto_log_open(fset, "kml", O_RDWR | O_CREAT); + if ( IS_ERR(f) ) { + error = PTR_ERR(f); + return error; + } + + fset->fset_kml.fd_file = f; + error = presto_kml_last_recno(fset); + + if ( error ) { + EXIT; + fset->fset_kml.fd_file = NULL; + filp_close(f, NULL); + printk("presto: IO error in KML of fset %s\n", + fset->fset_name); + } + fset->fset_lento_off = fset->fset_kml.fd_offset; + fset->fset_lento_recno = fset->fset_kml.fd_recno; + + EXIT; + return error; +} + + +int presto_init_last_rcvd_file(struct presto_file_set *fset) 
+{ + int error = 0; + struct file *f; + + ENTRY; + if (fset->fset_last_rcvd) { + CDEBUG(D_INODE, "fset already has last_rcvd open\n"); + EXIT; + return 0; + } + + f = presto_log_open(fset, "last_rcvd", O_RDWR | O_CREAT); + if ( IS_ERR(f) ) { + error = PTR_ERR(f); + return error; + } + + fset->fset_last_rcvd = f; + + EXIT; + return error; +} + +int presto_init_lml_file(struct presto_file_set *fset) +{ + int error = 0; + struct file *f; + + ENTRY; + if (fset->fset_lml.fd_file) { + CDEBUG(D_INODE, "fset already has lml open\n"); + EXIT; + return 0; + } + + fset->fset_lml.fd_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&fset->fset_lml.fd_reservations); + f = presto_log_open(fset, "lml", O_RDWR | O_CREAT); + if ( IS_ERR(f) ) { + error = PTR_ERR(f); + return error; + } + + fset->fset_lml.fd_file = f; + fset->fset_lml.fd_offset = + fset->fset_lml.fd_file->f_dentry->d_inode->i_size; + + EXIT; + return error; +} + +/* Write the last_rcvd values to the last)_rcvd file */ +int presto_write_last_rcvd(struct rec_info *recinfo, + struct presto_file_set *fset, + struct lento_vfs_context *info) +{ + int ret; + loff_t off = info->slot_offset; + struct { + __u32 remote_recno; + __u64 remote_offset; + __u32 local_recno; + __u64 local_offset; + } rcvd_rec; + + rcvd_rec.remote_recno = cpu_to_le32(info->recno); + rcvd_rec.remote_offset = cpu_to_le64(info->kml_offset); + rcvd_rec.local_recno = cpu_to_le32(recinfo->recno); + rcvd_rec.local_offset = cpu_to_le64(recinfo->offset + recinfo->size); + + ret = presto_fwrite(fset->fset_last_rcvd, (char *)(&rcvd_rec), + sizeof(rcvd_rec), &off); + + if (ret == sizeof(rcvd_rec)) + ret = 0; + + return ret; +} + +/* LML records here */ +/* this writes the LML records for close, in conjunction with the KML */ +int presto_write_lml_close(struct rec_info *rec, + struct presto_file_set *fset, + struct file *file, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct presto_version *new_file_ver) +{ + int opcode = PRESTO_OP_CLOSE; + 
char *buffer; + struct dentry *dentry = file->f_dentry; + __u64 ino; + __u32 pathlen; + char *path; + __u32 generation; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + root = fset->fset_mtpt; + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + CDEBUG(D_INODE, "Path: %s\n", path); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + ino = cpu_to_le64(dentry->d_inode->i_ino); + generation = cpu_to_le32(dentry->d_inode->i_generation); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + sizeof(*new_file_ver) + + sizeof(ino) + sizeof(generation) + sizeof(pathlen) + + sizeof(remote_ino) + sizeof(remote_generation) + + sizeof(remote_version) + sizeof(rec->offset) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 0; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, new_file_ver, sizeof(*new_file_ver)); + logrecord = logit(logrecord, &ino, sizeof(ino)); + logrecord = logit(logrecord, &generation, sizeof(generation)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino)); + logrecord = logit(logrecord, &remote_generation, + sizeof(remote_generation)); + logrecord = logit(logrecord, &remote_version, sizeof(remote_version)); + logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + + EXIT; + return error; +} + +int presto_journal_write(struct rec_info *rec, + struct presto_file_set *fset, + struct file *file) +{ + struct presto_version 
file_version; + int rc; + ENTRY; + + presto_getversion(&file_version, file->f_dentry->d_inode); + /* append this record */ + rc = presto_write_lml_close + (rec, + fset, + file, + 0, /* remote_ino */ + 0, /* remote_generation */ + 0, /* remote_version */ + &file_version); + EXIT; + return rc; +} + +/* + * Check if the given record is at the end of the file. If it is, truncate + * the lml to the record's offset, removing it. Repeat on prior record, + * until we reach an active record or a reserved record (as defined by the + * reservations list). + */ +static int presto_truncate_lml_tail(struct presto_file_set *fset) +{ + loff_t lml_tail; + loff_t lml_last_rec; + loff_t lml_last_recsize; + loff_t local_offset; + int recno; + struct journal_prefix prefix; + struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode; + void *handle; + int rc; + + ENTRY; + /* If someone else is already truncating the LML, return. */ + write_lock(&fset->fset_lml.fd_lock); + if (fset->fset_lml.fd_truncating == 1 ) { + write_unlock(&fset->fset_lml.fd_lock); + EXIT; + return 0; + } + /* someone is about to write to the end of the LML */ + if ( !list_empty(&fset->fset_lml.fd_reservations) ) { + write_unlock(&fset->fset_lml.fd_lock); + EXIT; + return 0; + } + lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size; + /* Nothing to truncate?*/ + if (lml_tail == 0) { + write_unlock(&fset->fset_lml.fd_lock); + EXIT; + return 0; + } + fset->fset_lml.fd_truncating = 1; + write_unlock(&fset->fset_lml.fd_lock); + + presto_last_record(&fset->fset_lml, &lml_last_recsize, + &lml_last_rec, &recno, lml_tail); + /* Do we have a record to check? If not we have zeroes at the + beginning of the file. 
-SHP + */ + if (lml_last_recsize != 0) { + local_offset = lml_last_rec - lml_last_recsize; + rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix, + sizeof(prefix), &local_offset); + if (rc != sizeof(prefix)) { + EXIT; + goto tr_out; + } + + if ( prefix.opcode != PRESTO_OP_NOOP ) { + EXIT; + rc = 0; + /* We may have zeroes at the end of the file, should + we clear them out? -SHP + */ + goto tr_out; + } + } else + lml_last_rec=0; + + handle = presto_trans_start(fset, inode, PRESTO_OP_TRUNC); + if ( !handle ) { + EXIT; + rc = -ENOMEM; + goto tr_out; + } + + rc = presto_do_truncate(fset, fset->fset_lml.fd_file->f_dentry, + lml_last_rec - lml_last_recsize, lml_tail); + presto_trans_commit(fset, handle); + if ( rc == 0 ) { + rc = 1; + } + EXIT; + + tr_out: + CDEBUG(D_JOURNAL, "rc = %d\n", rc); + write_lock(&fset->fset_lml.fd_lock); + fset->fset_lml.fd_truncating = 0; + write_unlock(&fset->fset_lml.fd_lock); + return rc; +} + +int presto_truncate_lml(struct presto_file_set *fset) +{ + + int rc; + ENTRY; + + while ( (rc = presto_truncate_lml_tail(fset)) > 0); + if ( rc < 0 && rc != -EALREADY) { + printk("truncate_lml error %d\n", rc); + } + EXIT; + return rc; +} + + + +int presto_clear_lml_close(struct presto_file_set *fset, + loff_t lml_offset) +{ + int rc; + struct journal_prefix record; + loff_t offset = lml_offset; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %d\n", + (long)lml_offset, sizeof(record)); + rc = presto_fread(fset->fset_lml.fd_file, (char *)&record, + sizeof(record), &offset); + + if ( rc != sizeof(record) ) { + printk("presto: clear_lml io error %d\n", rc); + EXIT; + return -EIO; + } + + /* overwrite the prefix */ + CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset); + record.opcode = PRESTO_OP_NOOP; + offset = lml_offset; + /* note: this does just a single transaction in the cache */ + rc = presto_fwrite(fset->fset_lml.fd_file, (char *)(&record), + 
sizeof(record), &offset); + if ( rc != sizeof(record) ) { + EXIT; + return -EIO; + } + + EXIT; + return 0; +} + + + +/* now a journal function for every operation */ + +int presto_journal_setattr(struct rec_info *rec, + struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *old_ver, struct iattr *iattr) +{ + int opcode = PRESTO_OP_SETATTR; + char *buffer; + char *path; + __u32 pathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + __u32 uid, gid, mode, valid, flags; + __u64 fsize, mtime, ctime; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + sizeof(*old_ver) + + sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) + + sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) + + sizeof(pathlen) + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + /* Only journal one kind of mtime, and not atime at all. Also don't + * journal bogus data in iattr, to make the journal more compressible. + */ + if (iattr->ia_valid & ATTR_MTIME_SET) + iattr->ia_valid = iattr->ia_valid | ATTR_MTIME; + valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET | + ATTR_ATIME_SET)); + mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0; + uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0; + gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0; + fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0; + mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime): 0; + ctime = iattr->ia_valid & ATTR_CTIME ? 
cpu_to_le64(iattr->ia_ctime): 0; + flags = iattr->ia_valid & ATTR_ATTR_FLAG ? + cpu_to_le32(iattr->ia_attr_flags): 0; + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, old_ver, sizeof(*old_ver)); + logrecord = logit(logrecord, &valid, sizeof(valid)); + logrecord = logit(logrecord, &mode, sizeof(mode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &fsize, sizeof(fsize)); + logrecord = logit(logrecord, &mtime, sizeof(mtime)); + logrecord = logit(logrecord, &ctime, sizeof(ctime)); + logrecord = logit(logrecord, &flags, sizeof(flags)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_file_ver, int mode) +{ + int opcode = PRESTO_OP_CREATE; + char *buffer; + char *path; + __u32 pathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + __u32 uid, gid, lmode; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + lmode = cpu_to_le32(mode); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > 
sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, dentry->d_parent); + logrecord = logit(logrecord, new_file_ver, sizeof(*new_file_ver)); + logrecord = logit(logrecord, &lmode, sizeof(lmode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset, struct dentry *dentry, + const char *target, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver) +{ + int opcode = PRESTO_OP_SYMLINK; + char *buffer; + char *path; + __u32 pathlen; + int size; + char *logrecord; + char record[292]; + __u32 targetlen = cpu_to_le32(strlen(target)); + struct dentry *root; + __u32 uid, gid; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + + sizeof(targetlen) + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + 
size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(targetlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, dentry->d_parent); + logrecord = logit(logrecord, new_link_ver, sizeof(*new_link_ver)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &targetlen, sizeof(targetlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + target, size_round(le32_to_cpu(targetlen)), + NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset, struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_dir_ver, int mode) +{ + int opcode = PRESTO_OP_MKDIR; + char *buffer; + char *path; + __u32 pathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + __u32 uid, gid, lmode; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + lmode = cpu_to_le32(mode); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + logrecord = journal_log_prefix(record, opcode, rec); + + logrecord 
= logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, dentry->d_parent); + logrecord = logit(logrecord, new_dir_ver, sizeof(*new_dir_ver)); + logrecord = logit(logrecord, &lmode, sizeof(lmode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + + +int +presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dir, struct presto_version *tgt_dir_ver, + struct presto_version *old_dir_ver, int len, + const char *name) +{ + int opcode = PRESTO_OP_RMDIR; + char *buffer; + char *path; + __u32 pathlen, llen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + llen = cpu_to_le32(len); + BUFF_ALLOC(buffer, NULL); + path = presto_path(dir, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(pathlen) + sizeof(llen) + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n", + path, pathlen, name, len, size); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(len); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, dir); + logrecord = logit(logrecord, old_dir_ver, sizeof(*old_dir_ver)); + logrecord = logit(logrecord, 
&pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &llen, sizeof(llen)); + logrecord = journal_log_suffix(logrecord, record, fset, dir, rec); + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + name, size_round(len), + NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + + +int +presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, struct presto_version *tgt_dir_ver, + struct presto_version *new_node_ver, int mode, + int dmajor, int dminor ) +{ + int opcode = PRESTO_OP_MKNOD; + char *buffer; + char *path; + __u32 pathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + __u32 uid, gid, lmode, lmajor, lminor; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + lmode = cpu_to_le32(mode); + lmajor = cpu_to_le32(dmajor); + lminor = cpu_to_le32(dminor); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) + + sizeof(lminor) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, dentry->d_parent); + logrecord = logit(logrecord, new_node_ver, sizeof(*new_node_ver)); + logrecord = logit(logrecord, &lmode, sizeof(lmode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + 
logrecord = logit(logrecord, &lmajor, sizeof(lmajor)); + logrecord = logit(logrecord, &lminor, sizeof(lminor)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int +presto_journal_link(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver) +{ + int opcode = PRESTO_OP_LINK; + char *buffer, *srcbuffer; + char *path, *srcpath; + __u32 pathlen, srcpathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + BUFF_ALLOC(srcbuffer, NULL); + srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE); + srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath)); + + BUFF_ALLOC(buffer, srcbuffer); + path = presto_path(tgt, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(srcpathlen) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(srcpathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, tgt->d_parent); + logrecord = logit(logrecord, new_link_ver, sizeof(*new_link_ver)); + logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, 
record, fset, tgt, rec); + + error = presto_log(fset, rec, record, size, + srcpath, size_round(le32_to_cpu(srcpathlen)), + path, size_round(le32_to_cpu(pathlen)), + NULL, 0); + + BUFF_FREE(srcbuffer); + BUFF_FREE(buffer); + EXIT; + return error; +} + + +int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset, struct dentry *src, + struct dentry *tgt, + struct presto_version *src_dir_ver, + struct presto_version *tgt_dir_ver) +{ + int opcode = PRESTO_OP_RENAME; + char *buffer, *srcbuffer; + char *path, *srcpath; + __u32 pathlen, srcpathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + BUFF_ALLOC(srcbuffer, NULL); + srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE); + srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath)); + + BUFF_ALLOC(buffer, srcbuffer); + path = presto_path(tgt, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 4 * sizeof(*src_dir_ver) + + sizeof(srcpathlen) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(srcpathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, src_dir_ver, sizeof(*src_dir_ver)); + logrecord = log_version(logrecord, src->d_parent); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, tgt->d_parent); + logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec); + + error = presto_log(fset, rec, record, size, + srcpath, 
size_round(le32_to_cpu(srcpathlen)), + path, size_round(le32_to_cpu(pathlen)), + NULL, 0); + + BUFF_FREE(buffer); + BUFF_FREE(srcbuffer); + EXIT; + return error; +} + + +int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset, struct dentry *dir, + struct presto_version *tgt_dir_ver, + struct presto_version *old_file_ver, int len, + const char *name) +{ + int opcode = PRESTO_OP_UNLINK; + char *buffer; + char *path; + __u32 pathlen, llen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + llen = cpu_to_le32(len); + BUFF_ALLOC(buffer, NULL); + path = presto_path(dir, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + 3 * sizeof(*tgt_dir_ver) + + sizeof(pathlen) + sizeof(llen) + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, tgt_dir_ver, sizeof(*tgt_dir_ver)); + logrecord = log_version(logrecord, dir); + logrecord = logit(logrecord, old_file_ver, sizeof(*old_file_ver)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &llen, sizeof(llen)); + logrecord = journal_log_suffix(logrecord, record, fset, dir, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + name, size_round(len), + NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int +presto_journal_close(struct rec_info *rec, struct presto_file_set *fset, + struct file *file, struct dentry *dentry, + struct presto_version *new_file_ver) +{ + int opcode = PRESTO_OP_CLOSE; + struct presto_file_data *fd; + 
char *buffer; + char *path; + __u64 ino; + __u32 pathlen; + __u32 generation; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + __u32 open_fsuid; + __u32 open_fsgid; + __u32 open_ngroups; + __u32 open_groups[NGROUPS_MAX]; + __u32 open_mode; + __u32 open_uid; + __u32 open_gid; + int i; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + fd = (struct presto_file_data *)file->private_data; + if (fd) { + open_ngroups = fd->fd_ngroups; + for (i = 0; i < fd->fd_ngroups; i++) + open_groups[i] = (__u32) fd->fd_groups[i]; + open_mode = fd->fd_mode; + open_uid = fd->fd_uid; + open_gid = fd->fd_gid; + open_fsuid = fd->fd_fsuid; + open_fsgid = fd->fd_fsgid; + } else { + open_ngroups = current->ngroups; + for (i=0; i<current->ngroups; i++) + open_groups[i] = (__u32) current->groups[i]; + open_mode = dentry->d_inode->i_mode; + open_uid = dentry->d_inode->i_uid; + open_gid = dentry->d_inode->i_gid; + open_fsuid = current->fsuid; + open_fsgid = current->fsgid; + } + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + ino = cpu_to_le64(dentry->d_inode->i_ino); + generation = cpu_to_le32(dentry->d_inode->i_generation); + size = sizeof(__u32) * open_ngroups + + sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) + + sizeof(struct journal_prefix) + sizeof(*new_file_ver) + + sizeof(ino) + sizeof(generation) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix_with_groups_and_ids( + record, opcode, rec, open_ngroups, open_groups, + open_fsuid, open_fsgid); + logrecord = logit(logrecord, &open_mode, sizeof(open_mode)); + 
logrecord = logit(logrecord, &open_uid, sizeof(open_uid)); + logrecord = logit(logrecord, &open_gid, sizeof(open_gid)); + logrecord = logit(logrecord, new_file_ver, sizeof(*new_file_ver)); + logrecord = logit(logrecord, &ino, sizeof(ino)); + logrecord = logit(logrecord, &generation, sizeof(generation)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + BUFF_FREE(buffer); + + EXIT; + return error; +} + +int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset, + char *path, __u32 pathlen, + int ngroups, __u32 *groups, + __u64 ino, __u32 generation, + struct presto_version *new_file_ver) +{ + int opcode = PRESTO_OP_CLOSE; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + size = sizeof(__u32) * ngroups + + sizeof(struct journal_prefix) + sizeof(*new_file_ver) + + sizeof(ino) + sizeof(generation) + + sizeof(le32_to_cpu(pathlen)) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix_with_groups(record, opcode, rec, + ngroups, groups); + logrecord = logit(logrecord, new_file_ver, sizeof(*new_file_ver)); + logrecord = logit(logrecord, &ino, sizeof(ino)); + logrecord = logit(logrecord, &generation, sizeof(generation)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + EXIT; + return error; +} + + +/* write closes for the local close records in the 
LML */ +int presto_complete_lml(struct presto_file_set *fset) +{ + __u32 groups[NGROUPS_MAX]; + loff_t lml_offset; + loff_t read_offset; + char *buffer; + void *handle; + struct rec_info rec; + struct close_rec { + struct presto_version new_file_ver; + __u64 ino; + __u32 generation; + __u32 pathlen; + __u64 remote_ino; + __u32 remote_generation; + __u32 remote_version; + __u64 lml_offset; + } close_rec; + struct file *file = fset->fset_lml.fd_file; + struct journal_prefix prefix; + int rc = 0; + ENTRY; + + lml_offset = 0; + again: + if (lml_offset >= file->f_dentry->d_inode->i_size) { + EXIT; + return rc; + } + + read_offset = lml_offset; + rc = presto_fread(file, (char *)&prefix, + sizeof(prefix), &read_offset); + if ( rc != sizeof(prefix) ) { + EXIT; + printk("presto_complete_lml: ioerror - 1, tell Peter\n"); + return -EIO; + } + + if ( prefix.opcode == PRESTO_OP_NOOP ) { + lml_offset += prefix.len; + goto again; + } + + rc = presto_fread(file, (char *)groups, + prefix.ngroups * sizeof(__u32), &read_offset); + if ( rc != prefix.ngroups * sizeof(__u32) ) { + EXIT; + printk("presto_complete_lml: ioerror - 2, tell Peter\n"); + return -EIO; + } + + rc = presto_fread(file, (char *)&close_rec, + sizeof(close_rec), &read_offset); + if ( rc != sizeof(close_rec) ) { + EXIT; + printk("presto_complete_lml: ioerror - 3, tell Peter\n"); + return -EIO; + } + + /* is this a backfetch or a close record? 
*/ + if ( le64_to_cpu(close_rec.remote_ino) != 0 ) { + lml_offset += prefix.len; + goto again; + } + + BUFF_ALLOC(buffer, NULL); + rc = presto_fread(file, (char *)buffer, + le32_to_cpu(close_rec.pathlen), &read_offset); + if ( rc != le32_to_cpu(close_rec.pathlen) ) { + EXIT; + printk("presto_complete_lml: ioerror - 4, tell Peter\n"); + return -EIO; + } + + handle = presto_trans_start(fset, file->f_dentry->d_inode, + PRESTO_OP_RELEASE); + if ( !handle ) { + EXIT; + return -ENOMEM; + } + + rc = presto_clear_lml_close(fset, lml_offset); + if ( rc ) { + printk("error during clearing: %d\n", rc); + presto_trans_commit(fset, handle); + EXIT; + return rc; + } + + rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen, + prefix.ngroups, groups, + close_rec.ino, close_rec.generation, + &close_rec.new_file_ver); + if ( rc ) { + printk("error during rewrite close: %d\n", rc); + presto_trans_commit(fset, handle); + EXIT; + return rc; + } + + presto_trans_commit(fset, handle); + if ( rc ) { + printk("error during truncation: %d\n", rc); + EXIT; + return rc; + } + + lml_offset += prefix.len; + CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset); + goto again; + + EXIT; + return -EINVAL; +} + + +#ifdef CONFIG_FS_EXT_ATTR +/* Journal an ea operation. A NULL buffer implies the attribute is + * getting deleted. In this case we simply change the opcode, but nothing + * else is affected. + */ +int presto_journal_set_ext_attr (struct rec_info *rec, + struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *ver, const char *name, + const char *buffer, int buffer_len, + int flags) +{ + int opcode = (buffer == NULL) ? + PRESTO_OP_DELEXTATTR : + PRESTO_OP_SETEXTATTR ; + char *temp; + char *path; + __u32 pathlen; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + __u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX)); + __u32 buflen=(buffer != NULL)? 
cpu_to_le32(buffer_len): cpu_to_le32(0); + __u32 mode; + + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) ) { + EXIT; + return 0; + } + + root = fset->fset_mtpt; + + BUFF_ALLOC(temp, NULL); + path = presto_path(dentry, root, temp, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(temp, path)); + + flags=cpu_to_le32(flags); + /* Ugly, but needed. posix ACLs change the mode without using + * setattr, we need to record these changes. The EA code per se + * is not really affected. + */ + mode=cpu_to_le32(dentry->d_inode->i_mode); + + size = sizeof(__u32) * current->ngroups + + sizeof(struct journal_prefix) + + 2 * sizeof(struct presto_version) + + sizeof(flags) + sizeof(mode) + sizeof(namelen) + + sizeof(buflen) + sizeof(pathlen) + + sizeof(struct journal_suffix); + + if ( size > sizeof(record) ) { + printk("PRESTO: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + } + + rec->is_kml = 1; + /* Make space for a path, a attr name and value*/ + /* We use the buflen instead of buffer_len to make sure that we + * journal the right length. This may be a little paranoid, but + * with 64 bits round the corner, I would rather be safe than sorry! + * Also this handles deletes with non-zero buffer_lengths correctly. 
+ * SHP + */ + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(namelen)) + + size_round(le32_to_cpu(buflen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = logit(logrecord, ver, sizeof(*ver)); + logrecord = log_version(logrecord, dentry); + logrecord = logit(logrecord, &flags, sizeof(flags)); + logrecord = logit(logrecord, &mode, sizeof(flags)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &namelen, sizeof(namelen)); + logrecord = logit(logrecord, &buflen, sizeof(buflen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + name, size_round(le32_to_cpu(namelen)), + buffer, size_round(le32_to_cpu(buflen))); + + BUFF_FREE(temp); + EXIT; + return error; +} +#endif + diff --git a/fs/intermezzo/journal_ext2.c b/fs/intermezzo/journal_ext2.c new file mode 100644 index 000000000000..434e8fe236f6 --- /dev/null +++ b/fs/intermezzo/journal_ext2.c @@ -0,0 +1,70 @@ + +/* + * Intermezzo. (C) 1998 Peter J. 
Braam + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/ext2_fs.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +#if defined(CONFIG_EXT2_FS) + +/* EXT2 has no journalling, so these functions do nothing */ +static loff_t presto_e2_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + unsigned long freebl = le32_to_cpu(sb->u.ext2_sb.s_es->s_free_blocks_count); + unsigned long avail = freebl - le32_to_cpu(sb->u.ext2_sb.s_es->s_r_blocks_count); + return (avail << EXT2_BLOCK_SIZE_BITS(sb)); +} + +/* start the filesystem journal operations */ +static void *presto_e2_trans_start(struct presto_file_set *fset, struct inode *inode, int op) +{ + __u32 avail_kmlblocks; + + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "ext2")) + return NULL; + + avail_kmlblocks = inode->i_sb->u.ext2_sb.s_es->s_free_blocks_count; + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + return (void *) 1; +} + +static void presto_e2_trans_commit(struct presto_file_set *fset, void *handle) +{ + do {} while (0); +} + +struct journal_ops presto_ext2_journal_ops = { + tr_avail: presto_e2_freespace, + tr_start: presto_e2_trans_start, + tr_commit: presto_e2_trans_commit, + tr_journal_data: NULL +}; + +#endif /* CONFIG_EXT2_FS */ diff --git a/fs/intermezzo/journal_ext3.c b/fs/intermezzo/journal_ext3.c new file mode 100644 index 000000000000..1a2fea3e756c --- /dev/null +++ b/fs/intermezzo/journal_ext3.c @@ -0,0 +1,205 @@ + +/* + * Intermezzo. 
(C) 1998 Peter J. Braam + * Intermezzo. (C) 2000 Red Hat, Inc. + * Intermezzo. (C) 2000 Los Alamos National Laboratory + * Intermezzo. (C) 2000 TurboLinux, Inc. + * Intermezzo. (C) 2001 Mountain View Data, Inc. + */ + +#include <linux/types.h> +#include <linux/param.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#endif + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) + +#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) +#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) + +/* space requirements: + presto_do_truncate: + used to truncate the KML forward to next fset->chunksize boundary + - zero partial block + - update inode + presto_write_record: + write header (< one block) + write one path (< MAX_PATHLEN) + possibly write another path (< MAX_PATHLEN) + write suffix (< one block) + presto_update_last_rcvd + write one block +*/ + +static loff_t presto_e3_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + loff_t freebl = le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); + loff_t avail = freebl - + le32_to_cpu(sb->u.ext3_sb.s_es->s_r_blocks_count); + return (avail << EXT3_BLOCK_SIZE_BITS(sb)); +} + +/* start the filesystem journal operations */ +static void *presto_e3_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + int jblocks; + int trunc_blks, one_path_blks, extra_path_blks, + 
extra_name_blks, lml_blks; + __u32 avail_kmlblocks; + handle_t *handle; + + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "ext3")) + { + CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", + fset->fset_cache->cache_type); + return NULL; + } + + avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count; + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + + /* Need journal space for: + at least three writes to KML (two one block writes, one a path) + possibly a second name (unlink, rmdir) + possibly a second path (symlink, rename) + a one block write to the last rcvd file + */ + + trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; + one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; + lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; + extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); + extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); + + /* additional blocks appear for "two pathname" operations + and operations involving the LML records + */ + switch (op) { + case PRESTO_OP_TRUNC: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_RELEASE: + /* + jblocks = one_path_blks + lml_blks + 2*trunc_blks; + */ + jblocks = one_path_blks; + break; + case PRESTO_OP_SETATTR: + jblocks = one_path_blks + trunc_blks + 1 ; + break; + case PRESTO_OP_CREATE: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3 + 2; + break; + case PRESTO_OP_LINK: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 2; + break; + case PRESTO_OP_UNLINK: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS + 2; + break; + case PRESTO_OP_SYMLINK: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 5; + break; + case 
PRESTO_OP_MKDIR: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 4 + 2; + break; + case PRESTO_OP_RMDIR: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS + 1; + break; + case PRESTO_OP_MKNOD: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3 + 2; + break; + case PRESTO_OP_RENAME: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + 2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; + break; + case PRESTO_OP_WRITE: + jblocks = one_path_blks; + /* add this when we can wrap our transaction with + that of ext3_file_write (ordered writes) + + EXT3_DATA_TRANS_BLOCKS; + */ + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); + /* journal_start/stop does not do its own locking while updating + * the handle/transaction information. Hence we create our own + * critical section to protect these calls. -SHP + */ + lock_kernel(); + handle = journal_start(EXT3_JOURNAL(inode), jblocks); + unlock_kernel(); + return handle; +} + +void presto_e3_trans_commit(struct presto_file_set *fset, void *handle) +{ + if ( presto_no_journal(fset) || !handle) + return; + + /* See comments before journal_start above. -SHP */ + lock_kernel(); + journal_stop(handle); + unlock_kernel(); +} + +void presto_e3_journal_file_data(struct inode *inode) +{ +#ifdef EXT3_JOURNAL_DATA_FL + inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; +#else +#warning You must have a facility to enable journaled writes for recovery! 
+#endif +} + +struct journal_ops presto_ext3_journal_ops = { + tr_avail: presto_e3_freespace, + tr_start: presto_e3_trans_start, + tr_commit: presto_e3_trans_commit, + tr_journal_data: presto_e3_journal_file_data +}; + +#endif /* CONFIG_EXT3_FS */ diff --git a/fs/intermezzo/journal_obdfs.c b/fs/intermezzo/journal_obdfs.c new file mode 100644 index 000000000000..6c0ea9ef741e --- /dev/null +++ b/fs/intermezzo/journal_obdfs.c @@ -0,0 +1,180 @@ + +/* + * Intermezzo. (C) 1998 Peter J. Braam + * Intermezzo. (C) 2000 Red Hat, Inc. + * Intermezzo. (C) 2000 Los Alamos National Laboratory + * Intermezzo. (C) 2000 TurboLinux, Inc. + * Intermezzo. (C) 2001 Mountain View Data, Inc. + */ + +#include <linux/types.h> +#include <linux/param.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#ifdef CONFIG_OBDFS_FS +#include /usr/src/obd/include/linux/obdfs.h +#endif + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +#ifdef CONFIG_OBDFS_FS + + +static unsigned long presto_obdfs_freespace(struct presto_file_set *fset, + struct super_block *sb) +{ + return 0x0fffff; +} + +/* start the filesystem journal operations */ +static void *presto_obdfs_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + + return (void *) 1; +} + +#if 0 + int jblocks; + int trunc_blks, one_path_blks, extra_path_blks, + extra_name_blks, lml_blks; + __u32 avail_kmlblocks; + + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "ext3")) + { + CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", + fset->fset_cache->cache_type); + return NULL; + } + + avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count; + + if ( avail_kmlblocks < 3 ) 
{ + return ERR_PTR(-ENOSPC); + } + + if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + + /* Need journal space for: + at least three writes to KML (two one block writes, one a path) + possibly a second name (unlink, rmdir) + possibly a second path (symlink, rename) + a one block write to the last rcvd file + */ + + trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; + one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; + lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; + extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); + extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); + + /* additional blocks appear for "two pathname" operations + and operations involving the LML records + */ + switch (op) { + case PRESTO_OP_TRUNC: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_RELEASE: + /* + jblocks = one_path_blks + lml_blks + 2*trunc_blks; + */ + jblocks = one_path_blks; + break; + case PRESTO_OP_SETATTR: + jblocks = one_path_blks + trunc_blks + 1 ; + break; + case PRESTO_OP_CREATE: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3; + break; + case PRESTO_OP_LINK: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS; + break; + case PRESTO_OP_UNLINK: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_SYMLINK: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 5; + break; + case PRESTO_OP_MKDIR: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 4; + break; + case PRESTO_OP_RMDIR: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_MKNOD: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3; + break; + case PRESTO_OP_RENAME: + jblocks = one_path_blks + 
extra_path_blks + trunc_blks + + 2 * EXT3_DATA_TRANS_BLOCKS + 2; + break; + case PRESTO_OP_WRITE: + jblocks = one_path_blks; + /* add this when we can wrap our transaction with + that of ext3_file_write (ordered writes) + + EXT3_DATA_TRANS_BLOCKS; + */ + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); + return journal_start(EXT3_JOURNAL(inode), jblocks); +} +#endif + +void presto_obdfs_trans_commit(struct presto_file_set *fset, void *handle) +{ +#if 0 + if ( presto_no_journal(fset) || !handle) + return; + + journal_stop(handle); +#endif +} + +void presto_obdfs_journal_file_data(struct inode *inode) +{ +#ifdef EXT3_JOURNAL_DATA_FL + inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; +#else +#warning You must have a facility to enable journaled writes for recovery! +#endif +} + +struct journal_ops presto_obdfs_journal_ops = { + tr_avail: presto_obdfs_freespace, + tr_start: presto_obdfs_trans_start, + tr_commit: presto_obdfs_trans_commit, + tr_journal_data: presto_obdfs_journal_file_data +}; + +#endif diff --git a/fs/intermezzo/journal_reiserfs.c b/fs/intermezzo/journal_reiserfs.c new file mode 100644 index 000000000000..233376bef9fa --- /dev/null +++ b/fs/intermezzo/journal_reiserfs.c @@ -0,0 +1,119 @@ + +/* + * Intermezzo. (C) 1998 Peter J. Braam + * Intermezzo. (C) 2000 Red Hat, Inc. + * Intermezzo. (C) 2000 Los Alamos National Laboratory + * Intermezzo. (C) 2000 TurboLinux, Inc. + * Intermezzo. (C) 2001 Mountain View Data, Inc. 
+ */ + +#include <linux/types.h> +#include <linux/param.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/smp_lock.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#if 0 +#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) +#include <linux/reiserfs_fs.h> +#include <linux/reiserfs_fs_sb.h> +#include <linux/reiserfs_fs_i.h> +#endif + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> +#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) + + +static loff_t presto_reiserfs_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (sb); + loff_t avail; + + avail = le32_to_cpu(rs->s_free_blocks) * + le16_to_cpu(rs->s_blocksize); + return avail; +} + +/* start the filesystem journal operations */ +static void *presto_reiserfs_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + int jblocks; + __u32 avail_kmlblocks; + struct reiserfs_transaction_handle *th ; + + PRESTO_ALLOC(th, struct reiserfs_transaction_handle *, sizeof(*th)); + if (!th) { + printk("presto: No memory for trans handle\n"); + return NULL; + } + + avail_kmlblocks = presto_reiserfs_freespace(fset->fset_cache, + inode->i_sb); + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "reiserfs")) + { + CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", + fset->fset_cache->cache_type); + return NULL; + } + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + + jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4; + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); + + 
lock_kernel(); + //journal_begin(th, inode->i_sb, jblocks); + unlock_kernel(); + return th; +} + +void presto_reiserfs_trans_commit(struct presto_file_set *fset, void *handle) +{ + int jblocks; + jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4; + + lock_kernel(); + //journal_end(handle, fset->fset_cache->cache_sb, jblocks); + unlock_kernel(); + PRESTO_FREE(handle, sizeof(struct reiserfs_transaction_handle)); +} + +void presto_reiserfs_journal_file_data(struct inode *inode) +{ +#ifdef EXT3_JOURNAL_DATA_FL + inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; +#else +#warning You must have a facility to enable journaled writes for recovery! +#endif +} + +struct journal_ops presto_reiserfs_journal_ops = { + tr_avail: presto_reiserfs_freespace, + tr_start: presto_reiserfs_trans_start, + tr_commit: presto_reiserfs_trans_commit, + tr_journal_data: presto_reiserfs_journal_file_data +}; + +#endif +#endif diff --git a/fs/intermezzo/journal_xfs.c b/fs/intermezzo/journal_xfs.c new file mode 100644 index 000000000000..822a6de2db67 --- /dev/null +++ b/fs/intermezzo/journal_xfs.c @@ -0,0 +1,137 @@ + +/* + * * Intermezzo. (C) 1998 Peter J. Braam + * */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#ifdef CONFIG_FS_XFS +#include <linux/xfs_fs.h> +#endif +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> +#include <linux/intermezzo_journal.h> + +#if defined(CONFIG_XFS_FS) + +/* XFS has journalling, but these functions do nothing yet... 
*/ + +static unsigned long presto_xfs_freespace(struct presto_file_set *fset, + struct super_block *sb) +{ + +#if 0 + vfs_t *vfsp = LINVFS_GET_VFS(sb); + struct statvfs_t stat; + bhv_desc_t *bdp; + unsigned long avail; + int rc; + + VFS_STATVFS(vfsp, &stat, NULL, rc); + avail = statp.f_bfree; + + return sbp->sb_fdblocks;; +#endif + return 0x0fffffff; +} + + +/* start the filesystem journal operations */ +static void * +presto_xfs_trans_start(struct presto_file_set *fset, + struct inode *inode, int op) +{ + int xfs_op; + /* do a free blocks check as in journal_ext3? does anything protect + * the space in that case or can it disappear out from under us + * anyway? */ + +/* copied from xfs_trans.h, skipping header maze for now */ +#define XFS_TRANS_SETATTR_NOT_SIZE 1 +#define XFS_TRANS_SETATTR_SIZE 2 +#define XFS_TRANS_INACTIVE 3 +#define XFS_TRANS_CREATE 4 +#define XFS_TRANS_CREATE_TRUNC 5 +#define XFS_TRANS_TRUNCATE_FILE 6 +#define XFS_TRANS_REMOVE 7 +#define XFS_TRANS_LINK 8 +#define XFS_TRANS_RENAME 9 +#define XFS_TRANS_MKDIR 10 +#define XFS_TRANS_RMDIR 11 +#define XFS_TRANS_SYMLINK 12 + + /* map the op onto the values for XFS so it can do reservation. if + * we don't have enough info to differentiate between e.g. setattr + * with or without size, what do we do? will it adjust? */ + switch (op) { + case PRESTO_OP_SETATTR: + /* or XFS_TRANS_SETATTR_NOT_SIZE? */ + xfs_op = XFS_TRANS_SETATTR_SIZE; + break; + case PRESTO_OP_CREATE: + /* or CREATE_TRUNC? */ + xfs_op = XFS_TRANS_CREATE; + break; + case PRESTO_OP_LINK: + xfs_op = XFS_TRANS_LINK; + break; + case PRESTO_OP_UNLINK: + xfs_op = XFS_TRANS_REMOVE; + break; + case PRESTO_OP_SYMLINK: + xfs_op = XFS_TRANS_SYMLINK; + break; + case PRESTO_OP_MKDIR: + xfs_op = XFS_TRANS_MKDIR; + break; + case PRESTO_OP_RMDIR: + xfs_op = XFS_TRANS_RMDIR; + break; + case PRESTO_OP_MKNOD: + /* XXX can't find an analog for mknod? 
*/ + xfs_op = XFS_TRANS_CREATE; + break; + case PRESTO_OP_RENAME: + xfs_op = XFS_TRANS_RENAME; + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + return xfs_trans_start(inode, xfs_op); +} + +static void presto_xfs_trans_commit(struct presto_file_set *fset, void *handle) +{ + /* assert (handle == current->j_handle) */ + xfs_trans_stop(handle); +} + +void presto_xfs_journal_file_data(struct inode *inode) +{ + return; +} + +struct journal_ops presto_xfs_journal_ops = { + tr_avail: presto_xfs_freespace, + tr_start: presto_xfs_trans_start, + tr_commit: presto_xfs_trans_commit, + tr_journal_data: presto_xfs_journal_file_data +}; + +#endif /* CONFIG_XFS_FS */ + + diff --git a/fs/intermezzo/kml.c b/fs/intermezzo/kml.c new file mode 100644 index 000000000000..9a362ada7fc7 --- /dev/null +++ b/fs/intermezzo/kml.c @@ -0,0 +1,199 @@ +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#define __NO_VERSION__ +#include <linux/module.h> +#include <asm/uaccess.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +static struct presto_file_set * kml_getfset (char *path) +{ + return presto_path2fileset(path); +} + +/* Send the KML buffer and related volume info into kernel */ +int begin_kml_reint (struct file *file, unsigned long arg) +{ + struct { + char *volname; + int namelen; + char *recbuf; + int reclen; /* int newpos; */ + } input; + struct kml_fsdata *kml_fsdata = NULL; + struct presto_file_set *fset = NULL; + char *path; + int error; + + ENTRY; + /* allocate buffer & copy it to kernel space */ + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + if (input.reclen > kml_fsdata->kml_maxsize) + return -ENOMEM; /* we'll find solution to this in the future */ + + PRESTO_ALLOC(path, char *, input.namelen + 1); + if ( !path ) { + EXIT; + return 
-ENOMEM; + } + error = copy_from_user(path, input.volname, input.namelen); + if ( error ) { + PRESTO_FREE(path, input.namelen + 1); + EXIT; + return error; + } + path[input.namelen] = '\0'; + fset = kml_getfset (path); + PRESTO_FREE(path, input.namelen + 1); + + kml_fsdata = FSET_GET_KMLDATA(fset); + /* read the buf from user memory here */ + error = copy_from_user(kml_fsdata->kml_buf, input.recbuf, input.reclen); + if ( error ) { + EXIT; + return error; + } + kml_fsdata->kml_len = input.reclen; + + decode_kmlrec (&kml_fsdata->kml_reint_cache, + kml_fsdata->kml_buf, kml_fsdata->kml_len); + + kml_fsdata->kml_reint_current = kml_fsdata->kml_reint_cache.next; + kml_fsdata->kml_reintpos = 0; + kml_fsdata->kml_count = 0; + return 0; +} + +/* DO_KML_REINT */ +int do_kml_reint (struct file *file, unsigned long arg) +{ + struct { + char *volname; + int namelen; + char *path; + int pathlen; + int recno; + int offset; + int len; + int generation; + __u64 ino; + } input; + int error; + char *path; + struct kml_rec *close_rec; + struct kml_fsdata *kml_fsdata; + struct presto_file_set *fset; + + ENTRY; + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + PRESTO_ALLOC(path, char *, input.namelen + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.volname, input.namelen); + if ( error ) { + PRESTO_FREE(path, input.namelen + 1); + EXIT; + return error; + } + path[input.namelen] = '\0'; + fset = kml_getfset (path); + PRESTO_FREE(path, input.namelen + 1); + + kml_fsdata = FSET_GET_KMLDATA(fset); + + error = kml_reintbuf(kml_fsdata, + fset->fset_mtpt->d_name.name, + &close_rec); + + if (error == KML_CLOSE_BACKFETCH && close_rec != NULL) { + struct kml_close *close = &close_rec->rec_kml.close; + input.ino = close->ino; + input.generation = close->generation; + if (strlen (close->path) + 1 < input.pathlen) { + strcpy (input.path, close->path); + input.pathlen = strlen (close->path) + 1; + 
input.recno = close_rec->rec_tail.recno; + input.offset = close_rec->rec_kml_offset; + input.len = close_rec->rec_size; + input.generation = close->generation; + input.ino = close->ino; + } + else { + CDEBUG(D_KML, "KML_DO_REINT::no space to save:%d < %d", + strlen (close->path) + 1, input.pathlen); + error = -ENOMEM; + } + copy_to_user((char *)arg, &input, sizeof (input)); + } + return error; +} + +/* END_KML_REINT */ +int end_kml_reint (struct file *file, unsigned long arg) +{ + /* Free KML buffer and related volume info */ + struct { + char *volname; + int namelen; +#if 0 + int count; + int newpos; +#endif + } input; + struct presto_file_set *fset = NULL; + struct kml_fsdata *kml_fsdata = NULL; + int error; + char *path; + + ENTRY; + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + PRESTO_ALLOC(path, char *, input.namelen + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.volname, input.namelen); + if ( error ) { + PRESTO_FREE(path, input.namelen + 1); + EXIT; + return error; + } + path[input.namelen] = '\0'; + fset = kml_getfset (path); + PRESTO_FREE(path, input.namelen + 1); + + kml_fsdata = FSET_GET_KMLDATA(fset); + delete_kmlrec (&kml_fsdata->kml_reint_cache); + + /* kml reint support */ + kml_fsdata->kml_reint_current = NULL; + kml_fsdata->kml_len = 0; + kml_fsdata->kml_reintpos = 0; + kml_fsdata->kml_count = 0; +#if 0 + input.newpos = kml_upc->newpos; + input.count = kml_upc->count; + copy_to_user((char *)arg, &input, sizeof (input)); +#endif + return error; +} diff --git a/fs/intermezzo/kml_decode.c b/fs/intermezzo/kml_decode.c new file mode 100644 index 000000000000..c7fc1c3f3e99 --- /dev/null +++ b/fs/intermezzo/kml_decode.c @@ -0,0 +1,1017 @@ +/* + * KML Decoding + * + * Copryright (C) 1996 Arthur Ma <arthur.ma@mountainviewdata.com> + * + * Copyright (C) 2001 Mountainview Data, Inc. 
+ */ +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_kml.h> + +static int size_round (int val); +static int unpack_create (struct kml_create *rec, char *buf, + int pos, int *rec_offs); +static int unpack_open (struct kml_open *rec, char *buf, + int pos, int *rec_offs); +static int unpack_symlink (struct kml_symlink *rec, char *buf, + int pos, int *rec_offs); +static int unpack_mknod (struct kml_mknod *rec, char *buf, + int pos, int *rec_offs); +static int unpack_link (struct kml_link *rec, char *buf, + int pos, int *rec_offs); +static int unpack_rename (struct kml_rename *rec, char *buf, + int pos, int *rec_offs); +static int unpack_unlink (struct kml_unlink *rec, char *buf, + int pos, int *rec_offs); +static int unpack_rmdir (struct kml_rmdir *rec, char *buf, + int pos, int *rec_offs); +static int unpack_setattr (struct kml_setattr *rec, char *buf, + int pos, int *rec_offs); +static int unpack_close (struct kml_close *rec, char *buf, + int pos, int *rec_offs); +static int unpack_mkdir (struct kml_mkdir *rec, char *buf, + int pos, int *rec_offs); + +#if 0 +static int unpack_endmark (struct kml_endmark *rec, char *buf, + int pos, int *rec_offs); +static void print_kml_endmark (struct kml_endmark *rec); +#endif + +static int kml_unpack (char *kml_buf, int rec_size, int kml_offset, + struct kml_rec **newrec); +static char *kml_version (struct presto_version *ver); +static void print_kml_prefix (struct big_journal_prefix *head); +static void print_kml_create (struct kml_create *rec); +static void print_kml_mkdir (struct kml_mkdir *rec); +static void print_kml_unlink (struct kml_unlink *rec); +static void print_kml_rmdir (struct kml_rmdir *rec); +static void print_kml_close (struct kml_close *rec); +static void print_kml_symlink (struct kml_symlink 
*rec); +static void print_kml_rename (struct kml_rename *rec); +static void print_kml_setattr (struct kml_setattr *rec); +static void print_kml_link (struct kml_link *rec); +static void print_kml_mknod (struct kml_mknod *rec); +static void print_kml_open (struct kml_open *rec); +static void print_kml_suffix (struct journal_suffix *tail); +static char *readrec (char *recbuf, int reclen, int pos, int *size); + +#define KML_PREFIX_WORDS 8 +static int kml_unpack (char *kml_buf, int rec_size, int kml_offset, + struct kml_rec **newrec) +{ + struct kml_rec *rec; + char *p; + int pos, rec_offs; + int error; + + ENTRY; + if (rec_size < sizeof (struct journal_prefix) + + sizeof (struct journal_suffix)) + return -EBADF; + + PRESTO_ALLOC(rec, struct kml_rec *, sizeof (struct kml_rec)); + if (rec == NULL) { + EXIT; + return -ENOMEM; + } + rec->rec_kml_offset = kml_offset; + rec->rec_size = rec_size; + p = kml_buf; + p = dlogit (&rec->rec_head, p, KML_PREFIX_WORDS * sizeof (int)); + p = dlogit (&rec->rec_head.groups, p, + sizeof (int) * rec->rec_head.ngroups); + + pos = sizeof (struct journal_prefix) + + sizeof (int) * rec->rec_head.ngroups; + switch (rec->rec_head.opcode) + { + case KML_CREATE: + error = unpack_create (&rec->rec_kml.create, + kml_buf, pos, &rec_offs); + break; + case KML_MKDIR: + error = unpack_mkdir (&rec->rec_kml.mkdir, + kml_buf, pos, &rec_offs); + break; + case KML_UNLINK: + error = unpack_unlink (&rec->rec_kml.unlink, + kml_buf, pos, &rec_offs); + break; + case KML_RMDIR: + error = unpack_rmdir (&rec->rec_kml.rmdir, + kml_buf, pos, &rec_offs); + break; + case KML_CLOSE: + error = unpack_close (&rec->rec_kml.close, + kml_buf, pos, &rec_offs); + break; + case KML_SYMLINK: + error = unpack_symlink (&rec->rec_kml.symlink, + kml_buf, pos, &rec_offs); + break; + case KML_RENAME: + error = unpack_rename (&rec->rec_kml.rename, + kml_buf, pos, &rec_offs); + break; + case KML_SETATTR: + error = unpack_setattr (&rec->rec_kml.setattr, + kml_buf, pos, &rec_offs); + 
break; + case KML_LINK: + error = unpack_link (&rec->rec_kml.link, + kml_buf, pos, &rec_offs); + break; + case KML_OPEN: + error = unpack_open (&rec->rec_kml.open, + kml_buf, pos, &rec_offs); + break; + case KML_MKNOD: + error = unpack_mknod (&rec->rec_kml.mknod, + kml_buf, pos, &rec_offs); + break; +#if 0 + case KML_ENDMARK: + error = unpack_endmark (&rec->rec_kml.endmark, + kml_buf, pos, &rec_offs); + break; +#endif + default: + CDEBUG (D_KML, "wrong opcode::%u\n", + rec->rec_head.opcode); + EXIT; + return -EINVAL; + } + if (error) { + PRESTO_FREE (rec, sizeof (struct kml_rec)); + return -EINVAL; + } + p = kml_buf + rec_offs; + p = dlogit (&rec->rec_tail, p, sizeof (struct journal_suffix)); + memset (&rec->kml_optimize, 0, sizeof (struct kml_optimize)); + *newrec = rec; + EXIT; + return 0; +} + +static int size_round (int val) +{ + return (val + 3) & (~0x3); +} + +static int unpack_create (struct kml_create *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 88; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->mode, p, sizeof (int)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_open (struct kml_open *rec, char *buf, + int pos, int *rec_offs) +{ + *rec_offs = pos; + return 0; +} + +static int unpack_symlink (struct kml_symlink *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 88; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit 
(&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->sourcepath = q; + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->sourcepath, pathlen + 1); + EXIT; + return -ENOMEM; + } + + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->targetpath = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_mknod (struct kml_mknod *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 96; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->mode, p, sizeof (int)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&rec->major, p, sizeof (int)); + p = dlogit (&rec->minor, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_link (struct kml_link *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 80; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit 
(&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->sourcepath = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->sourcepath, pathlen + 1); + EXIT; + return -ENOMEM; + } + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->targetpath = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_rename (struct kml_rename *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 104; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->sourcepath = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->sourcepath, pathlen + 1); + EXIT; + return -ENOMEM; + } + + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->targetpath = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_unlink (struct kml_unlink *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 80; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, 
sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->path, pathlen + 1); + EXIT; + return -ENOMEM; + } + + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->name = q; + + /* fix the presto_journal_unlink problem */ + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_rmdir (struct kml_rmdir *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 80; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->path, pathlen + 1); + EXIT; + return -ENOMEM; + } + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->name = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_setattr (struct kml_setattr *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 72; + struct kml_attr { + __u64 size, mtime, ctime; + } objattr; + int valid, mode, uid, gid, flags; + int 
pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version)); + p = dlogit (&valid, p, sizeof (int)); + p = dlogit (&mode, p, sizeof (int)); + p = dlogit (&uid, p, sizeof (int)); + p = dlogit (&gid, p, sizeof (int)); + p = dlogit (&objattr, p, sizeof (struct kml_attr)); + p = dlogit (&flags, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + rec->iattr.ia_valid = valid; + rec->iattr.ia_mode = mode; + rec->iattr.ia_uid = uid; + rec->iattr.ia_gid = gid; + rec->iattr.ia_size = objattr.size; + rec->iattr.ia_mtime = objattr.mtime; + rec->iattr.ia_ctime = objattr.ctime; + rec->iattr.ia_atime = 0; + rec->iattr.ia_attr_flags = flags; + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += pathlen; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_close (struct kml_close *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 52; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->open_mode, p, sizeof (int)); + p = dlogit (&rec->open_uid, p, sizeof (int)); + p = dlogit (&rec->open_gid, p, sizeof (int)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->ino, p, sizeof (__u64)); + p = dlogit (&rec->generation, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += pathlen; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_mkdir (struct kml_mkdir *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 88; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, 
sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->mode, p, sizeof (int)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += pathlen; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +#if 0 +static int unpack_endmark (struct kml_endmark *rec, char *buf, + int pos, int *rec_offs) +{ + char *p; + p = buf + pos; + p = dlogit (&rec->total, p, sizeof (int)); + + PRESTO_ALLOC (rec->kop, struct kml_kop_node *, + sizeof (struct kml_kop_node) * rec->total); + if (rec->kop == NULL) { + EXIT; + return -ENOMEM; + } + + p = dlogit (rec->kop, p, sizeof (struct kml_kop_node) * rec->total); + + *rec_offs = pos + sizeof (int) + sizeof (struct kml_kop_node) * rec->total; + return 0; +} +#endif + +static char *kml_version (struct presto_version *ver) +{ + static char buf[256]; + sprintf (buf, "mt::%lld, ct::%lld, size::%lld", + ver->pv_mtime, ver->pv_ctime, ver->pv_size); + return buf; +} + +static void print_kml_prefix (struct big_journal_prefix *head) +{ + int i; + + CDEBUG (D_KML, " === KML PREFIX\n"); + CDEBUG (D_KML, " len = %u\n", head->len); + CDEBUG (D_KML, " version = %u\n", head->version); + CDEBUG (D_KML, " pid = %u\n", head->pid); + CDEBUG (D_KML, " uid = %u\n", head->uid); + CDEBUG (D_KML, " fsuid = %u\n", head->fsuid); + CDEBUG (D_KML, " fsgid = %u\n", head->fsgid); + CDEBUG (D_KML, " opcode = %u\n", head->opcode); + CDEBUG (D_KML, " ngroup = %u", head->ngroups); + for (i = 0; i < head->ngroups; i++) + CDEBUG (D_KML, "%u ", head->groups[i]); + CDEBUG (D_KML, "\n"); +} + +static void print_kml_create (struct kml_create *rec) +{ + CDEBUG (D_KML, " === CREATE\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " 
new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " mode::%o\n", rec->mode); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); +} + +static void print_kml_mkdir (struct kml_mkdir *rec) +{ + CDEBUG (D_KML, " === MKDIR\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " mode::%o\n", rec->mode); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); +} + +static void print_kml_unlink (struct kml_unlink *rec) +{ + CDEBUG (D_KML, " === UNLINK\n"); + CDEBUG (D_KML, " path::%s/%s\n", rec->path, rec->name); + CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (&rec->old_tgtv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); +} + +static void print_kml_rmdir (struct kml_rmdir *rec) +{ + CDEBUG (D_KML, " === RMDIR\n"); + CDEBUG (D_KML, " path::%s/%s\n", rec->path, rec->name); + CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (&rec->old_tgtv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); +} + +static void print_kml_close (struct kml_close *rec) +{ + CDEBUG (D_KML, " === CLOSE\n"); + CDEBUG (D_KML, " mode::%o\n", rec->open_mode); + CDEBUG (D_KML, " uid::%d\n", rec->open_uid); + CDEBUG (D_KML, " gid::%d\n", rec->open_gid); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " ino::%lld\n", rec->ino); + CDEBUG (D_KML, " gen::%u\n", rec->generation); +} + +static void print_kml_symlink 
(struct kml_symlink *rec) +{ + CDEBUG (D_KML, " === SYMLINK\n"); + CDEBUG (D_KML, " s-path::%s\n", rec->sourcepath); + CDEBUG (D_KML, " t-path::%s\n", rec->targetpath); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); +} + +static void print_kml_rename (struct kml_rename *rec) +{ + CDEBUG (D_KML, " === RENAME\n"); + CDEBUG (D_KML, " s-path::%s\n", rec->sourcepath); + CDEBUG (D_KML, " t-path::%s\n", rec->targetpath); + CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (&rec->old_tgtv)); + CDEBUG (D_KML, " new_tgtv::%s\n", kml_version (&rec->new_tgtv)); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_objv::%s\n", kml_version (&rec->old_objectv)); +} + +static void print_kml_setattr (struct kml_setattr *rec) +{ + CDEBUG (D_KML, " === SETATTR\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " old_objv::%s\n", kml_version (&rec->old_objectv)); + CDEBUG (D_KML, " valid::0x%x\n", rec->iattr.ia_valid); + CDEBUG (D_KML, " mode::%o\n", rec->iattr.ia_mode); + CDEBUG (D_KML, " uid::%d\n", rec->iattr.ia_uid); + CDEBUG (D_KML, " gid::%d\n", rec->iattr.ia_gid); + CDEBUG (D_KML, " size::%u\n", (u32) rec->iattr.ia_size); + CDEBUG (D_KML, " mtime::%u\n", (u32) rec->iattr.ia_mtime); + CDEBUG (D_KML, " ctime::%u\n", (u32) rec->iattr.ia_ctime); + CDEBUG (D_KML, " flags::%u\n", (u32) rec->iattr.ia_attr_flags); +} + +static void print_kml_link (struct kml_link *rec) +{ + CDEBUG (D_KML, " === LINK\n"); + CDEBUG (D_KML, " path::%s ==> %s\n", rec->sourcepath, rec->targetpath); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_obj::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); +} + 
+static void print_kml_mknod (struct kml_mknod *rec) +{ + CDEBUG (D_KML, " === MKNOD\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " new_obj::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " mode::%o\n", rec->mode); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); + CDEBUG (D_KML, " major::%d\n", rec->major); + CDEBUG (D_KML, " minor::%d\n", rec->minor); +} + +static void print_kml_open (struct kml_open *rec) +{ + CDEBUG (D_KML, " === OPEN\n"); +} + +#if 0 +static void print_kml_endmark (struct kml_endmark *rec) +{ + int i; + CDEBUG (D_KML, " === ENDMARK\n"); + CDEBUG (D_KML, " total::%u\n", rec->total); + for (i = 0; i < rec->total; i++) + { + CDEBUG (D_KML, " recno=%ld::flag=%ld,op=%ld, i_ino=%ld, \ + i_nlink=%ld\n", (long) rec->kop[i].kml_recno, + (long) rec->kop[i].kml_flag, (long) rec->kop[i].kml_op, + (long) rec->kop[i].i_ino, (long) rec->kop[i].i_nlink); + } +} +#endif + +static void print_kml_optimize (struct kml_optimize *rec) +{ + CDEBUG (D_KML, " === OPTIMIZE\n"); + if (rec->kml_flag == KML_REC_DELETE) + CDEBUG (D_KML, " kml_flag::deleted\n"); + else + CDEBUG (D_KML, " kml_flag::exist\n"); + CDEBUG (D_KML, " kml_op::%u\n", rec->kml_op); + CDEBUG (D_KML, " i_nlink::%d\n", rec->i_nlink); + CDEBUG (D_KML, " i_ino::%u\n", rec->i_ino); +} + +static void print_kml_suffix (struct journal_suffix *tail) +{ + CDEBUG (D_KML, " === KML SUFFIX\n"); + CDEBUG (D_KML, " prevrec::%ld\n", tail->prevrec); + CDEBUG (D_KML, " recno::%ld\n", (long) tail->recno); + CDEBUG (D_KML, " time::%d\n", tail->time); + CDEBUG (D_KML, " len::%d\n", tail->len); +} + +void kml_printrec (struct kml_rec *rec, int kml_printop) +{ + if (kml_printop & PRINT_KML_PREFIX) + print_kml_prefix (&rec->rec_head); + if (kml_printop & PRINT_KML_REC) + { + switch (rec->rec_head.opcode) + { + case 
KML_CREATE: + print_kml_create (&rec->rec_kml.create); + break; + case KML_MKDIR: + print_kml_mkdir (&rec->rec_kml.mkdir); + break; + case KML_UNLINK: + print_kml_unlink (&rec->rec_kml.unlink); + break; + case KML_RMDIR: + print_kml_rmdir (&rec->rec_kml.rmdir); + break; + case KML_CLOSE: + print_kml_close (&rec->rec_kml.close); + break; + case KML_SYMLINK: + print_kml_symlink (&rec->rec_kml.symlink); + break; + case KML_RENAME: + print_kml_rename (&rec->rec_kml.rename); + break; + case KML_SETATTR: + print_kml_setattr (&rec->rec_kml.setattr); + break; + case KML_LINK: + print_kml_link (&rec->rec_kml.link); + break; + case KML_OPEN: + print_kml_open (&rec->rec_kml.open); + break; + case KML_MKNOD: + print_kml_mknod (&rec->rec_kml.mknod); + break; +#if 0 + case KML_ENDMARK: + print_kml_endmark (&rec->rec_kml.endmark); +#endif + break; + default: + CDEBUG (D_KML, " === BAD RECORD, opcode=%u\n", + rec->rec_head.opcode); + break; + } + } + if (kml_printop & PRINT_KML_SUFFIX) + print_kml_suffix (&rec->rec_tail); + if (kml_printop & PRINT_KML_OPTIMIZE) + print_kml_optimize (&rec->kml_optimize); +} + +void kml_freerec (struct kml_rec *rec) +{ + char *sourcepath = NULL, + *targetpath = NULL; + switch (rec->rec_head.opcode) + { + case KML_CREATE: + sourcepath = rec->rec_kml.create.path; + break; + case KML_MKDIR: + sourcepath = rec->rec_kml.create.path; + break; + case KML_UNLINK: + sourcepath = rec->rec_kml.unlink.path; + targetpath = rec->rec_kml.unlink.name; + break; + case KML_RMDIR: + sourcepath = rec->rec_kml.rmdir.path; + targetpath = rec->rec_kml.rmdir.name; + break; + case KML_CLOSE: + sourcepath = rec->rec_kml.close.path; + break; + case KML_SYMLINK: + sourcepath = rec->rec_kml.symlink.sourcepath; + targetpath = rec->rec_kml.symlink.targetpath; + break; + case KML_RENAME: + sourcepath = rec->rec_kml.rename.sourcepath; + targetpath = rec->rec_kml.rename.targetpath; + break; + case KML_SETATTR: + sourcepath = rec->rec_kml.setattr.path; + break; + case KML_LINK: + 
sourcepath = rec->rec_kml.link.sourcepath; + targetpath = rec->rec_kml.link.targetpath; + break; + case KML_OPEN: + break; + case KML_MKNOD: + sourcepath = rec->rec_kml.mknod.path; + break; +#if 0 + case KML_ENDMARK: + PRESTO_FREE (rec->rec_kml.endmark.kop, sizeof (int) + + sizeof (struct kml_kop_node) * + rec->rec_kml.endmark.total); +#endif + break; + default: + break; + } + if (sourcepath != NULL) + PRESTO_FREE (sourcepath, strlen (sourcepath) + 1); + if (targetpath != NULL) + PRESTO_FREE (targetpath, strlen (targetpath) + 1); +} + +char *readrec (char *recbuf, int reclen, int pos, int *size) +{ + char *p = recbuf + pos; + *size = *((int *) p); + if (*size > (reclen - pos)) + return NULL; + return p; +} + +int kml_decoderec (char *buf, int pos, int buflen, int *size, + struct kml_rec **newrec) +{ + char *tmp; + int error; + tmp = readrec (buf, buflen, pos, size); + if (tmp == NULL) + return -EBADF; + error = kml_unpack (tmp, *size, pos, newrec); + return error; +} + +#if 0 +static void fill_kmlrec_optimize (struct list_head *head, + struct kml_rec *optrec) +{ + struct kml_rec *kmlrec; + struct list_head *tmp; + struct kml_endmark *km; + struct kml_optimize *ko; + int n; + + if (optrec->rec_kml.endmark.total == 0) + return; + n = optrec->rec_kml.endmark.total - 1; + tmp = head->prev; + km = &optrec->rec_kml.endmark; + while ( n >= 0 && tmp != head ) + { + kmlrec = list_entry(tmp, struct kml_rec, + kml_optimize.kml_chains); + tmp = tmp->prev; + if (kmlrec->rec_tail.recno == km->kop[n].kml_recno) + { + ko = &kmlrec->kml_optimize; + ko->kml_flag = km->kop[n].kml_flag; + ko->kml_op = km->kop[n].kml_op; + ko->i_nlink = km->kop[n].i_nlink; + ko->i_ino = km->kop[n].i_ino; + n --; + } + } + if (n != -1) + CDEBUG (D_KML, "Yeah!!!, KML optimize error, recno=%d, n=%d\n", + optrec->rec_tail.recno, n); +} +#endif + +int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen) +{ + struct kml_rec *rec; + int pos = 0, size; + int err; + while (pos < buflen) { + err = 
kml_decoderec (kml_buf, pos, buflen, &size, &rec); + if (err != 0) + break; +#if 0 + if (rec->rec_head.opcode == KML_ENDMARK) { + fill_kmlrec_optimize (head, rec); + mark_rec_deleted (rec); + } +#endif + list_add_tail (&rec->kml_optimize.kml_chains, head); + pos += size; + } + return err; +} + +int delete_kmlrec (struct list_head *head) +{ + struct kml_rec *rec; + struct list_head *tmp; + + if (list_empty(head)) + return 0; + tmp = head->next; + while ( tmp != head ) { + rec = list_entry(tmp, struct kml_rec, + kml_optimize.kml_chains); + tmp = tmp->next; + kml_freerec (rec); + } + INIT_LIST_HEAD(head); + return 0; +} + +int print_allkmlrec (struct list_head *head, int printop) +{ + struct kml_rec *rec; + struct list_head *tmp; + + if (list_empty(head)) + return 0; + tmp = head->next; + while ( tmp != head ) { + rec = list_entry(tmp, struct kml_rec, + kml_optimize.kml_chains); + tmp = tmp->next; +#if 0 + if (printop & PRINT_KML_EXIST) { + if (is_deleted_node (rec)) + continue; + } + else if (printop & PRINT_KML_DELETE) { + if (! is_deleted_node (rec)) + continue; + } +#endif + kml_printrec (rec, printop); + } + INIT_LIST_HEAD(head); + return 0; +} + diff --git a/fs/intermezzo/kml_reint.c b/fs/intermezzo/kml_reint.c new file mode 100644 index 000000000000..bd2c058cf803 --- /dev/null +++ b/fs/intermezzo/kml_reint.c @@ -0,0 +1,411 @@ +/* + * KML REINT + * + * Copryright (C) 1996 Arthur Ma <arthur.ma@mountainviewdata.com> + * + * Copyright (C) 2000 Mountainview Data, Inc. 
+ */ + +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_kml.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_upcall.h> + +static void kmlreint_pre_secure (struct kml_rec *rec); +static void kmlreint_post_secure (struct kml_rec *rec); + +static void kmlreint_pre_secure (struct kml_rec *rec) +{ + if (current->fsuid != current->uid) + CDEBUG (D_KML, "reint_kmlreint_pre_secure: cannot setfsuid\n"); + if (current->fsgid != current->gid) + CDEBUG (D_KML, "reint_kmlreint_pre_secure: cannot setfsgid\n"); + current->fsuid = rec->rec_head.uid; + current->fsgid = rec->rec_head.fsgid; +} + +static void kmlreint_post_secure (struct kml_rec *rec) +{ + current->fsuid = current->uid; + current->fsgid = current->gid; + /* current->egid = current->gid; */ + /* ????????????? 
*/ +} + +static int reint_create (int slot_offset, struct kml_rec *rec) +{ + struct lento_vfs_context info; + struct kml_create *create = &rec->rec_kml.create; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + CDEBUG (D_KML, "=====REINT_CREATE::%s\n", create->path); + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_create(create->path, create->mode, &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + + EXIT; + return error; +} + +static int reint_open (int slot_offset, struct kml_rec *rec) +{ + return 0; +} + +static int reint_mkdir (int slot_offset, struct kml_rec *rec) +{ + struct lento_vfs_context info; + struct kml_mkdir *mkdir = &rec->rec_kml.mkdir; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_mkdir (mkdir->path, mkdir->mode, &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + + EXIT; + return error; +} + +static int reint_rmdir (int slot_offset, struct kml_rec *rec) +{ + struct kml_rmdir *rmdir = &rec->rec_kml.rmdir; + struct lento_vfs_context info; + mm_segment_t old_fs; + char *name; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + name = bdup_printf ("%s/%s", rmdir->path, rmdir->name); + if (name == NULL) + { + kmlreint_post_secure (rec); + EXIT; + return -ENOMEM; + } + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_rmdir (name, &info); + set_fs (old_fs); + + PRESTO_FREE (name, strlen (name) + 1); + kmlreint_post_secure (rec); + EXIT; + return error; +} + +static int reint_link (int slot_offset, struct kml_rec *rec) 
+{ + struct kml_link *link = &rec->rec_kml.link; + struct lento_vfs_context info; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_link (link->sourcepath, link->targetpath, &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + EXIT; + return error; +} + +static int reint_unlink (int slot_offset, struct kml_rec *rec) +{ + struct kml_unlink *unlink = &rec->rec_kml.unlink; + struct lento_vfs_context info; + mm_segment_t old_fs; + int error; + char *name; + + ENTRY; + kmlreint_pre_secure (rec); + name = bdup_printf ("%s/%s", unlink->path, unlink->name); + if (name == NULL) + { + kmlreint_post_secure (rec); + EXIT; + return -ENOMEM; + } + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_unlink (name, &info); + set_fs (old_fs); + PRESTO_FREE (name, strlen (name)); + kmlreint_post_secure (rec); + + EXIT; + return error; +} + +static int reint_symlink (int slot_offset, struct kml_rec *rec) +{ + struct kml_symlink *symlink = &rec->rec_kml.symlink; + struct lento_vfs_context info; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_symlink (symlink->targetpath, + symlink->sourcepath, &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + EXIT; + return error; +} + +static int reint_rename (int slot_offset, struct kml_rec *rec) +{ + struct kml_rename *rename = &rec->rec_kml.rename; + struct lento_vfs_context info; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + 
info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_rename (rename->sourcepath, rename->targetpath, &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + + EXIT; + return error; +} + +static int reint_setattr (int slot_offset, struct kml_rec *rec) +{ + struct kml_setattr *setattr = &rec->rec_kml.setattr; + struct lento_vfs_context info; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = setattr->iattr.ia_attr_flags; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_setattr (setattr->path, &setattr->iattr, &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + EXIT; + return error; +} + +static int reint_mknod (int slot_offset, struct kml_rec *rec) +{ + struct kml_mknod *mknod = &rec->rec_kml.mknod; + struct lento_vfs_context info; + mm_segment_t old_fs; + int error; + + ENTRY; + kmlreint_pre_secure (rec); + + info.slot_offset = slot_offset; + info.recno = rec->rec_tail.recno; + info.kml_offset = rec->rec_kml_offset; + info.flags = 0; + + old_fs = get_fs(); + set_fs (get_ds()); + error = lento_mknod (mknod->path, mknod->mode, + MKDEV(mknod->major, mknod->minor), &info); + set_fs (old_fs); + kmlreint_post_secure (rec); + EXIT; + return error; +} + +int kml_reint (char *mtpt, int slot_offset, struct kml_rec *rec) +{ + int error = 0; + switch (rec->rec_head.opcode) + { + case KML_CREATE: + error = reint_create (slot_offset, rec); + break; + case KML_OPEN: + error = reint_open (slot_offset, rec); + break; + case KML_CLOSE: + /* error = reint_close (slot_offset, rec); + force the system to return to lento */ + error = KML_CLOSE_BACKFETCH; + break; + case KML_MKDIR: + error = reint_mkdir (slot_offset, rec); + break; + case KML_RMDIR: + error = reint_rmdir (slot_offset, 
rec); + break; + case KML_UNLINK: + error = reint_unlink (slot_offset, rec); + break; + case KML_LINK: + error = reint_link (slot_offset, rec); + break; + case KML_SYMLINK: + error = reint_symlink (slot_offset, rec); + break; + case KML_RENAME: + error = reint_rename (slot_offset, rec); + break; + case KML_SETATTR: + error = reint_setattr (slot_offset, rec); + break; + case KML_MKNOD: + error = reint_mknod (slot_offset, rec); + break; + default: + CDEBUG (D_KML, "wrong opcode::%d\n", rec->rec_head.opcode); + return -EBADF; + } + if (error != 0 && error != KML_CLOSE_BACKFETCH) + CDEBUG (D_KML, "KML_ERROR::error = %d\n", error); + return error; +} + +/* return the old mtpt */ +/* +struct fs_struct { + atomic_t count; + int umask; + struct dentry * root, * pwd; +}; +*/ +static int do_set_fs_root (struct dentry *newroot, + struct dentry **old_root) +{ + struct dentry *de = current->fs->root; + current->fs->root = newroot; + if (old_root != (struct dentry **) NULL) + *old_root = de; + return 0; +} + +static int set_system_mtpt (char *mtpt, struct dentry **old_root) +{ + struct nameidata nd; + struct dentry *dentry; + int error; + + if (path_init(pathname, LOOKUP_PARENT, &nd)) + error = path_walk(mtpt, &nd); + if (error) { + CDEBUG (D_KML, "Yean!!!!::Can't find mtpt::%s\n", mtpt); + return error; + } + + dentry = nd.dentry; + error = do_set_fs_root (dentry, old_root); + path_release (&nd); + return error; +} + +int kml_reintbuf (struct kml_fsdata *kml_fsdata, + char *mtpt, struct kml_rec **close_rec) +{ + struct kml_rec *rec = NULL; + struct list_head *head, *tmp; + struct dentry *old_root; + int error = 0; + + head = &kml_fsdata->kml_reint_cache; + if (list_empty(head)) + return 0; + + if (kml_fsdata->kml_reint_current == NULL || + kml_fsdata->kml_reint_current == head->next) + return 0; + + error = set_system_mtpt (mtpt, &old_root); + if (error) + return error; + + tmp = head->next; + while (error == 0 && tmp != head ) { + rec = list_entry(tmp, struct kml_rec, 
kml_optimize.kml_chains); + error = kml_reint (mtpt, rec->rec_kml_offset, rec); + tmp = tmp->next; + } + + do_set_fs_root (old_root, NULL); + + if (error == KML_CLOSE_BACKFETCH) + *close_rec = rec; + kml_fsdata->kml_reint_current = tmp; + return error; +} + diff --git a/fs/intermezzo/kml_setup.c b/fs/intermezzo/kml_setup.c new file mode 100644 index 000000000000..c8ab345bfd63 --- /dev/null +++ b/fs/intermezzo/kml_setup.c @@ -0,0 +1,59 @@ +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#define __NO_VERSION__ +#include <linux/module.h> +#include <asm/uaccess.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +int kml_init (struct presto_file_set *fset) +{ + struct kml_fsdata *data; + + ENTRY; + PRESTO_ALLOC (data, struct kml_fsdata *, sizeof (struct kml_fsdata)); + if (data == NULL) { + EXIT; + return -ENOMEM; + } + INIT_LIST_HEAD (&data->kml_reint_cache); + INIT_LIST_HEAD (&data->kml_kop_cache); + + PRESTO_ALLOC (data->kml_buf, char *, KML_REINT_MAXBUF); + if (data->kml_buf == NULL) { + PRESTO_FREE (data, sizeof (struct kml_fsdata)); + EXIT; + return -ENOMEM; + } + + data->kml_maxsize = KML_REINT_MAXBUF; + data->kml_len = 0; + data->kml_reintpos = 0; + data->kml_count = 0; + fset->fset_kmldata = data; + EXIT; + return 0; +} + +int kml_cleanup (struct presto_file_set *fset) +{ + struct kml_fsdata *data = fset->fset_kmldata; + + if (data == NULL) + return 0; + + fset->fset_kmldata = NULL; +#if 0 + kml_sop_cleanup (&data->kml_reint_cache); + kml_kop_cleanup (&data->kml_kop_cache); +#endif + PRESTO_FREE (data->kml_buf, KML_REINT_MAXBUF); + PRESTO_FREE (data, sizeof (struct kml_fsdata)); + return 0; +} + + diff --git a/fs/intermezzo/kml_utils.c b/fs/intermezzo/kml_utils.c new file mode 100644 index 000000000000..b1c60f90deab --- /dev/null +++ b/fs/intermezzo/kml_utils.c @@ -0,0 +1,44 @@ +#include <linux/list.h> +#include <linux/mm.h> 
+#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_kml.h> + + +// dlogit -- oppsite to logit () +// return the sbuf + size; +char *dlogit (void *tbuf, const void *sbuf, int size) +{ + char *ptr = (char *)sbuf; + memcpy(tbuf, ptr, size); + ptr += size; + return ptr; +} + +static spinlock_t kml_lock = SPIN_LOCK_UNLOCKED; +static char buf[1024]; +char * bdup_printf (char *format, ...) +{ + va_list args; + int i; + char *path; + long flags; + + spin_lock_irqsave(&kml_lock, flags); + va_start(args, format); + i = vsprintf(buf, format, args); /* hopefully i < sizeof(buf) */ + va_end(args); + + PRESTO_ALLOC (path, char *, i + 1); + if (path == NULL) + return NULL; + strcpy (path, buf); + + spin_unlock_irqrestore(&kml_lock, flags); + return path; +} + + diff --git a/fs/intermezzo/methods.c b/fs/intermezzo/methods.c new file mode 100644 index 000000000000..2620f8a09e4a --- /dev/null +++ b/fs/intermezzo/methods.c @@ -0,0 +1,460 @@ +/* + * + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Mountain View Data, Inc. + * + * Extended Attribute Support + * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. 
+ */ + +#include <stdarg.h> + +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#define __NO_VERSION__ +#include <linux/module.h> + +#include <linux/fsfilter.h> +#include <linux/intermezzo_fs.h> + + +int filter_print_entry = 0; +int filter_debug = 0xfffffff; +/* + * The function in this file are responsible for setting up the + * correct methods layered file systems like InterMezzo and snapfs + */ + + +static struct filter_fs filter_oppar[FILTER_FS_TYPES]; + +/* get to the upper methods (intermezzo, snapfs) */ +inline struct super_operations *filter_c2usops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_sops; +} + +inline struct inode_operations *filter_c2udiops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_dir_iops; +} + + +inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_file_iops; +} + +inline struct inode_operations *filter_c2usiops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_sym_iops; +} + + +inline struct file_operations *filter_c2udfops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_dir_fops; +} + +inline struct file_operations *filter_c2uffops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_file_fops; +} + +inline struct file_operations *filter_c2usfops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_sym_fops; +} + +inline struct dentry_operations *filter_c2udops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_dentry_ops; +} + +/* get to the cache (lower) methods */ +inline struct super_operations *filter_c2csops(struct filter_fs *cache) +{ + return cache->o_caops.cache_sops; +} + +inline struct 
inode_operations *filter_c2cdiops(struct filter_fs *cache) +{ + return cache->o_caops.cache_dir_iops; +} + +inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache) +{ + return cache->o_caops.cache_file_iops; +} + +inline struct inode_operations *filter_c2csiops(struct filter_fs *cache) +{ + return cache->o_caops.cache_sym_iops; +} + +inline struct file_operations *filter_c2cdfops(struct filter_fs *cache) +{ + return cache->o_caops.cache_dir_fops; +} + +inline struct file_operations *filter_c2cffops(struct filter_fs *cache) +{ + return cache->o_caops.cache_file_fops; +} + +inline struct file_operations *filter_c2csfops(struct filter_fs *cache) +{ + return cache->o_caops.cache_sym_fops; +} + +inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache) +{ + return cache->o_caops.cache_dentry_ops; +} + + +void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type) +{ + if ( strlen(cache_type) == strlen("ext2") && + memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) { +#if CONFIG_EXT2_FS + ops->o_trops = &presto_ext2_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("ext3") && + memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { +#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) + ops->o_trops = &presto_ext3_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("reiserfs") && + memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) { +#if 0 + /* #if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) */ + ops->o_trops = &presto_reiserfs_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("xfs") && + memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) { +#if defined(CONFIG_XFS_FS) || defined (CONFIG_XFS_FS_MODULE) + ops->o_trops = &presto_xfs_journal_ops; +#else + 
ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("obdfs") && + memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) { +#if defined(CONFIG_OBDFS_FS) || defined (CONFIG_OBDFS_FS_MODULE) + ops->o_trops = presto_obdfs_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } +} + + +/* find the cache for this FS */ +struct filter_fs *filter_get_filter_fs(const char *cache_type) +{ + struct filter_fs *ops = NULL; + FENTRY; + + if ( strlen(cache_type) == strlen("ext2") && + memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) { + ops = &filter_oppar[FILTER_FS_EXT2]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("xfs") && + memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_XFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("ext3") && + memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { + ops = &filter_oppar[FILTER_FS_EXT3]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + if ( strlen(cache_type) == strlen("reiserfs") && + memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_REISERFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + if ( strlen(cache_type) == strlen("obdfs") && + memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_OBDFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if (ops == NULL) { + printk("prepare to die: unrecognized cache type for Filter\n"); + } + return ops; + FEXIT; +} + + +/* + * Frobnicate the InterMezzo operations + * this establishes the link between the InterMezzo file system + * and the underlying file system used for the cache. + */ + +void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops) +{ + /* Get ptr to the shared struct snapfs_ops structure. 
*/ + struct filter_ops *props = &cache->o_fops; + /* Get ptr to the shared struct cache_ops structure. */ + struct cache_ops *caops = &cache->o_caops; + + FENTRY; + + if ( cache->o_flags & FILTER_DID_SUPER_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_SUPER_OPS; + + /* Set the cache superblock operations to point to the + superblock operations of the underlying file system. */ + caops->cache_sops = cache_sops; + + /* + * Copy the cache (real fs) superblock ops to the "filter" + * superblock ops as defaults. Some will be changed below + */ + memcpy(&props->filter_sops, cache_sops, sizeof(*cache_sops)); + + /* 'put_super' unconditionally is that of filter */ + if (filter_sops->put_super) { + props->filter_sops.put_super = filter_sops->put_super; + } + + if (cache_sops->read_inode) { + props->filter_sops.read_inode = filter_sops->read_inode; + FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n", + cache, cache, props->filter_sops.read_inode); + } + + if (cache_sops->remount_fs) + props->filter_sops.remount_fs = filter_sops->remount_fs; + FEXIT; +} + + +void filter_setup_dir_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) +{ + struct inode_operations *cache_filter_iops; + struct inode_operations *cache_iops = inode->i_op; + struct file_operations *cache_fops = inode->i_fop; + FENTRY; + + if ( cache->o_flags & FILTER_DID_DIR_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_DIR_OPS; + + /* former ops become cache_ops */ + cache->o_caops.cache_dir_iops = cache_iops; + cache->o_caops.cache_dir_fops = cache_fops; + FDEBUG(D_SUPER, "filter at %p, cache iops %p, iops %p\n", + cache, cache_iops, filter_c2udiops(cache)); + + /* setup our dir iops: copy and modify */ + memcpy(filter_c2udiops(cache), cache_iops, sizeof(*cache_iops)); + + /* abbreviate */ + cache_filter_iops = filter_c2udiops(cache); + + /* methods that filter if cache filesystem 
has these ops */ + if (cache_iops->lookup && filter_iops->lookup) + cache_filter_iops->lookup = filter_iops->lookup; + if (cache_iops->create && filter_iops->create) + cache_filter_iops->create = filter_iops->create; + if (cache_iops->link && filter_iops->link) + cache_filter_iops->link = filter_iops->link; + if (cache_iops->unlink && filter_iops->unlink) + cache_filter_iops->unlink = filter_iops->unlink; + if (cache_iops->mkdir && filter_iops->mkdir) + cache_filter_iops->mkdir = filter_iops->mkdir; + if (cache_iops->rmdir && filter_iops->rmdir) + cache_filter_iops->rmdir = filter_iops->rmdir; + if (cache_iops->symlink && filter_iops->symlink) + cache_filter_iops->symlink = filter_iops->symlink; + if (cache_iops->rename && filter_iops->rename) + cache_filter_iops->rename = filter_iops->rename; + if (cache_iops->mknod && filter_iops->mknod) + cache_filter_iops->mknod = filter_iops->mknod; + if (cache_iops->permission && filter_iops->permission) + cache_filter_iops->permission = filter_iops->permission; + if (cache_iops->getattr) + cache_filter_iops->getattr = filter_iops->getattr; + /* Some filesystems do not use a setattr method of their own + instead relying on inode_setattr/write_inode. We still need to + journal these so we make setattr an unconditional operation. + XXX: we should probably check for write_inode. SHP + */ + /*if (cache_iops->setattr)*/ + cache_filter_iops->setattr = filter_iops->setattr; +#ifdef CONFIG_FS_EXT_ATTR + /* For now we assume that posix acls are handled through extended + * attributes. If this is not the case, we must explicitly trap + * posix_set_acl. 
SHP + */ + if (cache_iops->set_ext_attr && filter_iops->set_ext_attr) + cache_filter_iops->set_ext_attr = filter_iops->set_ext_attr; +#endif + + + /* copy dir fops */ + memcpy(filter_c2udfops(cache), cache_fops, sizeof(*cache_fops)); + + /* unconditional filtering operations */ + filter_c2udfops(cache)->open = filter_fops->open; + + FEXIT; +} + + +void filter_setup_file_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) +{ + struct inode_operations *pr_iops; + struct inode_operations *cache_iops = inode->i_op; + struct file_operations *cache_fops = inode->i_fop; + FENTRY; + + if ( cache->o_flags & FILTER_DID_FILE_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_FILE_OPS; + + /* steal the old ops */ + /* former ops become cache_ops */ + cache->o_caops.cache_file_iops = cache_iops; + cache->o_caops.cache_file_fops = cache_fops; + + /* abbreviate */ + pr_iops = filter_c2ufiops(cache); + + /* setup our dir iops: copy and modify */ + memcpy(pr_iops, cache_iops, sizeof(*cache_iops)); + + /* copy dir fops */ + printk("*** cache file ops at %p\n", cache_fops); + memcpy(filter_c2uffops(cache), cache_fops, sizeof(*cache_fops)); + + /* assign */ + /* See comments above in filter_setup_dir_ops. SHP */ + /*if (cache_iops->setattr)*/ + pr_iops->setattr = filter_iops->setattr; + if (cache_iops->getattr) + pr_iops->getattr = filter_iops->getattr; +#ifdef CONFIG_FS_EXT_ATTR + /* For now we assume that posix acls are handled through extended + * attributes. 
If this is not the case, we must explicitly trap and + * posix_set_acl + */ + if (cache_iops->set_ext_attr && filter_iops->set_ext_attr) + pr_iops->set_ext_attr = filter_iops->set_ext_attr; +#endif + + + /* unconditional filtering operations */ + filter_c2uffops(cache)->open = filter_fops->open; + filter_c2uffops(cache)->release = filter_fops->release; + filter_c2uffops(cache)->write = filter_fops->write; + + FEXIT; +} + +/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */ +void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) +{ + struct inode_operations *pr_iops; + struct inode_operations *cache_iops = inode->i_op; + struct file_operations *cache_fops = inode->i_fop; + FENTRY; + + if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_SYMLINK_OPS; + + /* steal the old ops */ + cache->o_caops.cache_sym_iops = cache_iops; + cache->o_caops.cache_sym_fops = cache_fops; + + /* abbreviate */ + pr_iops = filter_c2usiops(cache); + + /* setup our dir iops: copy and modify */ + memcpy(pr_iops, cache_iops, sizeof(*cache_iops)); + + /* See comments above in filter_setup_dir_ops. 
SHP */ + /* if (cache_iops->setattr) */ + pr_iops->setattr = filter_iops->setattr; + if (cache_iops->getattr) + pr_iops->getattr = filter_iops->getattr; + + /* assign */ + /* copy fops - careful for symlinks they might be NULL */ + if ( cache_fops ) { + memcpy(filter_c2usfops(cache), cache_fops, sizeof(*cache_fops)); + } + + FEXIT; +} + +void filter_setup_dentry_ops(struct filter_fs *cache, + struct dentry_operations *cache_dop, + struct dentry_operations *filter_dop) +{ + if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_DENTRY_OPS; + + cache->o_caops.cache_dentry_ops = cache_dop; + memcpy(&cache->o_fops.filter_dentry_ops, + filter_dop, sizeof(*filter_dop)); + + if (cache_dop && cache_dop != filter_dop && cache_dop->d_revalidate){ + printk("WARNING: filter overriding revalidation!\n"); + } + return; +} diff --git a/fs/intermezzo/presto.c b/fs/intermezzo/presto.c new file mode 100644 index 000000000000..699f07e10f6f --- /dev/null +++ b/fs/intermezzo/presto.c @@ -0,0 +1,1149 @@ +/* + * intermezzo.c + * + * This file implements basic routines supporting the semantics + * + * Author: Peter J. Braam <braam@cs.cmu.edu> + * Copyright (C) 1998 Stelias Computing Inc + * Copyright (C) 1999 Red Hat Inc. 
+ * + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#include <linux/smp_lock.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +extern int presto_init_last_rcvd_file(struct presto_file_set *); +extern int presto_init_lml_file(struct presto_file_set *); +extern int presto_init_kml_file(struct presto_file_set *); + +int presto_walk(const char *name, struct nameidata *nd) +{ + int err; + /* we do not follow symlinks to support symlink operations + correctly. The vfs should always hand us resolved dentries + so we should not be required to use LOOKUP_FOLLOW. At the + reintegrating end, lento again should be working with the + resolved pathname and not the symlink. SHP + XXX: This code implies that direct symlinks do not work. 
SHP + */ + unsigned int flags = LOOKUP_POSITIVE; + + ENTRY; + err = 0; + if (path_init(name, flags, nd)) + err = path_walk(name, nd); + return err; +} + +inline struct presto_dentry_data *presto_d2d(struct dentry *dentry) +{ + return (struct presto_dentry_data *)dentry->d_fsdata; +} + +static inline struct presto_file_set *presto_dentry2fset(struct dentry *dentry) +{ + if (dentry->d_fsdata == NULL) { + printk("fucked dentry: %p\n", dentry); + BUG(); + } + return presto_d2d(dentry)->dd_fset; +} + +/* find the presto minor device for this inode */ +int presto_i2m(struct inode *inode) +{ + struct presto_cache *cache; + ENTRY; + cache = presto_get_cache(inode); + CDEBUG(D_PSDEV, "\n"); + if ( !cache ) { + printk("PRESTO: BAD: cannot find cache for dev %d, ino %ld\n", + inode->i_dev, inode->i_ino); + EXIT; + return -1; + } + EXIT; + return cache->cache_psdev->uc_minor; +} + +inline int presto_f2m(struct presto_file_set *fset) +{ + return fset->fset_cache->cache_psdev->uc_minor; + +} + +inline int presto_c2m(struct presto_cache *cache) +{ + return cache->cache_psdev->uc_minor; + +} + +int presto_has_all_data(struct inode *inode) +{ + ENTRY; + + if ( (inode->i_size >> inode->i_sb->s_blocksize_bits) > + inode->i_blocks) { + EXIT; + return 0; + } + EXIT; + return 1; + +} + +/* find the fileset dentry for this dentry */ +struct presto_file_set *presto_fset(struct dentry *de) +{ + struct dentry *fsde; + ENTRY; + fsde = de; + for ( ; ; ) { + if ( presto_dentry2fset(fsde) ) { + EXIT; + return presto_dentry2fset(fsde); + } + /* are we at the cache "/" ?? 
*/ + if ( fsde->d_parent == fsde ) { + if ( !de->d_inode ) { + printk("Warning %*s has no fileset inode.\n", + de->d_name.len, de->d_name.name); + } + /* better to return a BAD thing */ + EXIT; + return NULL; + } + fsde = fsde->d_parent; + } + /* not reached */ + EXIT; + return NULL; +} + +/* XXX check this out */ +struct presto_file_set *presto_path2fileset(const char *name) +{ + struct nameidata nd; + struct presto_file_set *fileset; + int error; + ENTRY; + + error = presto_walk(name, &nd); + if (!error) { +#if 0 + error = do_revalidate(nd.dentry); +#endif + if (!error) + fileset = presto_fset(nd.dentry); + path_release(&nd); + EXIT; + } else + fileset = ERR_PTR(error); + + EXIT; + return fileset; +} + +/* check a flag on this dentry or fset root. Semantics: + - most flags: test if it is set + - PRESTO_ATTR, PRESTO_DATA return 1 if PRESTO_FSETINSYNC is set +*/ +int presto_chk(struct dentry *dentry, int flag) +{ + int minor; + struct presto_file_set *fset = presto_fset(dentry); + + ENTRY; + minor = presto_i2m(dentry->d_inode); + if ( upc_comms[minor].uc_no_filter ) { + EXIT; + return ~0; + } + + /* if the fileset is in sync DATA and ATTR are OK */ + if ( fset && + (flag == PRESTO_ATTR || flag == PRESTO_DATA) && + (fset->fset_flags & FSET_INSYNC) ) { + CDEBUG(D_INODE, "fset in sync (ino %ld)!\n", + fset->fset_mtpt->d_inode->i_ino); + EXIT; + return 1; + } + + EXIT; + return (presto_d2d(dentry)->dd_flags & flag); +} + +/* set a bit in the dentry flags */ +void presto_set(struct dentry *dentry, int flag) +{ + + ENTRY; + if ( dentry->d_inode ) { + CDEBUG(D_INODE, "SET ino %ld, flag %x\n", + dentry->d_inode->i_ino, flag); + } + presto_d2d(dentry)->dd_flags |= flag; + EXIT; +} + +/* given a path: complete the closes on the fset */ +int lento_complete_closes(char *path) +{ + struct nameidata nd; + struct dentry *dentry; + int error; + struct presto_file_set *fset; + ENTRY; + + + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + + dentry = 
nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_complete; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto out_complete; + } + + /* transactions and locking are internal to this function */ + error = presto_complete_lml(fset); + + EXIT; + out_complete: + path_release(&nd); + return error; +} + +/* set the fset recno and offset to a given value */ +int lento_reset_fset(char *path, __u64 offset, __u32 recno) +{ + struct nameidata nd; + struct dentry *dentry; + int error; + struct presto_file_set *fset; + ENTRY; + + + error = presto_walk(path, &nd); + if (error) + return error; + + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_complete; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto out_complete; + } + + write_lock(&fset->fset_kml.fd_lock); + fset->fset_kml.fd_recno = recno; + fset->fset_kml.fd_offset = offset; + read_lock(&fset->fset_kml.fd_lock); + + EXIT; + out_complete: + path_release(&nd); + return error; +} + + + +/* given a path, write an LML record for it - thus must have root's + group array settings, since lento is doing this +*/ +int lento_write_lml(char *path, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct presto_version *remote_file_version) +{ + struct nameidata nd; + struct rec_info rec; + struct dentry *dentry; + struct file file; + int error; + struct presto_file_set *fset; + ENTRY; + + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + file.f_dentry = dentry; + file.private_data = NULL; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_lml; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto out_lml; + } + + + /* setting offset to -1 appends 
*/ + rec.offset = -1; + /* this only requires a transaction below which is automatic */ + error = presto_write_lml_close(&rec, + fset, + &file, + remote_ino, + remote_generation, + remote_version, + remote_file_version); + + EXIT; + out_lml: + path_release(&nd); + return error; +} + +/* given a path: write a close record and cancel an LML record, finally + call truncate LML. Lento is doing this so it goes in with uid/gid's + root. +*/ +int lento_cancel_lml(char *path, + __u64 lml_offset, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct lento_vfs_context *info) +{ + struct nameidata nd; + struct rec_info rec; + struct dentry *dentry; + int error; + struct presto_file_set *fset; + void *handle; + struct presto_version new_ver; + ENTRY; + + + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_cancel_lml; + } + + fset = presto_fset(dentry); + + error=-EINVAL; + if (fset==NULL) { + printk("No fileset!\n"); + EXIT; + goto out_cancel_lml; + } + + /* this only requires a transaction below which is automatic */ + handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE); + if ( !handle ) { + error = -ENOMEM; + EXIT; + goto out_cancel_lml; + } + + if (info->flags & LENTO_FL_CANCEL_LML) { + error = presto_clear_lml_close(fset, lml_offset); + if ( error ) { + presto_trans_commit(fset, handle); + EXIT; + goto out_cancel_lml; + } + } + + + if (info->flags & LENTO_FL_WRITE_KML) { + struct file file; + file.private_data = NULL; + file.f_dentry = dentry; + presto_getversion(&new_ver, dentry->d_inode); + error = presto_journal_close(&rec, fset, &file, dentry, + &new_ver); + if ( error ) { + EXIT; + presto_trans_commit(fset, handle); + goto out_cancel_lml; + } + } + + if (info->flags & LENTO_FL_WRITE_EXPECT) { + error = presto_write_last_rcvd(&rec, fset, info); + if ( error ) { + EXIT; + presto_trans_commit(fset, 
handle); + goto out_cancel_lml; + } + } + + presto_trans_commit(fset, handle); + + if (info->flags & LENTO_FL_CANCEL_LML) { + presto_truncate_lml(fset); + } + + + out_cancel_lml: + EXIT; + path_release(&nd); + return error; +} + + +/* given a path, operate on the flags in its dentry. Used by downcalls */ +int presto_mark_dentry(const char *name, int and_flag, int or_flag, + int *res) +{ + struct nameidata nd; + struct dentry *dentry; + int error; + + error = presto_walk(name, &nd); + if (error) + return error; + dentry = nd.dentry; + + CDEBUG(D_INODE, "name: %s, and flag %x, or flag %x, dd_flags %x\n", + name, and_flag, or_flag, presto_d2d(dentry)->dd_flags); + + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) + goto out; + + error = 0; + + presto_d2d(dentry)->dd_flags &= and_flag; + presto_d2d(dentry)->dd_flags |= or_flag; + if (res) + *res = presto_d2d(dentry)->dd_flags; + + // XXX this check makes no sense as d_count can change anytime. + /* indicate if we were the only users while changing the flag */ + if ( atomic_read(&dentry->d_count) > 1 ) + error = -EBUSY; + +out: + path_release(&nd); + return error; +} + +/* given a path, operate on the flags in its cache. 
Used by mark_ioctl */ +int presto_mark_cache(const char *name, int and_flag, int or_flag, + int *res) +{ + struct nameidata nd; + struct dentry *dentry; + struct presto_cache *cache; + int error; + + CDEBUG(D_INODE, + "presto_mark_cache :: name: %s, and flag %x, or flag %x\n", + name, and_flag, or_flag); + + error = presto_walk(name, &nd); + if (error) + return error; + + dentry = nd.dentry; + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) + goto out; + + error = -EBADF; + cache = presto_get_cache(dentry->d_inode); + if ( !cache ) { + printk("PRESTO: BAD: cannot find cache in presto_mark_cache\n"); + make_bad_inode(dentry->d_inode); + goto out; + } + error = 0; + ((int)cache->cache_flags) &= and_flag; + ((int)cache->cache_flags) |= or_flag; + if (res) { + *res = (int)cache->cache_flags; + } + +out: + path_release(&nd); + return error; +} + +int presto_mark_fset_dentry(struct dentry *dentry, int and_flag, int or_flag, + int * res) +{ + int error; + struct presto_file_set *fset; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) + return error; + + error = -EBADF; + fset = presto_fset(dentry); + if ( !fset ) { + printk("PRESTO: BAD: cannot find cache in presto_mark_cache\n"); + make_bad_inode(dentry->d_inode); + return error; + } + error = 0; + ((int)fset->fset_flags) &= and_flag; + ((int)fset->fset_flags) |= or_flag; + if (res) { + *res = (int)fset->fset_flags; + } + + return error; +} + +/* given a path, operate on the flags in its cache. 
Used by mark_ioctl */ +inline int presto_mark_fset(const char *name, int and_flag, int or_flag, + int * res) +{ + struct nameidata nd; + struct dentry *dentry; + int error; + ENTRY; + + error = presto_walk(name, &nd); + if (error) + return error; + + + dentry = nd.dentry; + error = presto_mark_fset_dentry(dentry, and_flag, or_flag, res); + + path_release(&nd); + return error; +} + + +/* talk to Lento about the permit */ +static int presto_permit_upcall(struct dentry *dentry) +{ + int rc; + char *path, *buffer; + int pathlen; + int minor; + int fsetnamelen; + struct presto_file_set *fset = NULL; + + if ( (minor = presto_i2m(dentry->d_inode)) < 0) + return -EINVAL; + + fset = presto_fset(dentry); + if (!fset) { + EXIT; + return -ENOTCONN; + } + + if ( !presto_lento_up(minor) ) { + if ( fset->fset_flags & FSET_STEAL_PERMIT ) { + return 0; + } else { + return -ENOTCONN; + } + } + + PRESTO_ALLOC(buffer, char *, PAGE_SIZE); + if ( !buffer ) { + printk("PRESTO: out of memory!\n"); + return -ENOMEM; + } + path = presto_path(dentry, fset->fset_mtpt, buffer, PAGE_SIZE); + pathlen = MYPATHLEN(buffer, path); + fsetnamelen = strlen(fset->fset_name); + rc = lento_permit(minor, pathlen, fsetnamelen, path, fset->fset_name); + PRESTO_FREE(buffer, PAGE_SIZE); + return rc; +} + +/* get a write permit for the fileset of this inode + * - if this returns a negative value there was an error + * - if 0 is returned the permit was already in the kernel -- or -- + * Lento gave us the permit without reintegration + * - lento returns the number of records it reintegrated + */ +int presto_get_permit(struct inode * inode) +{ + struct dentry *de; + struct presto_file_set *fset; + int minor = presto_i2m(inode); + int rc; + + ENTRY; + if (minor < 0) { + EXIT; + return -1; + } + + if ( ISLENTO(minor) ) { + EXIT; + return -EINVAL; + } + + if (list_empty(&inode->i_dentry)) { + printk("No alias for inode %d\n", (int) inode->i_ino); + EXIT; + return -EINVAL; + } + + de = list_entry(inode->i_dentry.next, 
struct dentry, d_alias); + + fset = presto_fset(de); + if ( !fset ) { + printk("Presto: no fileset in presto_get_permit!\n"); + EXIT; + return -EINVAL; + } + + if (fset->fset_flags & FSET_HASPERMIT) { + lock_kernel(); + fset->fset_permit_count++; + CDEBUG(D_INODE, "permit count now %d, inode %lx\n", + fset->fset_permit_count, inode->i_ino); + unlock_kernel(); + EXIT; + return 0; + } else { + /* Allow reintegration to proceed without locks -SHP */ + rc = presto_permit_upcall(fset->fset_mtpt); + lock_kernel(); + if ( !rc ) { + presto_mark_fset_dentry + (fset->fset_mtpt, ~0, FSET_HASPERMIT, NULL); + fset->fset_permit_count++; + } + CDEBUG(D_INODE, "permit count now %d, ino %lx (likely 1), rc %d\n", + fset->fset_permit_count, inode->i_ino, rc); + unlock_kernel(); + EXIT; + return rc; + } +} + +int presto_put_permit(struct inode * inode) +{ + struct dentry *de; + struct presto_file_set *fset; + int minor = presto_i2m(inode); + + ENTRY; + if (minor < 0) { + EXIT; + return -1; + } + + if ( ISLENTO(minor) ) { + EXIT; + return -1; + } + + if (list_empty(&inode->i_dentry)) { + printk("No alias for inode %d\n", (int) inode->i_ino); + EXIT; + return -1; + } + + de = list_entry(inode->i_dentry.next, struct dentry, d_alias); + + fset = presto_fset(de); + if ( !fset ) { + printk("Presto: no fileset in presto_get_permit!\n"); + EXIT; + return -1; + } + + lock_kernel(); + if (fset->fset_flags & FSET_HASPERMIT) { + if (fset->fset_permit_count > 0) fset->fset_permit_count--; + else printk("Put permit while permit count is 0, inode %lx!\n", + inode->i_ino); + } else { + fset->fset_permit_count=0; + printk("Put permit while no permit, inode %lx, flags %x!\n", + inode->i_ino, fset->fset_flags); + } + + CDEBUG(D_INODE, "permit count now %d, inode %lx\n", + fset->fset_permit_count, inode->i_ino); + + if (fset->fset_flags & FSET_PERMIT_WAITING && + fset->fset_permit_count == 0) { + CDEBUG(D_INODE, "permit count now 0, ino %lx, notify Lento\n", + inode->i_ino); + 
presto_mark_fset_dentry(fset->fset_mtpt, ~FSET_PERMIT_WAITING, 0, NULL); + presto_mark_fset_dentry(fset->fset_mtpt, ~FSET_HASPERMIT, 0, NULL); + lento_release_permit(fset->fset_cache->cache_psdev->uc_minor, + fset->fset_permit_cookie); + fset->fset_permit_cookie = 0; + } + unlock_kernel(); + + EXIT; + return 0; +} + + +void presto_getversion(struct presto_version * presto_version, + struct inode * inode) +{ + presto_version->pv_mtime = cpu_to_le64((__u64)inode->i_mtime); + presto_version->pv_ctime = cpu_to_le64((__u64)inode->i_ctime); + presto_version->pv_size = cpu_to_le64((__u64)inode->i_size); +} + +/* + * note: this routine "pins" a dentry for a fileset root + */ +int presto_set_fsetroot(char *path, char *fsetname, unsigned int fsetid, + unsigned int flags) +{ + struct presto_file_set *fset; + struct presto_file_set *fset2; + struct dentry *dentry; + struct presto_cache *cache; + int error; + + ENTRY; + + PRESTO_ALLOC(fset, struct presto_file_set *, sizeof(*fset)); + error = -ENOMEM; + if ( !fset ) { + printk(KERN_ERR "No memory allocating fset for %s\n", fsetname); + EXIT; + return -ENOMEM; + } + CDEBUG(D_INODE, "fset at %p\n", fset); + + printk("presto: fsetroot: path %s, fileset name %s\n", path, fsetname); + error = presto_walk(path, &fset->fset_nd); + CDEBUG(D_INODE, "\n"); + if (error) { + EXIT; + goto out_free; + } + dentry = fset->fset_nd.dentry; + CDEBUG(D_INODE, "\n"); + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_dput; + } + + CDEBUG(D_INODE, "\n"); + cache = presto_get_cache(dentry->d_inode); + if (!cache) { + printk(KERN_ERR "No cache found for %s\n", path); + EXIT; + goto out_dput; + } + + CDEBUG(D_INODE, "\n"); + error = -EINVAL; + if ( !cache->cache_mtpt) { + printk(KERN_ERR "Presto - no mountpoint: fsetroot fails!\n"); + EXIT; + goto out_dput; + } + CDEBUG(D_INODE, "\n"); + + if (!cache->cache_root_fileset) { + printk(KERN_ERR "Presto - no file set: fsetroot fails!\n"); + EXIT; + goto out_dput; + } + + 
error = -EEXIST; + CDEBUG(D_INODE, "\n"); + + fset2 = presto_fset(dentry); + if (fset2 && (fset2->fset_mtpt == dentry) ) { + printk(KERN_ERR "Fsetroot already set (path %s)\n", path); + EXIT; + goto out_dput; + } + + fset->fset_cache = cache; + fset->fset_mtpt = dentry; + fset->fset_name = fsetname; + fset->fset_chunkbits = CHUNK_BITS; + fset->fset_flags = flags; + fset->fset_file_maxio = FSET_DEFAULT_MAX_FILEIO; + + presto_d2d(dentry)->dd_fset = fset; + list_add(&fset->fset_list, &cache->cache_fset_list); + + error = presto_init_kml_file(fset); + if ( error ) { + EXIT; + CDEBUG(D_JOURNAL, "Error init_kml %d\n", error); + goto out_list_del; + } + + error = presto_init_last_rcvd_file(fset); + if ( error ) { + int rc; + EXIT; + rc = presto_close_journal_file(fset); + CDEBUG(D_JOURNAL, "Error init_lastrcvd %d, cleanup %d\n", error, rc); + goto out_list_del; + } + + error = presto_init_lml_file(fset); + if ( error ) { + int rc; + EXIT; + rc = presto_close_journal_file(fset); + CDEBUG(D_JOURNAL, "Error init_lml %d, cleanup %d\n", error, rc); + goto out_list_del; + } + +#ifdef CONFIG_KREINT + /* initialize kml reint buffer */ + error = kml_init (fset); + if ( error ) { + int rc; + EXIT; + rc = presto_close_journal_file(fset); + CDEBUG(D_JOURNAL, "Error init kml reint %d, cleanup %d\n", + error, rc); + goto out_list_del; + } +#endif + if ( dentry->d_inode == dentry->d_inode->i_sb->s_root->d_inode) { + cache->cache_flags |= CACHE_FSETROOT_SET; + } + + CDEBUG(D_PIOCTL, "-------> fset at %p, dentry at %p, mtpt %p, fset %s, cache %p, presto_d2d(dentry)->dd_fset %p\n", + fset, dentry, fset->fset_mtpt, fset->fset_name, cache, presto_d2d(dentry)->dd_fset); + + EXIT; + return 0; + + out_list_del: + list_del(&fset->fset_list); + presto_d2d(dentry)->dd_fset = NULL; + out_dput: + path_release(&fset->fset_nd); + out_free: + PRESTO_FREE(fset, sizeof(*fset)); + return error; +} + +int presto_get_kmlsize(char *path, size_t *size) +{ + struct nameidata nd; + struct presto_file_set *fset; 
+ struct dentry *dentry; + int error; + + ENTRY; + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto kml_out; + } + + error = -EINVAL; + if ( ! presto_dentry2fset(dentry)) { + EXIT; + goto kml_out; + } + + fset = presto_dentry2fset(dentry); + if (!fset) { + EXIT; + goto kml_out; + } + error = 0; + *size = fset->fset_kml.fd_offset; + + kml_out: + path_release(&nd); + return error; +} + +static void presto_cleanup_fset(struct presto_file_set *fset) +{ + int error; + struct presto_cache *cache; + + ENTRY; +#ifdef CONFIG_KREINT + error = kml_cleanup (fset); + if ( error ) { + printk("InterMezzo: Closing kml for fset %s: %d\n", + fset->fset_name, error); + } +#endif + + error = presto_close_journal_file(fset); + if ( error ) { + printk("InterMezzo: Closing journal for fset %s: %d\n", + fset->fset_name, error); + } + cache = fset->fset_cache; + cache->cache_flags &= ~CACHE_FSETROOT_SET; + + list_del(&fset->fset_list); + + presto_d2d(fset->fset_mtpt)->dd_fset = NULL; + path_release(&fset->fset_nd); + + fset->fset_mtpt = NULL; + PRESTO_FREE(fset->fset_name, strlen(fset->fset_name) + 1); + PRESTO_FREE(fset, sizeof(*fset)); + EXIT; +} + +int presto_clear_fsetroot(char *path) +{ + struct nameidata nd; + struct presto_file_set *fset; + struct dentry *dentry; + int error; + + ENTRY; + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto put_out; + } + + error = -EINVAL; + if ( ! 
presto_dentry2fset(dentry)) { + EXIT; + goto put_out; + } + + fset = presto_dentry2fset(dentry); + if (!fset) { + EXIT; + goto put_out; + } + + presto_cleanup_fset(fset); + EXIT; + +put_out: + path_release(&nd); /* for our lookup */ + return error; +} + +int presto_clear_all_fsetroots(char *path) +{ + struct nameidata nd; + struct presto_file_set *fset; + struct dentry *dentry; + struct presto_cache *cache; + int error; + struct list_head *tmp,*tmpnext; + + + ENTRY; + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto put_out; + } + + error = -EINVAL; + if ( ! presto_dentry2fset(dentry)) { + EXIT; + goto put_out; + } + + fset = presto_dentry2fset(dentry); + if (!fset) { + EXIT; + goto put_out; + } + + cache = fset->fset_cache; + cache->cache_flags &= ~CACHE_FSETROOT_SET; + + tmp = &cache->cache_fset_list; + tmpnext = tmp->next; + while ( tmpnext != &cache->cache_fset_list) { + tmp = tmpnext; + tmpnext = tmp->next; + fset = list_entry(tmp, struct presto_file_set, fset_list); + + presto_cleanup_fset(fset); + } + + EXIT; + put_out: + path_release(&nd); /* for our lookup */ + return error; +} + + +int presto_get_lastrecno(char *path, off_t *recno) +{ + struct nameidata nd; + struct presto_file_set *fset; + struct dentry *dentry; + int error; + ENTRY; + + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto kml_out; + } + + error = -EINVAL; + if ( ! presto_dentry2fset(dentry)) { + EXIT; + goto kml_out; + } + + fset = presto_dentry2fset(dentry); + if (!fset) { + EXIT; + goto kml_out; + } + error = 0; + *recno = fset->fset_kml.fd_recno; + + kml_out: + path_release(&nd); + return error; +} + +/* + if *cookie != 0, lento must wait for this cookie + before releasing the permit, operations are in progress. 
+*/ +int presto_permit_downcall( const char * path, int *cookie ) +{ + int result; + struct presto_file_set *fset; + + fset = presto_path2fileset(path); + if (IS_ERR(fset)) { + EXIT; + return PTR_ERR(fset); + } + + lock_kernel(); + if (fset->fset_permit_count != 0) { + /* is there are previous cookie? */ + if (fset->fset_permit_cookie == 0) { + CDEBUG(D_CACHE, "presto installing cookie 0x%x, %s\n", + *cookie, path); + fset->fset_permit_cookie = *cookie; + } else { + *cookie = fset->fset_permit_cookie; + CDEBUG(D_CACHE, "presto has cookie 0x%x, %s\n", + *cookie, path); + } + result = presto_mark_fset(path, 0, FSET_PERMIT_WAITING, NULL); + } else { + *cookie = 0; + CDEBUG(D_CACHE, "presto releasing permit %s\n", path); + result = presto_mark_fset(path, ~FSET_HASPERMIT, 0, NULL); + } + unlock_kernel(); + + return result; +} + +inline int presto_is_read_only(struct presto_file_set * fset) +{ + int minor, mask; + struct presto_cache *cache = fset->fset_cache; + + minor= cache->cache_psdev->uc_minor; + mask= (ISLENTO(minor)? FSET_LENTO_RO : FSET_CLIENT_RO); + if ( fset->fset_flags & mask ) + return 1; + mask= (ISLENTO(minor)? CACHE_LENTO_RO : CACHE_CLIENT_RO); + return ((cache->cache_flags & mask)? 1 : 0); +} + diff --git a/fs/intermezzo/psdev.c b/fs/intermezzo/psdev.c new file mode 100644 index 000000000000..a124b9bfe1f6 --- /dev/null +++ b/fs/intermezzo/psdev.c @@ -0,0 +1,1665 @@ +/* + * An implementation of a loadable kernel mode driver providing + * multiple kernel/user space bidirectional communications links. + * + * Author: Alan Cox <alan@cymru.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Adapted to become the Linux 2.0 Coda pseudo device + * Peter Braam <braam@maths.ox.ac.uk> + * Michael Callahan <mjc@emmy.smith.edu> + * + * Changes for Linux 2.1 + * Copyright (c) 1997 Carnegie-Mellon University + * + * Redone again for InterMezzo + * Copyright (c) 1998 Peter J. Braam + * Copyright (c) 2000 Mountain View Data, Inc. + * Copyright (c) 2000 Tacitus Systems, Inc. + * Copyright (c) 2001 Cluster File Systems, Inc. + * + * Extended attribute support + * Copyright (c) 2001 Shirish. H. Phatak + * Copyright (c) 2001 Tacit Networks, Inc. + */ + + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/sched.h> +#include <linux/lp.h> +#include <linux/slab.h> +#include <linux/ioport.h> +#include <linux/fcntl.h> +#include <linux/delay.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/init.h> +#include <linux/list.h> +#include <asm/io.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/poll.h> +#include <asm/uaccess.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + + +#ifdef PRESTO_DEVEL +int presto_print_entry = 1; +int presto_debug = 4095; +#else +int presto_print_entry = 0; +int presto_debug = 0; +#endif + +/* Like inode.c (presto_sym_iops), the initializer is just to prevent + upc_comms from appearing as a COMMON symbol (and therefore + interfering with other modules that use the same variable name. 
*/ +struct upc_comm upc_comms[MAX_PRESTODEV] = {{0}}; + +/* + * Device operations: map file to upcall structure + */ +static inline struct upc_comm *presto_psdev_f2u(struct file *file) +{ + int minor; + + if ( MAJOR(file->f_dentry->d_inode->i_rdev) != PRESTO_PSDEV_MAJOR ) { + EXIT; + return NULL; + } + + minor = MINOR(file->f_dentry->d_inode->i_rdev); + if ( minor < 0 || minor >= MAX_PRESTODEV ) { + EXIT; + return NULL; + } + + return &(upc_comms[minor]); +} + +inline int presto_lento_up(int minor) +{ + return upc_comms[minor].uc_pid; +} + + +static unsigned int presto_psdev_poll(struct file *file, poll_table * wait) +{ + struct upc_comm *upccom; + unsigned int mask = POLLOUT | POLLWRNORM; + /* ENTRY; this will flood you */ + + if ( ! (upccom = presto_psdev_f2u(file)) ) { + kdev_t dev = file->f_dentry->d_inode->i_rdev; + printk("InterMezzo: %s, bad device %s\n", + __FUNCTION__, kdevname(dev)); + } + + poll_wait(file, &(upccom->uc_waitq), wait); + + if (!list_empty(&upccom->uc_pending)) { + CDEBUG(D_PSDEV, "Non-empty pending list.\n"); + mask |= POLLIN | POLLRDNORM; + } + + /* EXIT; will flood you */ + return mask; +} + + + +/* + * Receive a message written by Lento to the psdev + */ +static ssize_t presto_psdev_write(struct file *file, const char *buf, + size_t count, loff_t *off) +{ + struct upc_comm *upccom; + struct upc_req *req = NULL; + struct upc_req *tmp; + struct list_head *lh; + struct lento_down_hdr hdr; + int error; + + if ( ! 
(upccom = presto_psdev_f2u(file)) ) { + kdev_t dev = file->f_dentry->d_inode->i_rdev; + printk("InterMezzo: %s, bad device %s\n", + __FUNCTION__, kdevname(dev)); + } + + /* Peek at the opcode, uniquefier */ + if ( count < sizeof(hdr) ) { + printk("presto_psdev_write: Lento didn't write full hdr.\n"); + return -EINVAL; + } + + error = copy_from_user(&hdr, buf, sizeof(hdr)); + if ( error ) + return error; + + CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n", + current->pid, hdr.opcode, hdr.unique); + + /* Look for the message on the processing queue. */ + lh = &upccom->uc_processing; + while ( (lh = lh->next) != &upccom->uc_processing ) { + tmp = list_entry(lh, struct upc_req , rq_chain); + if (tmp->rq_unique == hdr.unique) { + req = tmp; + /* unlink here: keeps search length minimal */ + list_del(&req->rq_chain); + INIT_LIST_HEAD(&req->rq_chain); + CDEBUG(D_PSDEV,"Eureka opc %d uniq %d!\n", + hdr.opcode, hdr.unique); + break; + } + } + if (!req) { + printk("psdev_write: msg (%d, %d) not found\n", + hdr.opcode, hdr.unique); + return(-ESRCH); + } + + /* move data into response buffer. */ + if (req->rq_bufsize < count) { + printk("psdev_write: too much cnt: %d, cnt: %d, " + "opc: %d, uniq: %d.\n", + req->rq_bufsize, count, hdr.opcode, hdr.unique); + count = req->rq_bufsize; /* don't have more space! */ + } + error = copy_from_user(req->rq_data, buf, count); + if ( error ) + return error; + + /* adjust outsize: good upcalls can be aware of this */ + req->rq_rep_size = count; + req->rq_flags |= REQ_WRITE; + + wake_up(&req->rq_sleep); + return(count); +} + +/* + * Read a message from the kernel to Lento + */ +static ssize_t presto_psdev_read(struct file * file, char * buf, + size_t count, loff_t *off) +{ + struct upc_comm *upccom; + struct upc_req *req; + int result = count; + + if ( ! 
(upccom = presto_psdev_f2u(file)) ) { + kdev_t dev = file->f_dentry->d_inode->i_rdev; + printk("InterMezzo: %s, bad device %s\n", + __FUNCTION__, kdevname(dev)); + } + + CDEBUG(D_PSDEV, "count %d\n", count); + if (list_empty(&(upccom->uc_pending))) { + CDEBUG(D_UPCALL, "Empty pending list in read, not good\n"); + return -EINVAL; + } + + req = list_entry((upccom->uc_pending.next), struct upc_req, rq_chain); + list_del(&(req->rq_chain)); + if (! (req->rq_flags & REQ_ASYNC) ) { + list_add(&(req->rq_chain), upccom->uc_processing.prev); + } + req->rq_flags |= REQ_READ; + + /* Move the input args into userspace */ + if (req->rq_bufsize <= count) { + result = req->rq_bufsize; + } + + if (count < req->rq_bufsize) { + printk ("psdev_read: buffer too small, read %d of %d bytes\n", + count, req->rq_bufsize); + } + + if ( copy_to_user(buf, req->rq_data, result) ) { + return -EFAULT; + } + + /* If request was asynchronous don't enqueue, but free */ + if (req->rq_flags & REQ_ASYNC) { + CDEBUG(D_PSDEV, "psdev_read: async msg (%d, %d), result %d\n", + req->rq_opcode, req->rq_unique, result); + PRESTO_FREE(req->rq_data, req->rq_bufsize); + PRESTO_FREE(req, sizeof(*req)); + return result; + } + + return result; +} + +static int presto_psdev_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct upc_comm *upccom; + /* XXX is this rdev or dev? */ + kdev_t dev = inode->i_rdev; + + ENTRY; + upccom = presto_psdev_f2u(file); + if ( !upccom) { + printk("InterMezzo: %s, bad device %s\n", + __FUNCTION__, kdevname(dev)); + EXIT; + return -ENODEV; + } + + switch(cmd) { + + case TCGETS: + return -EINVAL; + + case PRESTO_GETMOUNT: { + /* return all the mounts for this device. 
*/ + int minor = 0; + int len, outlen; + struct readmount readmount; + struct readmount *user_readmount = (struct readmount *) arg; + char * tmp; + int error; + + error = copy_from_user(&readmount, (void *)arg, + sizeof(readmount)); + if ( error ) { + printk("psdev: can't copy %d bytes from %p to %p\n", + sizeof(readmount), (struct readmount *) arg, + &readmount); + EXIT; + return error; + } + + len = readmount.io_len; + minor = MINOR(dev); + PRESTO_ALLOC(tmp, char *, len); + if (!tmp) { + EXIT; + return -ENOMEM; + } + + outlen = presto_sprint_mounts(tmp, len, minor); + CDEBUG(D_PSDEV, "presto_sprint_mounts returns %d bytes\n", + outlen); + + /* as this came out on 1/3/2000, it could NEVER work. + * So fix it ... RGM + * I mean, let's let the compiler do a little work ... + * gcc suggested the extra () + */ + error = copy_to_user(readmount.io_string, tmp, outlen); + if ( error ) { + CDEBUG(D_PSDEV, "Copy_to_user string 0x%p failed\n", + readmount.io_string); + } + if ((!error) && (error = copy_to_user(&(user_readmount->io_len), + &outlen, sizeof(int))) ) { + CDEBUG(D_PSDEV, "Copy_to_user len @0x%p failed\n", + &(user_readmount->io_len)); + } + + PRESTO_FREE(tmp, len); + EXIT; + return error; + } + + case PRESTO_SETPID: { + /* + * This ioctl is performed by each Lento that starts up + * and wants to do further communication with presto. 
+ */ + CDEBUG(D_PSDEV, "Setting current pid to %d\n", current->pid); + upccom->uc_pid = current->pid; + if ( !list_empty(&upccom->uc_processing) ) { + struct list_head *lh; + struct upc_req *req; + printk("WARNING: setpid & processing not empty!\n"); + lh = &upccom->uc_processing; + while ( (lh = lh->next) != &upccom->uc_processing) { + req = list_entry(lh, struct upc_req, rq_chain); + /* freeing of req and data is done by the sleeper */ + wake_up(&req->rq_sleep); + } + } + if ( !list_empty(&upccom->uc_processing) ) { + printk("BAD: FAILDED TO CLEAN PROCESSING LIST!\n"); + } + EXIT; + return 0; + } + + case PRESTO_CLEAR_FSETROOT: { + /* + * Close KML files. + */ + int error; + int saved_pid = upccom->uc_pid; + char *path; + struct { + char *path; + int path_len; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + PRESTO_ALLOC(path, char *, input.path_len + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.path, input.path_len); + if ( error ) { + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return error; + } + path[input.path_len] = '\0'; + CDEBUG(D_PSDEV, "clear_fsetroot: path %s\n", path); + + upccom->uc_pid = current->pid; + error = presto_clear_fsetroot(path); + upccom->uc_pid = saved_pid; + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return error; + } + + + case PRESTO_CLEAR_ALL_FSETROOTS: { + /* + * Close KML files. 
+ */ + int error; + int saved_pid = upccom->uc_pid; + char *path; + struct { + char *path; + int path_len; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + PRESTO_ALLOC(path, char *, input.path_len + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.path, input.path_len); + if ( error ) { + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return error; + } + path[input.path_len] = '\0'; + CDEBUG(D_PSDEV, "clear_all_fsetroot: path %s\n", path); + + upccom->uc_pid = current->pid; + error = presto_clear_all_fsetroots(path); + upccom->uc_pid = saved_pid; + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return error; + } + + case PRESTO_GET_KMLSIZE: { + int error; + int saved_pid = upccom->uc_pid; + char *path; + size_t size = 0; + struct { + __u64 size; + char *path; + int path_len; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + PRESTO_ALLOC(path, char *, input.path_len + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.path, input.path_len); + if ( error ) { + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return error; + } + path[input.path_len] = '\0'; + CDEBUG(D_PSDEV, "get_kmlsize: len %d path %s\n", + input.path_len, path); + + upccom->uc_pid = current->pid; + error = presto_get_kmlsize(path, &size); + PRESTO_FREE(path, input.path_len + 1); + if (error) { + EXIT; + return error; + } + input.size = size; + upccom->uc_pid = saved_pid; + + CDEBUG(D_PSDEV, "get_kmlsize: size = %d\n", size); + + EXIT; + return copy_to_user((char *)arg, &input, sizeof(input)); + } + + case PRESTO_GET_RECNO: { + int error; + int saved_pid = upccom->uc_pid; + char *path; + off_t recno = 0; + struct { + __u64 recno; + char *path; + int path_len; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; 
+ } + + PRESTO_ALLOC(path, char *, input.path_len + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.path, input.path_len); + if ( error ) { + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return error; + } + path[input.path_len] = '\0'; + CDEBUG(D_PSDEV, "get_recno: len %d path %s\n", + input.path_len, path); + + upccom->uc_pid = current->pid; + error = presto_get_lastrecno(path, &recno); + PRESTO_FREE(path, input.path_len + 1); + if (error) { + EXIT; + return error; + } + input.recno = recno; + upccom->uc_pid = saved_pid; + + CDEBUG(D_PSDEV, "get_recno: recno = %d\n", (int) recno); + + EXIT; + return copy_to_user((char *)arg, &input, sizeof(input)); + } + + case PRESTO_SET_FSETROOT: { + /* + * Save information about the cache, and initialize "special" + * cache files (KML, etc). + */ + int error; + int saved_pid = upccom->uc_pid; + char *fsetname; + char *path; + struct { + char *path; + int path_len; + char *name; + int name_len; + int id; + int flags; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + PRESTO_ALLOC(path, char *, input.path_len + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(path, input.path, input.path_len); + if ( error ) { + EXIT; + goto exit_free_path; + } + path[input.path_len] = '\0'; + + PRESTO_ALLOC(fsetname, char *, input.name_len + 1); + if ( !fsetname ) { + error = -ENOMEM; + EXIT; + goto exit_free_path; + } + error = copy_from_user(fsetname, input.name, input.name_len); + if ( error ) { + EXIT; + goto exit_free_fsetname; + } + fsetname[input.name_len] = '\0'; + + CDEBUG(D_PSDEV, + "set_fsetroot: path %s name %s, id %d, flags %x\n", + path, fsetname, input.id, input.flags); + upccom->uc_pid = current->pid; + error = presto_set_fsetroot(path, fsetname, input.id,input.flags); + upccom->uc_pid = saved_pid; + if ( error ) { + EXIT; + goto exit_free_fsetname; + } + /* fsetname is kept in the fset, 
so don't free it now */ + PRESTO_FREE(path, input.path_len + 1); + EXIT; + return 0; + + exit_free_fsetname: + PRESTO_FREE(fsetname, input.name_len + 1); + exit_free_path: + PRESTO_FREE(path, input.path_len + 1); + return error; + } + + case PRESTO_CLOSE_JOURNALF: { + int saved_pid = upccom->uc_pid; + int error; + + CDEBUG(D_SUPER, "HELLO\n"); + + /* pretend we are lento: we should lock something */ + upccom->uc_pid = current->pid; + error = presto_close_journal_file(NULL); + CDEBUG(D_PSDEV, "error is %d\n", error); + upccom->uc_pid = saved_pid; + EXIT; + return error; + } + + case PRESTO_GETOPT: + case PRESTO_SETOPT: { + /* return all the mounts for this device. */ + int dosetopt(int, struct psdev_opt *); + int dogetopt(int, struct psdev_opt *); + int minor = 0; + struct psdev_opt kopt; + struct psdev_opt *user_opt = (struct psdev_opt *) arg; + int error; + + error = copy_from_user(&kopt, (void *)arg, sizeof(kopt)); + if ( error ) { + printk("psdev: can't copyin %d bytes from %p to %p\n", + sizeof(kopt), (struct kopt *) arg, &kopt); + EXIT; + return error; + } + minor = MINOR(dev); + if (cmd == PRESTO_SETOPT) + error = dosetopt(minor, &kopt); + + if ( error ) { + CDEBUG(D_PSDEV, + "dosetopt failed minor %d, opt %d, val %d\n", + minor, kopt.optname, kopt.optval); + EXIT; + return error; + } + + error = dogetopt(minor, &kopt); + + if ( error ) { + CDEBUG(D_PSDEV, + "dogetopt failed minor %d, opt %d, val %d\n", + minor, kopt.optname, kopt.optval); + EXIT; + return error; + } + + error = copy_to_user(user_opt, &kopt, sizeof(kopt)); + if ( error ) { + CDEBUG(D_PSDEV, "Copy_to_user opt 0x%p failed\n", + user_opt); + EXIT; + return error; + } + CDEBUG(D_PSDEV, "dosetopt minor %d, opt %d, val %d return %d\n", + minor, kopt.optname, kopt.optval, error); + EXIT; + return 0; + } + + case PRESTO_VFS_SETATTR: { + int error; + struct lento_input_attr input; + struct iattr iattr; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return 
error; + } + iattr.ia_valid = input.valid; + iattr.ia_mode = (umode_t)input.mode; + iattr.ia_uid = (uid_t)input.uid; + iattr.ia_gid = (gid_t)input.gid; + iattr.ia_size = (off_t)input.size; + iattr.ia_atime = (time_t)input.atime; + iattr.ia_mtime = (time_t)input.mtime; + iattr.ia_ctime = (time_t)input.ctime; + iattr.ia_attr_flags = input.attr_flags; + + error = lento_setattr(input.name, &iattr, &input.info); + EXIT; + return error; + } + + case PRESTO_VFS_CREATE: { + int error; + struct lento_input_mode input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_create(input.name, input.mode, &input.info); + EXIT; + return error; + } + + case PRESTO_VFS_LINK: { + int error; + struct lento_input_old_new input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_link(input.oldname, input.newname, &input.info); + EXIT; + return error; + } + + case PRESTO_VFS_UNLINK: { + int error; + struct lento_input input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_unlink(input.name, &input.info); + EXIT; + return error; + } + + case PRESTO_VFS_SYMLINK: { + int error; + struct lento_input_old_new input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_symlink(input.oldname, input.newname,&input.info); + EXIT; + return error; + } + + case PRESTO_VFS_MKDIR: { + int error; + struct lento_input_mode input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_mkdir(input.name, input.mode, &input.info); + EXIT; + return error; + } + + case PRESTO_VFS_RMDIR: { + int error; + struct lento_input input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + 
error = lento_rmdir(input.name, &input.info); + EXIT; + return error; + } + + case PRESTO_VFS_MKNOD: { + int error; + struct lento_input_dev input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_mknod(input.name, input.mode, + MKDEV(input.major,input.minor),&input.info); + EXIT; + return error; + } + + case PRESTO_VFS_RENAME: { + int error; + struct lento_input_old_new input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + error = lento_rename(input.oldname, input.newname, &input.info); + EXIT; + return error; + } + +#ifdef CONFIG_FS_EXT_ATTR + /* IOCTL to create/modify an extended attribute */ + case PRESTO_VFS_SETEXTATTR: { + int error; + struct lento_input_ext_attr input; + char *name; + char *buffer; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + /* Now setup the input parameters */ + PRESTO_ALLOC(name, char *, input.name_len+1); + /* We need null terminated strings for attr names */ + name[input.name_len] = '\0'; + error=copy_from_user(name, input.name, input.name_len); + if ( error ) { + EXIT; + PRESTO_FREE(name,input.name_len+1); + return error; + } + + PRESTO_ALLOC(buffer, char *, input.buffer_len+1); + error=copy_from_user(buffer, input.buffer, input.buffer_len); + if ( error ) { + EXIT; + PRESTO_FREE(name,input.name_len+1); + PRESTO_FREE(buffer,input.buffer_len+1); + return error; + } + /* Make null terminated for easy printing */ + buffer[input.buffer_len]='\0'; + + CDEBUG(D_PSDEV," setextattr params: name %s, valuelen %d," + " value %s, attr flags %x, mode %o, slot offset %d," + " recno %d, kml offset %lu, flags %x, time %d\n", + name, input.buffer_len, buffer, input.flags, input.mode, + input.info.slot_offset, input.info.recno, + (unsigned long) input.info.kml_offset, input.info.flags, + input.info.updated_time); + + error=lento_set_ext_attr + 
(input.path,name,buffer,input.buffer_len, + input.flags, input.mode, &input.info); + + PRESTO_FREE(name,input.name_len+1); + PRESTO_FREE(buffer,input.buffer_len+1); + EXIT; + return error; + } + + /* IOCTL to delete an extended attribute */ + case PRESTO_VFS_DELEXTATTR: { + int error; + struct lento_input_ext_attr input; + char *name; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + /* Now setup the input parameters */ + PRESTO_ALLOC(name, char *, input.name_len+1); + /* We need null terminated strings for attr names */ + name[input.name_len] = '\0'; + error=copy_from_user(name, input.name, input.name_len); + if ( error ) { + EXIT; + PRESTO_FREE(name,input.name_len+1); + return error; + } + + CDEBUG(D_PSDEV," delextattr params: name %s," + " attr flags %x, mode %o, slot offset %d, recno %d," + " kml offset %lu, flags %x, time %d\n", + name, input.flags, input.mode, + input.info.slot_offset, input.info.recno, + (unsigned long) input.info.kml_offset, input.info.flags, + input.info.updated_time); + + error=lento_set_ext_attr + (input.path,name,NULL,0,input.flags, + input.mode,&input.info); + PRESTO_FREE(name,input.name_len+1); + EXIT; + return error; + } +#endif + + case PRESTO_VFS_IOPEN: { + struct lento_input_iopen input; + int error; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + input.fd = lento_iopen(input.name, (ino_t)input.ino, + input.generation, input.flags); + CDEBUG(D_PIOCTL, "lento_iopen file descriptor: %d\n", input.fd); + if (input.fd < 0) { + EXIT; + return input.fd; + } + EXIT; + return copy_to_user((char *)arg, &input, sizeof(input)); + } + + case PRESTO_VFS_CLOSE: { + int error; + struct lento_input_close input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + + CDEBUG(D_PIOCTL, "lento_close file descriptor: %d\n", input.fd); + error = lento_close(input.fd, 
&input.info); + EXIT; + return error; + } + + case PRESTO_BACKFETCH_LML: { + char *user_path; + int error; + struct lml_arg { + char *path; + __u32 path_len; + __u64 remote_ino; + __u32 remote_generation; + __u32 remote_version; + struct presto_version remote_file_version; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + user_path = input.path; + + PRESTO_ALLOC(input.path, char *, input.path_len + 1); + if ( !input.path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(input.path, user_path, input.path_len); + if ( error ) { + EXIT; + PRESTO_FREE(input.path, input.path_len + 1); + return error; + } + input.path[input.path_len] = '\0'; + + CDEBUG(D_DOWNCALL, "lml name: %s\n", input.path); + + return lento_write_lml(input.path, + input.remote_ino, + input.remote_generation, + input.remote_version, + &input.remote_file_version); + + } + + + case PRESTO_CANCEL_LML: { + char *user_path; + int error; + struct lml_arg { + char *path; + __u64 lml_offset; + __u32 path_len; + __u64 remote_ino; + __u32 remote_generation; + __u32 remote_version; + struct lento_vfs_context info; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + user_path = input.path; + + PRESTO_ALLOC(input.path, char *, input.path_len + 1); + if ( !input.path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(input.path, user_path, input.path_len); + if ( error ) { + EXIT; + PRESTO_FREE(input.path, input.path_len + 1); + return error; + } + input.path[input.path_len] = '\0'; + + CDEBUG(D_DOWNCALL, "lml name: %s\n", input.path); + + return lento_cancel_lml(input.path, + input.lml_offset, + input.remote_ino, + input.remote_generation, + input.remote_version, + &input.info); + + } + + case PRESTO_COMPLETE_CLOSES: { + char *user_path; + int error; + struct lml_arg { + char *path; + __u32 path_len; + } input; + + error = copy_from_user(&input, (char *)arg, 
sizeof(input)); + if ( error ) { + EXIT; + return error; + } + user_path = input.path; + + PRESTO_ALLOC(input.path, char *, input.path_len + 1); + if ( !input.path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(input.path, user_path, input.path_len); + if ( error ) { + EXIT; + PRESTO_FREE(input.path, input.path_len + 1); + return error; + } + input.path[input.path_len] = '\0'; + + CDEBUG(D_DOWNCALL, "lml name: %s\n", input.path); + + error = lento_complete_closes(input.path); + PRESTO_FREE(input.path, input.path_len + 1); + return error; + } + + case PRESTO_RESET_FSET: { + char *user_path; + int error; + struct lml_arg { + char *path; + __u32 path_len; + __u64 offset; + __u32 recno; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + user_path = input.path; + + PRESTO_ALLOC(input.path, char *, input.path_len + 1); + if ( !input.path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(input.path, user_path, input.path_len); + if ( error ) { + EXIT; + PRESTO_FREE(input.path, input.path_len + 1); + return error; + } + input.path[input.path_len] = '\0'; + + CDEBUG(D_DOWNCALL, "lml name: %s\n", input.path); + + return lento_reset_fset(input.path, input.offset, input.recno); + + } + + + case PRESTO_MARK: { + char *user_path; + int res = 0; /* resulting flags - returned to user */ + int error; + struct { + int mark_what; + int and_flag; + int or_flag; + int path_len; + char *path; + } input; + + error = copy_from_user(&input, (char *)arg, sizeof(input)); + if ( error ) { + EXIT; + return error; + } + user_path = input.path; + + PRESTO_ALLOC(input.path, char *, input.path_len + 1); + if ( !input.path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(input.path, user_path, input.path_len); + if ( error ) { + EXIT; + PRESTO_FREE(input.path, input.path_len + 1); + return error; + } + input.path[input.path_len] = '\0'; + + CDEBUG(D_DOWNCALL, "mark name: %s, and: %x, or: %x, what 
%d\n", + input.path, input.and_flag, input.or_flag, + input.mark_what); + + switch (input.mark_what) { + case MARK_DENTRY: + error = presto_mark_dentry(input.path, + input.and_flag, + input.or_flag, &res); + break; + case MARK_FSET: + error = presto_mark_fset(input.path, + input.and_flag, + input.or_flag, &res); + break; + case MARK_CACHE: + error = presto_mark_cache(input.path, + input.and_flag, + input.or_flag, &res); + break; + case MARK_GETFL: { + int fflags, cflags; + input.and_flag = 0xffffffff; + input.or_flag = 0; + error = presto_mark_dentry(input.path, + input.and_flag, + input.or_flag, &res); + if (error) + break; + error = presto_mark_fset(input.path, + input.and_flag, + input.or_flag, &fflags); + if (error) + break; + error = presto_mark_cache(input.path, + input.and_flag, + input.or_flag, &cflags); + + if (error) + break; + input.and_flag = fflags; + input.or_flag = cflags; + break; + } + default: + error = -EINVAL; + } + + PRESTO_FREE(input.path, input.path_len + 1); + if (error == -EBUSY) { + input.and_flag = error; + error = 0; + } + if (error) { + EXIT; + return error; + } + /* return the correct cookie to wait for */ + input.mark_what = res; + return copy_to_user((char *)arg, &input, sizeof(input)); + } + +#ifdef CONFIG_KREINT + case PRESTO_REINT_BEGIN: + return begin_kml_reint (file, arg); + case PRESTO_DO_REINT: + return do_kml_reint (file, arg); + case PRESTO_REINT_END: + return end_kml_reint (file, arg); +#endif + + case PRESTO_RELEASE_PERMIT: { + int error; + char *user_path; + struct { + int cookie; + int path_len; + char *path; + } permit; + + error = copy_from_user(&permit, (char *)arg, sizeof(permit)); + if ( error ) { + EXIT; + return error; + } + user_path = permit.path; + + PRESTO_ALLOC(permit.path, char *, permit.path_len + 1); + if ( !permit.path ) { + EXIT; + return -ENOMEM; + } + error = copy_from_user(permit.path, user_path, permit.path_len); + if ( error ) { + EXIT; + PRESTO_FREE(permit.path, permit.path_len + 1); + return 
error; + } + permit.path[permit.path_len] = '\0'; + + CDEBUG(D_DOWNCALL, "release permit: %s, in cookie=%d\n", + permit.path, permit.cookie); + error = presto_permit_downcall(permit.path, &permit.cookie); + + PRESTO_FREE(permit.path, permit.path_len + 1); + if (error) { + EXIT; + return error; + } + /* return the correct cookie to wait for */ + return copy_to_user((char *)arg, &permit, sizeof(permit)); + } + + default: + CDEBUG(D_PSDEV, "bad ioctl 0x%x, \n", cmd); + CDEBUG(D_PSDEV, "valid are 0x%x - 0x%x, 0x%x - 0x%x \n", + PRESTO_GETMOUNT, PRESTO_GET_KMLSIZE, + PRESTO_VFS_SETATTR, PRESTO_VFS_IOPEN); + EXIT; + } + + return -EINVAL; +} + + +static int presto_psdev_open(struct inode * inode, struct file * file) +{ + struct upc_comm *upccom; + ENTRY; + + if ( ! (upccom = presto_psdev_f2u(file)) ) { + kdev_t dev = file->f_dentry->d_inode->i_rdev; + printk("InterMezzo: %s, bad device %s\n", + __FUNCTION__, kdevname(dev)); + EXIT; + return -EINVAL; + } + + MOD_INC_USE_COUNT; + + CDEBUG(D_PSDEV, "Psdev_open: uc_pid: %d, caller: %d, flags: %d\n", + upccom->uc_pid, current->pid, file->f_flags); + + EXIT; + return 0; +} + + + +static int presto_psdev_release(struct inode * inode, struct file * file) +{ + struct upc_comm *upccom; + struct upc_req *req; + struct list_head *lh; + ENTRY; + + + if ( ! (upccom = presto_psdev_f2u(file)) ) { + kdev_t dev = file->f_dentry->d_inode->i_rdev; + printk("InterMezzo: %s, bad device %s\n", + __FUNCTION__, kdevname(dev)); + } + + if ( upccom->uc_pid != current->pid ) { + printk("psdev_release: Not lento.\n"); + MOD_DEC_USE_COUNT; + return 0; + } + + MOD_DEC_USE_COUNT; + CDEBUG(D_PSDEV, "Lento: pid %d\n", current->pid); + upccom->uc_pid = 0; + + /* Wake up clients so they can return. 
*/ + CDEBUG(D_PSDEV, "Wake up clients sleeping for pending.\n"); + lh = &upccom->uc_pending; + while ( (lh = lh->next) != &upccom->uc_pending) { + req = list_entry(lh, struct upc_req, rq_chain); + + /* Async requests stay around for a new lento */ + if (req->rq_flags & REQ_ASYNC) { + continue; + } + /* the sleeper will free the req and data */ + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + + CDEBUG(D_PSDEV, "Wake up clients sleeping for processing\n"); + lh = &upccom->uc_processing; + while ( (lh = lh->next) != &upccom->uc_processing) { + req = list_entry(lh, struct upc_req, rq_chain); + /* freeing of req and data is done by the sleeper */ + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + CDEBUG(D_PSDEV, "Done.\n"); + + EXIT; + return 0; +} + +static struct file_operations presto_psdev_fops = { + read: presto_psdev_read, + write: presto_psdev_write, + poll: presto_psdev_poll, + ioctl: presto_psdev_ioctl, + open: presto_psdev_open, + release: presto_psdev_release +}; + + +int presto_psdev_init(void) +{ + int i; + +#ifdef PRESTO_DEVEL + if (register_chrdev(PRESTO_PSDEV_MAJOR, "intermezzo_psdev_devel", + &presto_psdev_fops)) { + printk(KERN_ERR "presto_psdev: unable to get major %d\n", + PRESTO_PSDEV_MAJOR); + return -EIO; + } +#else + if (register_chrdev(PRESTO_PSDEV_MAJOR, "intermezzo_psdev", + &presto_psdev_fops)) { + printk("presto_psdev: unable to get major %d\n", + PRESTO_PSDEV_MAJOR); + return -EIO; + } +#endif + + memset(&upc_comms, 0, sizeof(upc_comms)); + for ( i = 0 ; i < MAX_PRESTODEV ; i++ ) { + char *name; + struct upc_comm *psdev = &upc_comms[i]; + INIT_LIST_HEAD(&psdev->uc_pending); + INIT_LIST_HEAD(&psdev->uc_processing); + INIT_LIST_HEAD(&psdev->uc_cache_list); + init_waitqueue_head(&psdev->uc_waitq); + psdev->uc_hard = 0; + psdev->uc_no_filter = 0; + psdev->uc_no_journal = 0; + psdev->uc_no_upcall = 0; + psdev->uc_timeout = 30; + psdev->uc_errorval = 0; + psdev->uc_minor = i; + PRESTO_ALLOC(name, char *, 
strlen(PRESTO_PSDEV_NAME "256")+1); + if (!name) { + printk("Unable to allocate memory for device name\n"); + continue; + } + sprintf(name, PRESTO_PSDEV_NAME "%d", i); + psdev->uc_devname = name; + } + return 0; +} + +void presto_psdev_cleanup(void) +{ + int i; + + for ( i = 0 ; i < MAX_PRESTODEV ; i++ ) { + struct upc_comm *psdev = &upc_comms[i]; + struct list_head *lh; + + if ( ! list_empty(&psdev->uc_pending)) { + printk("Weird, tell Peter: module cleanup and pending list not empty dev %d\n", i); + } + if ( ! list_empty(&psdev->uc_processing)) { + printk("Weird, tell Peter: module cleanup and processing list not empty dev %d\n", i); + } + if ( ! list_empty(&psdev->uc_cache_list)) { + printk("Weird, tell Peter: module cleanup and cache listnot empty dev %d\n", i); + } + if (psdev->uc_devname) { + PRESTO_FREE(psdev->uc_devname, + strlen(PRESTO_PSDEV_NAME "256")+1); + } + lh = psdev->uc_pending.next; + while ( lh != &psdev->uc_pending) { + struct upc_req *req; + + req = list_entry(lh, struct upc_req, rq_chain); + lh = lh->next; + if ( req->rq_flags & REQ_ASYNC ) { + list_del(&(req->rq_chain)); + CDEBUG(D_UPCALL, "free pending upcall type %d\n", + req->rq_opcode); + PRESTO_FREE(req->rq_data, req->rq_bufsize); + PRESTO_FREE(req, sizeof(struct upc_req)); + } else { + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + } + lh = &psdev->uc_processing; + while ( (lh = lh->next) != &psdev->uc_processing ) { + struct upc_req *req; + req = list_entry(lh, struct upc_req, rq_chain); + list_del(&(req->rq_chain)); + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + } +} + +/* + * lento_upcall and lento_downcall routines + */ +static inline unsigned long lento_waitfor_upcall(struct upc_req *req, + int minor) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long posttime; + + req->rq_posttime = posttime = jiffies; + + add_wait_queue(&req->rq_sleep, &wait); + for (;;) { + if ( upc_comms[minor].uc_hard == 0 ) + current->state = TASK_INTERRUPTIBLE; + else + 
current->state = TASK_UNINTERRUPTIBLE; + + /* got a reply */ + if ( req->rq_flags & (REQ_WRITE | REQ_DEAD) ) + break; + + if ( !upc_comms[minor].uc_hard && signal_pending(current) ) { + /* if this process really wants to die, let it go */ + if (sigismember(&(current->pending.signal), SIGKILL)|| + sigismember(&(current->pending.signal), SIGINT) ) + break; + /* signal is present: after timeout always return + really smart idea, probably useless ... */ + if ( jiffies > req->rq_posttime + + upc_comms[minor].uc_timeout * HZ ) + break; + } + schedule(); + + } + list_del(&req->rq_chain); + INIT_LIST_HEAD(&req->rq_chain); + remove_wait_queue(&req->rq_sleep, &wait); + current->state = TASK_RUNNING; + + CDEBUG(D_SPECIAL, "posttime: %ld, returned: %ld\n", + posttime, jiffies-posttime); + return (jiffies - posttime); + +} + +/* + * lento_upcall will return an error in the case of + * failed communication with Lento _or_ will peek at Lento + * reply and return Lento's error. + * + * As lento has 2 types of errors, normal errors (positive) and internal + * errors (negative), normal errors are negated, while internal errors + * are all mapped to -EINTR, while showing a nice warning message. (jh) + * + * lento_upcall will always free buffer, either directly, when an upcall + * is read (in presto_psdev_read), when the filesystem is unmounted, or + * when the module is unloaded. + */ +int lento_upcall(int minor, int bufsize, int *rep_size, union up_args *buffer, + int async, struct upc_req *rq) +{ + unsigned long runtime; + struct upc_comm *upc_commp; + union down_args *out; + struct upc_req *req; + int error = 0; + + ENTRY; + upc_commp = &(upc_comms[minor]); + + if (upc_commp->uc_no_upcall) { + EXIT; + goto exit_buf; + } + if (!upc_commp->uc_pid && !async) { + EXIT; + error = -ENXIO; + goto exit_buf; + } + + /* Format the request message. 
*/ + CDEBUG(D_UPCALL, "buffer at %p, size %d\n", buffer, bufsize); + PRESTO_ALLOC(req, struct upc_req *, sizeof(struct upc_req)); + if ( !req ) { + EXIT; + error = -ENOMEM; + goto exit_buf; + } + req->rq_data = (void *)buffer; + req->rq_flags = 0; + req->rq_bufsize = bufsize; + req->rq_rep_size = 0; + req->rq_opcode = ((union up_args *)buffer)->uh.opcode; + req->rq_unique = ++upc_commp->uc_seq; + init_waitqueue_head(&req->rq_sleep); + + /* Fill in the common input args. */ + ((union up_args *)buffer)->uh.unique = req->rq_unique; + /* Append msg to pending queue and poke Lento. */ + list_add(&req->rq_chain, upc_commp->uc_pending.prev); + CDEBUG(D_UPCALL, + "Proc %d waking Lento %d for(opc,uniq) =(%d,%d) msg at %p.\n", + current->pid, upc_commp->uc_pid, req->rq_opcode, + req->rq_unique, req); + + wake_up_interruptible(&upc_commp->uc_waitq); + + if ( async ) { + req->rq_flags = REQ_ASYNC; + if( rq != NULL ) { + *rq = *req; /* struct copying */ + } + /* req, rq_data are freed in presto_psdev_read for async */ + EXIT; + return 0; + } + + /* We can be interrupted while we wait for Lento to process + * our request. If the interrupt occurs before Lento has read + * the request, we dequeue and return. If it occurs after the + * read but before the reply, we dequeue, send a signal + * message, and return. If it occurs after the reply we ignore + * it. In no case do we want to restart the syscall. If it + * was interrupted by a lento shutdown (psdev_close), return + * ENODEV. */ + + /* Go to sleep. Wake up on signals only after the timeout. */ + runtime = lento_waitfor_upcall(req, minor); + + CDEBUG(D_TIMING, "opc: %d time: %ld uniq: %d size: %d\n", + req->rq_opcode, jiffies - req->rq_posttime, + req->rq_unique, req->rq_rep_size); + CDEBUG(D_UPCALL, + "..process %d woken up by Lento for req at 0x%x, data at %x\n", + current->pid, (int)req, (int)req->rq_data); + + if (upc_commp->uc_pid) { /* i.e. 
Lento is still alive */ + /* Op went through, interrupt or not we go on */ + if (req->rq_flags & REQ_WRITE) { + out = (union down_args *)req->rq_data; + /* here we map positive Lento errors to kernel errors */ + if ( out->dh.result < 0 ) { + printk("Tell Peter: Lento returns negative error %d, for oc %d!\n", + out->dh.result, out->dh.opcode); + out->dh.result = EINVAL; + } + error = -out->dh.result; + CDEBUG(D_UPCALL, "upcall: (u,o,r) (%d, %d, %d) out at %p\n", + out->dh.unique, out->dh.opcode, out->dh.result, out); + *rep_size = req->rq_rep_size; + EXIT; + goto exit_req; + } + /* Interrupted before lento read it. */ + if ( !(req->rq_flags & REQ_READ) && signal_pending(current)) { + CDEBUG(D_UPCALL, + "Interrupt before read: (op,un)=(%d,%d), flags %x\n", + req->rq_opcode, req->rq_unique, req->rq_flags); + /* perhaps the best way to convince the app to give up? */ + error = -EINTR; + EXIT; + goto exit_req; + } + + /* interrupted after Lento did its read, send signal */ + if ( (req->rq_flags & REQ_READ) && signal_pending(current) ) { + union up_args *sigargs; + struct upc_req *sigreq; + + CDEBUG(D_UPCALL,"Sending for: op = %d.%d, flags = %x\n", + req->rq_opcode, req->rq_unique, req->rq_flags); + + error = -EINTR; + + /* req, rq_data are freed in presto_psdev_read for async */ + PRESTO_ALLOC(sigreq, struct upc_req *, + sizeof (struct upc_req)); + if (!sigreq) { + error = -ENOMEM; + EXIT; + goto exit_req; + } + PRESTO_ALLOC((sigreq->rq_data), char *, + sizeof(struct lento_up_hdr)); + if (!(sigreq->rq_data)) { + PRESTO_FREE(sigreq, sizeof (struct upc_req)); + error = -ENOMEM; + EXIT; + goto exit_req; + } + + sigargs = (union up_args *)sigreq->rq_data; + sigargs->uh.opcode = LENTO_SIGNAL; + sigargs->uh.unique = req->rq_unique; + + sigreq->rq_flags = REQ_ASYNC; + sigreq->rq_opcode = sigargs->uh.opcode; + sigreq->rq_unique = sigargs->uh.unique; + sigreq->rq_bufsize = sizeof(struct lento_up_hdr); + sigreq->rq_rep_size = 0; + CDEBUG(D_UPCALL, + "presto_upcall: enqueing 
signal msg (%d, %d)\n", + sigreq->rq_opcode, sigreq->rq_unique); + + /* insert at head of queue! */ + list_add(&sigreq->rq_chain, &upc_commp->uc_pending); + wake_up_interruptible(&upc_commp->uc_waitq); + } else { + printk("Lento: Strange interruption - tell Peter.\n"); + error = -EINTR; + } + } else { /* If lento died i.e. !UC_OPEN(upc_commp) */ + printk("presto_upcall: Lento dead on (op,un) (%d.%d) flags %d\n", + req->rq_opcode, req->rq_unique, req->rq_flags); + error = -ENODEV; + } + +exit_req: + PRESTO_FREE(req, sizeof(struct upc_req)); +exit_buf: + PRESTO_FREE(buffer, bufsize); + return error; +} + + diff --git a/fs/intermezzo/super.c b/fs/intermezzo/super.c new file mode 100644 index 000000000000..a0d17cbf9edf --- /dev/null +++ b/fs/intermezzo/super.c @@ -0,0 +1,528 @@ +/* + * presto's super.c + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * + * + */ + + +#include <stdarg.h> + +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#define __NO_VERSION__ +#include <linux/module.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> + +#ifdef PRESTO_DEBUG +long presto_vmemory = 0; +long presto_kmemory = 0; +#endif + +extern struct presto_cache *presto_init_cache(void); +extern inline void presto_cache_add(struct presto_cache *cache, kdev_t dev); +extern inline void presto_init_cache_hash(void); + +int presto_remount(struct super_block *, int *, char *); +extern ssize_t presto_file_write(struct file *file, const char *buf, + size_t size, loff_t *off); + +/* + * Reading the super block. 
+ * + * + * + */ + +/* returns an allocated string, copied out from data if opt is found */ +static char *read_opt(const char *opt, char *data) +{ + char *value; + char *retval; + + CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); + if ( strncmp(opt, data, strlen(opt)) ) + return NULL; + + if ( (value = strchr(data, '=')) == NULL ) + return NULL; + + value++; + PRESTO_ALLOC(retval, char *, strlen(value) + 1); + if ( !retval ) { + printk("InterMezzo: Out of memory!\n"); + return NULL; + } + + strcpy(retval, value); + CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); + return retval; +} + +static void store_opt(char **dst, char *opt, char *defval) +{ + if (dst) { + if (*dst) { + PRESTO_FREE(*dst, strlen(*dst) + 1); + } + *dst = opt; + } else { + printk("presto: store_opt, error dst == NULL\n"); + } + + + if (!opt && defval) { + char *def_alloced; + PRESTO_ALLOC(def_alloced, char *, strlen(defval)+1); + strcpy(def_alloced, defval); + *dst = def_alloced; + } +} + + +/* Find the options for InterMezzo in "options", saving them into the + * passed pointers. If the pointer is null, the option is discarded. + * Copy out all non-InterMezzo options into cache_data (to be passed + * to the read_super operation of the cache). The return value will + * be a pointer to the end of the cache_data. 
+ */ +static char *presto_options(char *options, char *cache_data, + char **cache_type, char **fileset, + char **prestodev, char **mtpt) +{ + char *this_char; + char *cache_data_end = cache_data; + + if (!options || !cache_data) + return cache_data_end; + + /* set the defaults */ + store_opt(cache_type, NULL, "ext3"); + store_opt(prestodev, NULL, PRESTO_PSDEV_NAME "0"); + + CDEBUG(D_SUPER, "parsing options\n"); + for (this_char = strtok (options, ","); + this_char != NULL; + this_char = strtok (NULL, ",")) { + char *opt; + CDEBUG(D_SUPER, "this_char %s\n", this_char); + + if ( (opt = read_opt("fileset", this_char)) ) { + store_opt(fileset, opt, NULL); + continue; + } + if ( (opt = read_opt("cache_type", this_char)) ) { + store_opt(cache_type, opt, "ext3"); + continue; + } + if ( (opt = read_opt("mtpt", this_char)) ) { + store_opt(mtpt, opt, NULL); + continue; + } + if ( (opt = read_opt("prestodev", this_char)) ) { + store_opt(prestodev, opt, PRESTO_PSDEV_NAME); + continue; + } + + cache_data_end += sprintf(cache_data_end, "%s%s", + cache_data_end != cache_data ? ",":"", + this_char); + } + + return cache_data_end; +} + +/* + map a /dev/intermezzoX path to a minor: + used to validate mount options passed to InterMezzo + */ +static int presto_get_minor(char *dev_path, int *minor) +{ + struct nameidata nd; + struct dentry *dentry; + kdev_t devno = 0; + int error; + ENTRY; + + /* Special case for root filesystem - use minor 0 always. 
*/ + if ( current->pid == 1 ) { + *minor = 0; + return 0; + } + + error = presto_walk(dev_path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + error = -ENODEV; + if (!dentry->d_inode) { + EXIT; + goto out; + } + + if (!S_ISCHR(dentry->d_inode->i_mode)) { + EXIT; + goto out; + } + + devno = dentry->d_inode->i_rdev; + if ( MAJOR(devno) != PRESTO_PSDEV_MAJOR ) { + EXIT; + goto out; + } + + if ( MINOR(devno) >= MAX_PRESTODEV ) { + EXIT; + goto out; + } + + EXIT; + out: + *minor = MINOR(devno); + path_release(&nd); + return 0; +} + +/* We always need to remove the presto options before passing to bottom FS */ +struct super_block * presto_read_super(struct super_block * presto_sb, + void * data, int silent) +{ + struct super_block *mysb = NULL; + struct file_system_type *fstype; + struct presto_cache *cache = NULL; + char *cache_data = NULL; + char *cache_data_end; + char *cache_type = NULL; + char *fileset = NULL; + char *presto_mtpt = NULL; + char *prestodev = NULL; + struct filter_fs *ops; + int minor; + struct upc_comm *psdev; + + ENTRY; + CDEBUG(D_MALLOC, "before parsing: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + + /* reserve space for the cache's data */ + PRESTO_ALLOC(cache_data, void *, PAGE_SIZE); + if ( !cache_data ) { + printk("presto_read_super: Cannot allocate data page.\n"); + EXIT; + goto out_err; + } + + CDEBUG(D_SUPER, "mount opts: %s\n", data ? (char *)data : "(none)"); + + /* read and validate options */ + cache_data_end = presto_options(data, cache_data, &cache_type, &fileset, + &prestodev, &presto_mtpt); + + /* was there anything for the cache filesystem in the data? 
*/ + if (cache_data_end == cache_data) { + PRESTO_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } else { + CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, + cache_data); + } + + /* prepare the communication channel */ + if ( presto_get_minor(prestodev, &minor) ) { + /* if (!silent) */ + printk("InterMezzo: %s not a valid presto dev\n", prestodev); + EXIT; + goto out_err; + } + psdev = &upc_comms[minor]; + CDEBUG(D_SUPER, "\n"); + psdev->uc_no_filter = 1; + + CDEBUG(D_SUPER, "presto minor is %d\n", minor); + + /* set up the cache */ + cache = presto_init_cache(); + if ( !cache ) { + printk("presto_read_super: failure allocating cache.\n"); + EXIT; + goto out_err; + } + + /* no options were passed: likely we are "/" readonly */ + if ( !presto_mtpt || !fileset ) { + cache->cache_flags |= CACHE_LENTO_RO | CACHE_CLIENT_RO; + } + cache->cache_psdev = psdev; + /* no options were passed: likely we are "/" readonly */ + /* before the journaling infrastructure can work, these + need to be set; that happens in presto_remount */ + if ( !presto_mtpt || !fileset ) { + if (!presto_mtpt) + printk("No mountpoint marking cache RO\n"); + if (!fileset) + printk("No fileset marking cache RO\n"); + cache->cache_flags |= CACHE_LENTO_RO | CACHE_CLIENT_RO; + } + + cache->cache_mtpt = presto_mtpt; + cache->cache_root_fileset = fileset; + cache->cache_type = cache_type; + + printk("Presto: type=%s, vol=%s, dev=%s (minor %d), mtpt %s, flags %x\n", + cache_type, fileset ? fileset : "NULL", prestodev, minor, + presto_mtpt ? 
presto_mtpt : "NULL", cache->cache_flags); + + + MOD_INC_USE_COUNT; + fstype = get_fs_type(cache_type); + + cache->cache_filter = filter_get_filter_fs((const char *)cache_type); + if ( !fstype || !cache->cache_filter) { + printk("Presto: unrecognized fs type or cache type\n"); + MOD_DEC_USE_COUNT; + EXIT; + goto out_err; + } + mysb = fstype->read_super(presto_sb, cache_data, silent); + /* this might have been freed above */ + if (cache_data) { + PRESTO_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } + if ( !mysb ) { + /* if (!silent) */ + printk("InterMezzo: cache mount failure.\n"); + MOD_DEC_USE_COUNT; + EXIT; + goto out_err; + } + + cache->cache_sb = mysb; + ops = filter_get_filter_fs(cache_type); + + filter_setup_journal_ops(cache->cache_filter, cache->cache_type); + + /* we now know the dev of the cache: hash the cache */ + presto_cache_add(cache, mysb->s_dev); + + /* make sure we have our own super operations: mysb + still contains the cache operations */ + filter_setup_super_ops(cache->cache_filter, mysb->s_op, + &presto_super_ops); + mysb->s_op = filter_c2usops(cache->cache_filter); + + /* now get our own directory operations */ + if ( mysb->s_root && mysb->s_root->d_inode ) { + CDEBUG(D_SUPER, "\n"); + filter_setup_dir_ops(cache->cache_filter, + mysb->s_root->d_inode, + &presto_dir_iops, &presto_dir_fops); + mysb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter); + CDEBUG(D_SUPER, "lookup at %p\n", + mysb->s_root->d_inode->i_op->lookup); + filter_setup_dentry_ops(cache->cache_filter, + mysb->s_root->d_op, + &presto_dentry_ops); + presto_sb->s_root->d_op = filter_c2udops(cache->cache_filter); + cache->cache_mtde = mysb->s_root; + presto_set_dd(mysb->s_root); + } + + CDEBUG(D_MALLOC, "after mounting: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + + EXIT; + return mysb; + + out_err: + CDEBUG(D_SUPER, "out_err called\n"); + if (cache) + PRESTO_FREE(cache, sizeof(struct presto_cache)); + if (cache_data) + PRESTO_FREE(cache_data, 
PAGE_SIZE); + if (fileset) + PRESTO_FREE(fileset, strlen(fileset) + 1); + if (presto_mtpt) + PRESTO_FREE(presto_mtpt, strlen(presto_mtpt) + 1); + if (prestodev) + PRESTO_FREE(prestodev, strlen(prestodev) + 1); + if (cache_type) + PRESTO_FREE(cache_type, strlen(cache_type) + 1); + + CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + return NULL; +} + +int presto_remount(struct super_block * sb, int *flags, char *data) +{ + char *cache_data = NULL; + char *cache_data_end; + char **type; + char **fileset; + char **mtpt; + char **prestodev; + struct super_operations *sops; + struct presto_cache *cache = NULL; + int err = 0; + + ENTRY; + CDEBUG(D_MALLOC, "before remount: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + CDEBUG(D_SUPER, "remount opts: %s\n", data ? (char *)data : "(none)"); + if (data) { + /* reserve space for the cache's data */ + PRESTO_ALLOC(cache_data, void *, PAGE_SIZE); + if ( !cache_data ) { + err = -ENOMEM; + EXIT; + goto out_err; + } + } + + cache = presto_find_cache(sb->s_dev); + if (!cache) { + printk(__FUNCTION__ ": cannot find cache on remount\n"); + err = -ENODEV; + EXIT; + goto out_err; + } + + /* If an option has not yet been set, we allow it to be set on + * remount. If an option already has a value, we pass NULL for + * the option pointer, which means that the InterMezzo option + * will be parsed but discarded. + */ + type = cache->cache_type ? NULL : &cache->cache_type; + fileset = cache->cache_root_fileset ? NULL : &cache->cache_root_fileset; + prestodev = cache->cache_psdev ? NULL : &cache->cache_psdev->uc_devname; + mtpt = cache->cache_mtpt ? 
NULL : &cache->cache_mtpt; + cache_data_end = presto_options(data, cache_data, type, fileset, + prestodev, mtpt); + + if (cache_data) { + if (cache_data_end == cache_data) { + PRESTO_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } else { + CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, + cache_data); + } + } + + if (cache->cache_root_fileset && cache->cache_mtpt) { + cache->cache_flags &= ~(CACHE_LENTO_RO|CACHE_CLIENT_RO); + } + + sops = filter_c2csops(cache->cache_filter); + if (sops->remount_fs) { + err = sops->remount_fs(sb, flags, cache_data); + } + + CDEBUG(D_MALLOC, "after remount: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + EXIT; +out_err: + if (cache_data) + PRESTO_FREE(cache_data, PAGE_SIZE); + return err; +} + +struct file_system_type presto_fs_type = { +#ifdef PRESTO_DEVEL + "izofs", +#else + "intermezzo", +#endif + FS_REQUIRES_DEV, /* can use Ibaskets when ext2 does */ + presto_read_super, + NULL +}; + + +int /* __init */ init_intermezzo_fs(void) +{ + int status; + + printk(KERN_INFO "InterMezzo Kernel/Lento communications, " + "v1.04, braam@inter-mezzo.org\n"); + + status = presto_psdev_init(); + if ( status ) { + printk("Problem (%d) in init_intermezzo_psdev\n", status); + return status; + } + + status = init_intermezzo_sysctl(); + if (status) { + printk("presto: failed in init_intermezzo_sysctl!\n"); + } + + presto_init_cache_hash(); + presto_init_ddata_cache(); + + status = register_filesystem(&presto_fs_type); + if (status) { + printk("presto: failed in register_filesystem!\n"); + } + return status; +} + + +#ifdef MODULE +MODULE_AUTHOR("Peter J. 
Braam <braam@inter-mezzo.org>"); +MODULE_DESCRIPTION("InterMezzo Kernel/Lento communications, v1.0.5.1"); + +int init_module(void) +{ + return init_intermezzo_fs(); +} + + +void cleanup_module(void) +{ + int err; + + ENTRY; + + if ( (err = unregister_filesystem(&presto_fs_type)) != 0 ) { + printk("presto: failed to unregister filesystem\n"); + } + + presto_psdev_cleanup(); + cleanup_intermezzo_sysctl(); + presto_cleanup_ddata_cache(); + +#ifdef PRESTO_DEVEL + unregister_chrdev(PRESTO_PSDEV_MAJOR, "intermezzo_psdev_devel"); +#else + unregister_chrdev(PRESTO_PSDEV_MAJOR, "intermezzo_psdev"); +#endif + CDEBUG(D_MALLOC, "after cleanup: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); +} + +#endif + diff --git a/fs/intermezzo/sysctl.c b/fs/intermezzo/sysctl.c new file mode 100644 index 000000000000..2e31a273eda2 --- /dev/null +++ b/fs/intermezzo/sysctl.c @@ -0,0 +1,361 @@ +/* + * Sysctrl entries for Intermezzo! + */ + +#define __NO_VERSION__ +#include <linux/config.h> /* for CONFIG_PROC_FS */ +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <linux/swapctl.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <asm/bitops.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/utsname.h> +#include <linux/blk.h> + + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_upcall.h> + +/* /proc entries */ + +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_fs_intermezzo; +int intermezzo_mount_get_info( char * buffer, char ** start, off_t offset, + int length) +{ + int len=0; + + /* this works as long as we are below 1024 characters! 
*/ + len += presto_sprint_mounts(buffer, length, -1); + + *start = buffer + offset; + len -= offset; + + if ( len < 0 ) + return -EINVAL; + + return len; +} + +#endif + + +/* SYSCTL below */ + +static struct ctl_table_header *intermezzo_table_header = NULL; +/* 0x100 to avoid any chance of collisions at any point in the tree with + * non-directories + */ +#define PSDEV_INTERMEZZO (0x100) + +#define PSDEV_DEBUG 1 /* control debugging */ +#define PSDEV_TRACE 2 /* control enter/leave pattern */ +#define PSDEV_TIMEOUT 3 /* timeout on upcalls to become intrble */ +#define PSDEV_HARD 4 /* mount type "hard" or "soft" */ +#define PSDEV_NO_FILTER 5 /* controls presto_chk */ +#define PSDEV_NO_JOURNAL 6 /* controls presto_chk */ +#define PSDEV_NO_UPCALL 7 /* controls lento_upcall */ +#define PSDEV_ERRORVAL 8 /* controls presto_debug_fail_blkdev */ +#define PSDEV_EXCL_GID 9 /* which GID is ignored by presto */ +#define PSDEV_ILOOKUP_UID 10 /* which UID bypasses file access perms */ +#define PSDEV_BYTES_TO_CLOSE 11 /* bytes to write before close */ + +/* These are global presto control options */ +#define PRESTO_PRIMARY_CTLCNT 4 +static struct ctl_table presto_table[ PRESTO_PRIMARY_CTLCNT + MAX_PRESTODEV + 1] = +{ + {PSDEV_DEBUG, "debug", &presto_debug, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_TRACE, "trace", &presto_print_entry, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_EXCL_GID, "presto_excluded_gid", &presto_excluded_gid, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_ILOOKUP_UID, "presto_ilookup_uid", &presto_ilookup_uid, sizeof(int), 0644, NULL, &proc_dointvec}, +}; + +/* + * Intalling the sysctl entries: strategy + * - have templates for each /proc/sys/intermezzo/ entry + * such an entry exists for each /dev/presto + * (proto_prestodev_entry) + * - have a template for the contents of such directories + * (proto_psdev_table) + * - have the master table (presto_table) + * + * When installing, malloc, memcpy and fix up the pointers to point to + * the 
appropriate constants in upc_comms[your_minor] + */ + +static ctl_table proto_psdev_table[] = { + {PSDEV_HARD, "hard", 0, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_NO_FILTER, "no_filter", 0, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_NO_JOURNAL, "no_journal", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_NO_UPCALL, "no_upcall", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_TIMEOUT, "timeout", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_TRACE, "trace", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_DEBUG, "debug", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef PRESTO_DEBUG + {PSDEV_ERRORVAL, "errorval", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif + { 0 } +}; + +static ctl_table proto_prestodev_entry = { + PSDEV_INTERMEZZO, 0, NULL, 0, 0555, 0, +}; + +static ctl_table intermezzo_table[2] = { + {PSDEV_INTERMEZZO, "intermezzo", NULL, 0, 0555, presto_table}, + {0} +}; + +/* support for external setting and getting of opts. */ +/* particularly via ioctl. The Right way to do this is via sysctl, + * but that will have to wait until intermezzo gets its own nice set of + * sysctl IDs + */ +/* we made these separate as setting may in future be more restricted + * than getting + */ +int dosetopt(int minor, struct psdev_opt *opt) +{ + int retval = 0; + int newval = opt->optval; + + ENTRY; + + switch(opt->optname) { + + case PSDEV_TIMEOUT: + upc_comms[minor].uc_timeout = newval; + break; + + case PSDEV_HARD: + upc_comms[minor].uc_hard = newval; + break; + + case PSDEV_NO_FILTER: + upc_comms[minor].uc_no_filter = newval; + break; + + case PSDEV_NO_JOURNAL: + upc_comms[minor].uc_no_journal = newval; + break; + + case PSDEV_NO_UPCALL: + upc_comms[minor].uc_no_upcall = newval; + break; + +#ifdef PRESTO_DEBUG + case PSDEV_ERRORVAL: { + /* If we have a positive arg, set a breakpoint for that + * value. If we have a negative arg, make that device + * read-only. 
FIXME It would be much better to only + * allow setting the underlying device read-only for the + * current presto cache. + */ + int errorval = upc_comms[minor].uc_errorval; + if (errorval < 0) { + if (newval == 0) + set_device_ro(-errorval, 0); + else + printk("device %s already read only\n", + kdevname(-errorval)); + } else { + if (newval < 0) + set_device_ro(-newval, 1); + upc_comms[minor].uc_errorval = newval; + CDEBUG(D_PSDEV, "setting errorval to %d\n", newval); + } + + break; + } +#endif + + case PSDEV_TRACE: + case PSDEV_DEBUG: + case PSDEV_BYTES_TO_CLOSE: + default: + CDEBUG(D_PSDEV, + "ioctl: dosetopt: minor %d, bad optname 0x%x, \n", + minor, opt->optname); + + retval = -EINVAL; + } + + EXIT; + return retval; +} + +int dogetopt(int minor, struct psdev_opt *opt) +{ + int retval = 0; + + ENTRY; + + switch(opt->optname) { + + case PSDEV_TIMEOUT: + opt->optval = upc_comms[minor].uc_timeout; + break; + + case PSDEV_HARD: + opt->optval = upc_comms[minor].uc_hard; + break; + + case PSDEV_NO_FILTER: + opt->optval = upc_comms[minor].uc_no_filter; + break; + + case PSDEV_NO_JOURNAL: + opt->optval = upc_comms[minor].uc_no_journal; + break; + + case PSDEV_NO_UPCALL: + opt->optval = upc_comms[minor].uc_no_upcall; + break; + +#ifdef PSDEV_DEBUG + case PSDEV_ERRORVAL: { + int errorval = upc_comms[minor].uc_errorval; + if (errorval < 0 && is_read_only(-errorval)) + printk(KERN_INFO "device %s has been set read-only\n", + kdevname(-errorval)); + opt->optval = upc_comms[minor].uc_errorval; + break; + } +#endif + + case PSDEV_TRACE: + case PSDEV_DEBUG: + case PSDEV_BYTES_TO_CLOSE: + default: + CDEBUG(D_PSDEV, + "ioctl: dogetopt: minor %d, bad optval 0x%x, \n", + minor, opt->optname); + + retval = -EINVAL; + } + + EXIT; + return retval; +} + + + +int /* __init */ init_intermezzo_sysctl(void) +{ + int i; + extern struct upc_comm upc_comms[MAX_PRESTODEV]; + + /* allocate the tables for the presto devices. 
We need + * sizeof(proto_prestodev_table)/sizeof(proto_prestodev_table[0]) + * entries for each dev + */ + int total_dev = MAX_PRESTODEV; + int entries_per_dev = sizeof(proto_psdev_table) / + sizeof(proto_psdev_table[0]); + int total_entries = entries_per_dev * total_dev; + ctl_table *dev_ctl_table; + + PRESTO_ALLOC(dev_ctl_table, ctl_table *, + sizeof(ctl_table) * total_entries); + + if (! dev_ctl_table) { + printk("WARNING: presto couldn't allocate dev_ctl_table\n"); + EXIT; + return -ENOMEM; + } + + /* now fill in the entries ... we put the individual presto<x> + * entries at the end of the table, and the per-presto stuff + * starting at the front. We assume that the compiler makes + * this code more efficient, but really, who cares ... it + * happens once per reboot. + */ + for(i = 0; i < total_dev; i++) { + /* entry for this /proc/sys/intermezzo/intermezzo"i" */ + ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT]; + /* entries for the individual "files" in this "directory" */ + ctl_table *psdev_entries = &dev_ctl_table[i * entries_per_dev]; + /* init the psdev and psdev_entries with the prototypes */ + *psdev = proto_prestodev_entry; + memcpy(psdev_entries, proto_psdev_table, + sizeof(proto_psdev_table)); + /* now specialize them ... */ + /* the psdev has to point to psdev_entries, and fix the number */ + psdev->ctl_name = psdev->ctl_name + i + 1; /* sorry */ + + psdev->procname = kmalloc(32, GFP_KERNEL); + if (!psdev->procname) { + PRESTO_FREE(dev_ctl_table, + sizeof(ctl_table) * total_entries); + return -ENOMEM; + } + sprintf((char *) psdev->procname, "intermezzo%d", i); + /* hook presto into */ + psdev->child = psdev_entries; + + /* now for each psdev entry ... 
*/ + psdev_entries[0].data = &(upc_comms[i].uc_hard); + psdev_entries[1].data = &(upc_comms[i].uc_no_filter); + psdev_entries[2].data = &(upc_comms[i].uc_no_journal); + psdev_entries[3].data = &(upc_comms[i].uc_no_upcall); + psdev_entries[4].data = &(upc_comms[i].uc_timeout); + psdev_entries[5].data = &presto_print_entry; + psdev_entries[6].data = &presto_debug; +#ifdef PRESTO_DEBUG + psdev_entries[7].data = &(upc_comms[i].uc_errorval); +#endif + } + + +#ifdef CONFIG_SYSCTL + if ( !intermezzo_table_header ) + intermezzo_table_header = + register_sysctl_table(intermezzo_table, 0); +#endif +#ifdef CONFIG_PROC_FS + proc_fs_intermezzo = proc_mkdir("intermezzo", proc_root_fs); + proc_fs_intermezzo->owner = THIS_MODULE; + create_proc_info_entry("mounts", 0, proc_fs_intermezzo, + intermezzo_mount_get_info); +#endif + return 0; +} + +void cleanup_intermezzo_sysctl() { + int total_dev = MAX_PRESTODEV; + int entries_per_dev = sizeof(proto_psdev_table) / + sizeof(proto_psdev_table[0]); + int total_entries = entries_per_dev * total_dev; + int i; + +#ifdef CONFIG_SYSCTL + if ( intermezzo_table_header ) + unregister_sysctl_table(intermezzo_table_header); + intermezzo_table_header = NULL; +#endif + for(i = 0; i < total_dev; i++) { + /* entry for this /proc/sys/intermezzo/intermezzo"i" */ + ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT]; + kfree(psdev->procname); + } + /* presto_table[PRESTO_PRIMARY_CTLCNT].child points to the + * dev_ctl_table previously allocated in init_intermezzo_psdev() + */ + PRESTO_FREE(presto_table[PRESTO_PRIMARY_CTLCNT].child, sizeof(ctl_table) * total_entries); + +#if CONFIG_PROC_FS + remove_proc_entry("mounts", proc_fs_intermezzo); + remove_proc_entry("intermezzo", proc_root_fs); +#endif +} + diff --git a/fs/intermezzo/upcall.c b/fs/intermezzo/upcall.c new file mode 100644 index 000000000000..ebb40c0c07e6 --- /dev/null +++ b/fs/intermezzo/upcall.c @@ -0,0 +1,248 @@ +/* + * Mostly platform independent upcall operations to Venus: + * -- 
upcalls + * -- upcall routines + * + * Linux 2.0 version + * Copyright (C) 1996 Peter J. Braam <braam@cs.cmu.edu>, + * Michael Callahan <callahan@maths.ox.ac.uk> + * + * Redone for Linux 2.1 + * Copyright (C) 1997 Carnegie Mellon University + * + * Carnegie Mellon University encourages users of this code to contribute + * improvements to the Coda project. Contact Peter Braam <coda@cs.cmu.edu>. + * + * Much cleaned up for InterMezzo + * Copyright (C) 1998 Peter J. Braam <braam@cs.cmu.edu>, + * Copyright (C) 1999 Carnegie Mellon University + * + */ + +#include <asm/system.h> +#include <asm/segment.h> +#include <asm/signal.h> +#include <linux/signal.h> + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/string.h> +#include <asm/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/segment.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> + +/* + At present: four upcalls + - opendir: fetch a directory (synchronous & asynchronous) + - open: fetch file (synchronous) + - journal: send a journal page (asynchronous) + - permit: get a permit (synchronous) + + Errors returned here are positive. + + */ + + +#define INSIZE(tag) sizeof(struct lento_ ## tag ## _in) +#define OUTSIZE(tag) sizeof(struct lento_ ## tag ## _out) +#define SIZE(tag) ( (INSIZE(tag)>OUTSIZE(tag)) ? 
INSIZE(tag) : OUTSIZE(tag) ) + +#define UPARG(op)\ +do {\ + PRESTO_ALLOC(inp, union up_args *, insize);\ + if ( !inp ) { return -ENOMEM; }\ + outp = (union down_args *) (inp);\ + inp->uh.opcode = (op);\ + inp->uh.pid = current->pid;\ + inp->uh.uid = current->fsuid;\ + outsize = insize;\ +} while (0) + +#define BUFF_ALLOC(buffer) \ + PRESTO_ALLOC(buffer, char *, PAGE_SIZE); \ + if ( !buffer ) { \ + printk("PRESTO: out of memory!\n"); \ + return -ENOMEM; \ + } + +/* the upcalls */ +int lento_kml(int minor, unsigned int offset, unsigned int first_recno, + unsigned int length, unsigned int last_recno, int namelen, + char *fsetname) +{ + union up_args *inp; + union down_args *outp; + int insize, outsize, error; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return 0; + } + + insize = SIZE(kml) + namelen + 1; + UPARG(LENTO_KML); + inp->lento_kml.namelen = namelen; + memcpy(inp->lento_kml.fsetname, fsetname, namelen); + inp->lento_kml.fsetname[namelen] = '\0'; + inp->lento_kml.offset = offset; + inp->lento_kml.first_recno = first_recno; + inp->lento_kml.length = length; + inp->lento_kml.last_recno = last_recno; + + CDEBUG(D_UPCALL, "KML: fileset %s, offset %d, length %d, " + "first %d, last %d; minor %d\n", + inp->lento_kml.fsetname, + inp->lento_kml.offset, + inp->lento_kml.length, + inp->lento_kml.first_recno, + inp->lento_kml.last_recno, minor); + + error = lento_upcall(minor, insize, &outsize, inp, + ASYNCHRONOUS, NULL); + + EXIT; + return error; +} + +int lento_release_permit( int minor, int mycookie ) +{ + union up_args *inp; + union down_args *outp; + int insize, outsize, error; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return 0; + } + + insize= SIZE(response_cookie); + UPARG(LENTO_COOKIE); + inp->lento_response_cookie.cookie= mycookie; + + CDEBUG(D_UPCALL, "cookie %d\n", mycookie); + + error = lento_upcall(minor, insize, &outsize, inp, + ASYNCHRONOUS, NULL); + + EXIT; + return error; +} + +int lento_opendir(int minor, int pathlen, char *path, 
int async) +{ + union up_args *inp; + union down_args *outp; + int insize, outsize, error; + ENTRY; + + insize = SIZE(opendir) + pathlen + 1; + UPARG(LENTO_OPENDIR); + inp->lento_opendir.async = async; + inp->lento_opendir.pathlen = pathlen; + memcpy(inp->lento_opendir.path, path, pathlen); + inp->lento_opendir.path[pathlen] = '\0'; + + CDEBUG(D_UPCALL, "path %s\n", inp->lento_opendir.path); + + if (async) { + error = lento_upcall(minor, insize, &outsize, inp, + ASYNCHRONOUS, NULL); + return 0; + } + + error = lento_upcall(minor, insize, &outsize, inp, + SYNCHRONOUS, NULL); + if (error && error != EISFSETROOT) { + printk("lento_opendir: error %d\n", error); + } + + EXIT; + return error; +} + +int lento_open(int minor, int pathlen, char *path) +{ + union up_args *inp; + union down_args *outp; + int insize, outsize, error; + + ENTRY; + insize = SIZE(open) + pathlen + 1; + UPARG(LENTO_OPEN); + inp->lento_open.pathlen = pathlen; + memcpy(inp->lento_open.path, path, pathlen); + inp->lento_open.path[pathlen] = '\0'; + + CDEBUG(D_UPCALL, "path %s\n", inp->lento_open.path); + + error = lento_upcall(minor, insize, &outsize, inp, + SYNCHRONOUS, NULL); + if (error) { + printk("lento_open: error %d\n", error); + } + + EXIT; + return error; +} + + +int lento_permit(int minor, int pathlen, int fsetnamelen, char *path, char *fsetname) +{ + union up_args *inp; + union down_args *outp; + int insize, outsize, error; + ENTRY; + + insize = SIZE(permit) + pathlen + 1 + fsetnamelen + 1; + UPARG(LENTO_PERMIT); + inp->lento_permit.pathlen = pathlen; + inp->lento_permit.fsetnamelen = fsetnamelen; + + memcpy(inp->lento_permit.path, path, pathlen); + inp->lento_permit.path[pathlen] = '\0'; + + memcpy(&(inp->lento_permit.path[pathlen+1]), fsetname, fsetnamelen); + inp->lento_permit.path[fsetnamelen + 1 + pathlen] = '\0'; + + CDEBUG(D_UPCALL, "Permit minor %d path %s\n", minor, + inp->lento_permit.path); + + error = lento_upcall(minor, insize, &outsize, inp, + SYNCHRONOUS, NULL); + if (error) 
{ + if( error == -EROFS ) { + int err; + printk("lento_permit: ERROR - requested permit for " + "read-only fileset.\n" + " Setting \"%s\" read-only!\n", + path); + err= presto_mark_cache(path, 0xFFFFFFFF, + CACHE_CLIENT_RO, NULL); + if( err ) { + printk("ERROR : mark_cache %d\n", err); + } + } + else { + printk("lento_permit: error %d\n", error); + } + } + + EXIT; + + return error; +} + diff --git a/fs/intermezzo/vfs.c b/fs/intermezzo/vfs.c new file mode 100644 index 000000000000..1f9eea0793fa --- /dev/null +++ b/fs/intermezzo/vfs.c @@ -0,0 +1,2360 @@ +/* + * vfs.c + * + * This file implements kernel downcalls from lento. + * + * Author: Rob Simmonds <simmonds@stelias.com> + * Andreas Dilger <adilger@stelias.com> + * Copyright (C) 2000 Stelias Computing Inc + * Copyright (C) 2000 Red Hat Inc. + * + * Extended attribute support + * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. + * + * This code is based on code from namei.c in the linux file system; + * see copyright notice below. + */ + +/** namei.c copyright **/ + +/* + * linux/fs/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * Some corrections by tytso. + */ + +/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname + * lookup logic. 
+ */ + +/** end of namei.c copyright **/ + +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/smp_lock.h> +#include <linux/quotaops.h> + +#include <asm/uaccess.h> +#include <asm/unaligned.h> +#include <asm/semaphore.h> +#include <asm/pgtable.h> + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/blk.h> + +#include <linux/intermezzo_fs.h> +#include <linux/intermezzo_upcall.h> +#include <linux/intermezzo_psdev.h> +#include <linux/intermezzo_kml.h> + +#ifdef CONFIG_FS_EXT_ATTR +#include <linux/ext_attr.h> + +#ifdef CONFIG_FS_POSIX_ACL +#include <linux/posix_acl.h> +#endif +#endif + +extern struct inode_operations presto_sym_iops; + +/* + * It's inline, so penalty for filesystems that don't use sticky bit is + * minimal. + */ +static inline int check_sticky(struct inode *dir, struct inode *inode) +{ + if (!(dir->i_mode & S_ISVTX)) + return 0; + if (inode->i_uid == current->fsuid) + return 0; + if (dir->i_uid == current->fsuid) + return 0; + return !capable(CAP_FOWNER); +} + +/* from linux/fs/namei.c */ +static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) +{ + int error; + if (!victim->d_inode || victim->d_parent->d_inode != dir) + return -ENOENT; + error = permission(dir,MAY_WRITE | MAY_EXEC); + if (error) + return error; + if (IS_APPEND(dir)) + return -EPERM; + if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| + IS_IMMUTABLE(victim->d_inode)) + return -EPERM; + if (isdir) { + if (!S_ISDIR(victim->d_inode->i_mode)) + return -ENOTDIR; + if (IS_ROOT(victim)) + return -EBUSY; + } else if (S_ISDIR(victim->d_inode->i_mode)) + return -EISDIR; + return 0; +} + +/* from linux/fs/namei.c */ +static inline int may_create(struct inode *dir, struct dentry *child) { + if (child->d_inode) + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; + return permission(dir,MAY_WRITE | MAY_EXEC); +} + +#ifdef PRESTO_DEBUG +/* The loop_discard_io() function is available via a kernel patch to the + * loop 
block device. It "works" by accepting writes, but throwing them + * away, rather than trying to write them to disk. The old method worked + * by setting the underlying device read-only, but that has the problem + * that dirty buffers are kept in memory, and ext3 didn't like that at all. + */ +#ifdef CONFIG_LOOP_DISCARD +#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail) +#else +#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1) +#endif + +/* If a breakpoint has been set via /proc/sys/intermezzo/intermezzoX/errorval, + * that is the same as "value", the underlying device will "fail" now. + */ +inline void presto_debug_fail_blkdev(struct presto_file_set *fset, + unsigned long value) +{ + int minor = presto_f2m(fset); + int errorval = upc_comms[minor].uc_errorval; + kdev_t dev = fset->fset_mtpt->d_inode->i_dev; + + if (errorval && errorval == (long)value && !is_read_only(dev)) { + CDEBUG(D_SUPER, "setting device %s read only\n", kdevname(dev)); + BLKDEV_FAIL(dev, 1); + upc_comms[minor].uc_errorval = -dev; + } +} +#else +#define presto_debug_fail_blkdev(dev,value) do {} while (0) +#endif + + +static inline int presto_do_kml(struct lento_vfs_context *info, struct inode* inode) +{ + if ( ! (info->flags & LENTO_FL_KML) ) + return 0; + if ( inode->i_gid == presto_excluded_gid ) + return 0; + return 1; +} + +static inline int presto_do_expect(struct lento_vfs_context *info, struct inode *inode) +{ + if ( ! 
(info->flags & LENTO_FL_EXPECT) ) + return 0; + if ( inode->i_gid == presto_excluded_gid ) + return 0; + return 1; +} + +int presto_settime(struct presto_file_set *fset, + struct dentry *dentry, + struct lento_vfs_context *ctx, + int valid) +{ + int error; + struct inode *inode = dentry->d_inode; + struct inode_operations *iops; + struct iattr iattr; + + ENTRY; + if (ctx->flags & LENTO_FL_IGNORE_TIME ) { + EXIT; + return 0; + } + iattr.ia_ctime = ctx->updated_time; + iattr.ia_mtime = ctx->updated_time; + iattr.ia_valid = valid; + + error = -EROFS; + if (IS_RDONLY(inode)) { + EXIT; + return -EROFS; + } + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + EXIT; + return -EPERM; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops && + !iops->setattr) { + EXIT; + return error; + } + + if (iops->setattr != NULL) + error = iops->setattr(dentry, &iattr); + else { + error = 0; // we suppose no error, Arthur + inode_setattr(dentry->d_inode, &iattr); + } + EXIT; + return error; +} + + +int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry, + struct iattr *iattr, struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode *inode = dentry->d_inode; + struct inode_operations *iops; + int error; + struct presto_version old_ver, new_ver; + void *handle; + off_t old_size=inode->i_size; + + ENTRY; + error = -EROFS; + if (IS_RDONLY(inode)) { + EXIT; + return -EROFS; + } + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + EXIT; + return -EPERM; + } + + presto_getversion(&old_ver, dentry->d_inode); + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops && + !iops->setattr) { + EXIT; + return error; + } + + error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH); + if (error) { + EXIT; + return error; + } + + if (iattr->ia_valid & ATTR_SIZE) { + handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_TRUNC); + } else { + handle = presto_trans_start(fset, 
dentry->d_inode, PRESTO_OP_SETATTR); + } + + if ( IS_ERR(handle) ) { + printk("presto_do_setattr: no space for transaction\n"); + presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); + return -ENOSPC; + } + + if (dentry->d_inode && iops->setattr) { + error = iops->setattr(dentry, iattr); + } else { + error = inode_change_ok(dentry->d_inode, iattr); + if (!error) + inode_setattr(inode, iattr); + } + + if (!error && (iattr->ia_valid & ATTR_SIZE)) + vmtruncate(inode, iattr->ia_size); + + if (error) { + EXIT; + goto exit; + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_SETATTR | 0x10); + + if ( presto_do_kml(info, dentry->d_inode) ) { + if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) { + struct file file; + /* Journal a close whenever we see a potential truncate + * At the receiving end, lento should explicitly remove + * ATTR_SIZE from the list of valid attributes */ + presto_getversion(&new_ver, inode); + file.private_data = NULL; + file.f_dentry = dentry; + error=presto_journal_close(&rec, fset, &file, dentry, &new_ver); + } + + if (!error) + error = presto_journal_setattr(&rec, fset, dentry, &old_ver, iattr); + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_SETATTR | 0x20); + if ( presto_do_expect(info, dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_SETATTR | 0x30); + + EXIT; +exit: + presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); + presto_trans_commit(fset, handle); + return error; +} + +int lento_setattr(const char *name, struct iattr *iattr, + struct lento_vfs_context *info) +{ + struct nameidata nd; + struct dentry *dentry; + struct presto_file_set *fset; + int error; +#ifdef CONFIG_FS_POSIX_ACL + int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL; +#endif + + ENTRY; + CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n", + name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, + iattr->ia_gid, iattr->ia_size); + 
CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n", + iattr->ia_atime, iattr->ia_mtime, iattr->ia_ctime, + iattr->ia_attr_flags); + CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n", + info->slot_offset, info->recno, info->flags); + + lock_kernel(); + error = presto_walk(name, &nd); + if (error) { + EXIT; + goto exit; + } + dentry = nd.dentry; + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit_lock; + } + + /* NOTE: this prevents us from changing the filetype on setattr, + * as we normally only want to change permission bits. + * If this is not correct, then we need to fix the perl code + * to always send the file type OR'ed with the permission. + */ + if (iattr->ia_valid & ATTR_MODE) { + int set_mode = iattr->ia_mode; + iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) | + (dentry->d_inode->i_mode & ~S_IALLUGO); + CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n", + dentry->d_inode->i_mode, set_mode, iattr->ia_mode); +#ifdef CONFIG_FS_POSIX_ACL + /* ACl code interacts badly with setattr + * since it tries to modify the ACL using + * set_ext_attr which recurses back into presto. + * This only happens if ATTR_MODE is set. + * Here we are doing a "forced" mode set + * (initiated by lento), so we disable the + * set_posix_acl operation which + * prevents such recursion. -SHP + * + * This will probably still be required when native + * acl journalling is in place. 
+ */ + set_posix_acl=dentry->d_inode->i_op->set_posix_acl; + dentry->d_inode->i_op->set_posix_acl=NULL; +#endif + } + + error = presto_do_setattr(fset, dentry, iattr, info); + +#ifdef CONFIG_FS_POSIX_ACL + /* restore the inode_operations if we changed them*/ + if (iattr->ia_valid & ATTR_MODE) + dentry->d_inode->i_op->set_posix_acl=set_posix_acl; +#endif + + + EXIT; +exit_lock: + path_release(&nd); +exit: + unlock_kernel(); + return error; +} + +int presto_do_statfs (struct presto_file_set *fset, + struct statfs * buf) +{ + struct super_operations *sops; + struct super_block *sb; + int result; + ENTRY; + + if ( !fset ) { + EXIT; + return -EINVAL; + } + if ( !fset->fset_cache ) { + EXIT; + return -EINVAL; + } + if ( !fset->fset_cache->cache_filter ) { + EXIT; + return -EINVAL; + } + + sops = filter_c2csops(fset->fset_cache->cache_filter); + if ( ! sops ) { + EXIT; + return -EINVAL; + } + if ( ! fset->fset_cache->cache_mtde ) { + EXIT; + return -EINVAL; + } + + if ( ! fset->fset_cache->cache_mtde->d_inode ) { + EXIT; + return -EINVAL; + } + + if ( ! 
fset->fset_cache->cache_mtde->d_inode->i_sb ) { + EXIT; + return -EINVAL; + } + sb = fset->fset_cache->cache_mtde->d_inode->i_sb; + + if (sops->statfs) { + mm_segment_t old_fs = get_fs(); + memset(buf, 0, sizeof(struct statfs)); + set_fs(get_ds()); + lock_kernel(); + result = sops->statfs(sb, buf); + unlock_kernel(); + set_fs(old_fs); + } else { + result = -EINVAL; + } + + EXIT; + return result; +} + +int presto_do_create(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, new_file_ver; + struct inode_operations *iops; + void *handle; + + ENTRY; + mode &= S_IALLUGO; + mode |= S_IFREG; + + down(&dir->d_inode->i_zombie); + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_pre_lock; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->create) { + EXIT; + goto exit_pre_lock; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_CREATE); + if ( IS_ERR(handle) ) { + EXIT; + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + printk("presto_do_create: no space for transaction\n"); + error=-ENOSPC; + goto exit_pre_lock; + } + DQUOT_INIT(dir->d_inode); + lock_kernel(); + error = iops->create(dir->d_inode, dentry, mode); + if (error) { + EXIT; + goto exit_lock; + } + + if (dentry->d_inode && + dentry->d_inode->i_gid != presto_excluded_gid) { + struct presto_cache *cache = fset->fset_cache; + /* was this already done? 
*/ + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + + /* if Lento creates this file, we won't have data */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + error = presto_settime(fset, dir, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit_lock; + } + error = presto_settime(fset, dentry, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit_lock; + } + + + presto_debug_fail_blkdev(fset, PRESTO_OP_CREATE | 0x10); + presto_getversion(&new_file_ver, dentry->d_inode); + if ( presto_do_kml(info, dentry->d_inode) ) + error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver, + &new_file_ver, + dentry->d_inode->i_mode); + + presto_debug_fail_blkdev(fset, PRESTO_OP_CREATE | 0x20); + if ( presto_do_expect(info, dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_CREATE | 0x30); + EXIT; + + exit_lock: + unlock_kernel(); + presto_trans_commit(fset, handle); + exit_pre_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + up(&dir->d_inode->i_zombie); + return error; +} + +/* from namei.c */ +static struct dentry *lookup_create(struct nameidata *nd, int is_dir) +{ + struct dentry *dentry; + + down(&nd->dentry->d_inode->i_sem); + dentry = ERR_PTR(-EEXIST); + if (nd->last_type != LAST_NORM) + goto fail; + dentry = lookup_hash(&nd->last, nd->dentry); + if (IS_ERR(dentry)) + goto fail; + if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) + goto enoent; + return dentry; +enoent: + dput(dentry); + dentry = ERR_PTR(-ENOENT); +fail: + return dentry; +} + +int lento_create(const char *name, int mode, struct lento_vfs_context *info) +{ + int error; + struct nameidata nd; + char * pathname; + struct dentry *dentry; + struct 
presto_file_set *fset; + + ENTRY; + pathname = getname(name); + error = PTR_ERR(pathname); + if (IS_ERR(pathname)) { + EXIT; + goto exit; + } + + /* this looks up the parent */ +// if (path_init(pathname, LOOKUP_FOLLOW | LOOKUP_POSITIVE, &nd)) + if (path_init(pathname, LOOKUP_PARENT, &nd)) + error = path_walk(pathname, &nd); + if (error) { + EXIT; + goto exit; + } + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + EXIT; + goto exit_lock; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit_lock; + } + error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG, + info); + + EXIT; + + exit_lock: + path_release (&nd); + dput(dentry); + up(&dentry->d_parent->d_inode->i_sem); + putname(pathname); +exit: + return error; +} + +int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry, + struct dentry *dir, struct dentry *new_dentry, + struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode *inode; + int error; + struct inode_operations *iops; + struct presto_version tgt_dir_ver; + struct presto_version new_link_ver; + void *handle; + + down(&dir->d_inode->i_zombie); + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + error = -ENOENT; + inode = old_dentry->d_inode; + if (!inode) + goto exit_lock; + + error = may_create(dir->d_inode, new_dentry); + if (error) + goto exit_lock; + + error = -EXDEV; + if (dir->d_inode->i_dev != inode->i_dev) + goto exit_lock; + + /* + * A link to an append-only or immutable file cannot be created. 
+ */ + error = -EPERM; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { + EXIT; + goto exit_lock; + } + + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->link) { + EXIT; + goto exit_lock; + } + + + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_LINK); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + printk("presto_do_link: no space for transaction\n"); + return -ENOSPC; + } + + DQUOT_INIT(dir->d_inode); + lock_kernel(); + error = iops->link(old_dentry, dir->d_inode, new_dentry); + unlock_kernel(); + if (error) { + EXIT; + goto exit_lock; + } + + error = presto_settime(fset, dir, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit_lock; + } + error = presto_settime(fset, new_dentry, info, ATTR_CTIME); + if (error) { + EXIT; + goto exit_lock; + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_LINK | 0x10); + presto_getversion(&new_link_ver, new_dentry->d_inode); + if ( presto_do_kml(info, old_dentry->d_inode) ) + error = presto_journal_link(&rec, fset, old_dentry, new_dentry, + &tgt_dir_ver, &new_link_ver); + + presto_debug_fail_blkdev(fset, PRESTO_OP_LINK | 0x20); + if ( presto_do_expect(info, old_dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_LINK | 0x30); + EXIT; + presto_trans_commit(fset, handle); +exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + up(&dir->d_inode->i_zombie); + return error; +} + + +int lento_link(const char * oldname, const char * newname, + struct lento_vfs_context *info) +{ + int error; + char * from; + char * to; + struct presto_file_set *fset; + + from = getname(oldname); + if(IS_ERR(from)) + return PTR_ERR(from); + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + struct dentry *new_dentry; + struct nameidata nd, old_nd; + + error = 0; + if (path_init(from, LOOKUP_POSITIVE, &old_nd)) + error = 
path_walk(from, &old_nd); + if (error) + goto exit; + if (path_init(to, LOOKUP_PARENT, &nd)) + error = path_walk(to, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out; + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + + if (!IS_ERR(new_dentry)) { + fset = presto_fset(new_dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto out2; + } + error = presto_do_link(fset, old_nd.dentry, + nd.dentry, + new_dentry, info); + dput(new_dentry); + } + out2: + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); + out: + path_release(&old_nd); + exit: + putname(to); + } + putname(from); + + return error; +} + + +int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct inode_operations *iops; + struct presto_version tgt_dir_ver, old_file_ver; + void *handle; + int do_kml = 0, do_expect =0; + int linkno = 0; + ENTRY; + down(&dir->d_inode->i_zombie); + error = may_delete(dir->d_inode, dentry, 0); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->unlink) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + presto_getversion(&old_file_ver, dentry->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_UNLINK); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQLOW); + printk("ERROR: presto_do_unlink: no space for transaction. 
Tell Peter.\n"); + up(&dir->d_inode->i_zombie); + return -ENOSPC; + } + DQUOT_INIT(dir->d_inode); + if (d_mountpoint(dentry)) + error = -EBUSY; + else { + lock_kernel(); + linkno = dentry->d_inode->i_nlink; + if (linkno > 1) { + dget(dentry); + } + do_kml = presto_do_kml(info, dir->d_inode); + do_expect = presto_do_expect(info, dir->d_inode); + error = iops->unlink(dir->d_inode, dentry); + unlock_kernel(); + if (!error) + d_delete(dentry); + } + + if (linkno > 1) { + error = presto_settime(fset, dentry, info, ATTR_CTIME); + dput(dentry); + if (error) { + EXIT; + goto exit; + } + } + + error = presto_settime(fset, dir, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + up(&dir->d_inode->i_zombie); + if (error) { + EXIT; + goto exit; + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_UNLINK | 0x10); + if ( do_kml ) { + error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver, + &old_file_ver, + dentry->d_name.len, + dentry->d_name.name); + } + presto_debug_fail_blkdev(fset, PRESTO_OP_UNLINK | 0x20); + if ( do_expect ) { + error = presto_write_last_rcvd(&rec, fset, info); + } + presto_debug_fail_blkdev(fset, PRESTO_OP_UNLINK | 0x30); + EXIT; +exit: + presto_release_space(fset->fset_cache, PRESTO_REQLOW); + presto_trans_commit(fset, handle); + return error; +} + + +int lento_unlink(const char *pathname, struct lento_vfs_context *info) +{ + int error = 0; + char * name; + struct dentry *dentry; + struct nameidata nd; + struct presto_file_set *fset; + + ENTRY; + + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); + + if (path_init(name, LOOKUP_PARENT, &nd)) + error = path_walk(name, &nd); + if (error) + goto exit; + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit2; + } 
+ /* Why not before? Because we want correct error value */ + if (nd.last.name[nd.last.len]) + goto slashes; + error = presto_do_unlink(fset, nd.dentry, dentry, info); + exit2: + EXIT; + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +exit1: + path_release(&nd); +exit: + putname(name); + + return error; + +slashes: + error = !dentry->d_inode ? -ENOENT : + S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; + goto exit2; +} + +int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, const char *oldname, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, new_link_ver; + struct inode_operations *iops; + void *handle; + + ENTRY; + down(&dir->d_inode->i_zombie); + /* record + max path len + space to free */ + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_lock; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->symlink) { + EXIT; + goto exit_lock; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_SYMLINK); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + printk("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n"); + EXIT; + return -ENOSPC; + } + DQUOT_INIT(dir->d_inode); + lock_kernel(); + error = iops->symlink(dir->d_inode, dentry, oldname); + if (error) { + EXIT; + goto exit; + } + + if (dentry->d_inode && + dentry->d_inode->i_gid != presto_excluded_gid) { + struct presto_cache *cache = fset->fset_cache; + + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + /* XXX ? 
Cache state ? if Lento creates a symlink */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + error = presto_settime(fset, dir, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + error = presto_settime(fset, dentry, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_SYMLINK | 0x10); + presto_getversion(&new_link_ver, dentry->d_inode); + if ( presto_do_kml(info, dentry->d_inode) ) + error = presto_journal_symlink(&rec, fset, dentry, oldname, + &tgt_dir_ver, &new_link_ver); + + presto_debug_fail_blkdev(fset, PRESTO_OP_SYMLINK | 0x20); + if ( presto_do_expect(info, dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_SYMLINK | 0x30); + EXIT; +exit: + unlock_kernel(); + presto_trans_commit(fset, handle); + exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + up(&dir->d_inode->i_zombie); + return error; +} + +int lento_symlink(const char *oldname, const char *newname, + struct lento_vfs_context *info) +{ + int error; + char *from; + char *to; + struct dentry *dentry; + struct presto_file_set *fset; + struct nameidata nd; + + ENTRY; + lock_kernel(); + from = getname(oldname); + error = PTR_ERR(from); + if (IS_ERR(from)) { + EXIT; + goto exit; + } + + to = getname(newname); + error = PTR_ERR(to); + if (IS_ERR(to)) { + EXIT; + goto exit_from; + } + + if (path_init(to, LOOKUP_PARENT, &nd)) + error = path_walk(to, &nd); + if (error) { + EXIT; + goto exit_to; + } + + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + path_release(&nd); + EXIT; + goto exit_to; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + path_release(&nd); + EXIT; + goto exit_lock; + } + error = presto_do_symlink(fset, nd.dentry, + dentry, oldname, info); 
+ path_release(&nd); + EXIT; + exit_lock: + up(&nd.dentry->d_inode->i_sem); + dput(dentry); + exit_to: + putname(to); + exit_from: + putname(from); + exit: + unlock_kernel(); + return error; +} + +int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, new_dir_ver; + void *handle; + + ENTRY; + down(&dir->d_inode->i_zombie); + /* one journal record + directory block + room for removals*/ + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_lock; + } + + error = -EPERM; + if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) { + EXIT; + goto exit_lock; + } + + error = -ENOSPC; + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_MKDIR); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + printk("presto_do_mkdir: no space for transaction\n"); + goto exit_lock; + } + + DQUOT_INIT(dir->d_inode); + mode &= (S_IRWXUGO|S_ISVTX); + lock_kernel(); + error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode); + if (error) { + EXIT; + goto exit; + } + + if ( dentry->d_inode && !error && + dentry->d_inode->i_gid != presto_excluded_gid) { + struct presto_cache *cache = fset->fset_cache; + + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + /* if Lento does this, we won't have data */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + error = presto_settime(fset, dir, info, 
ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + error = presto_settime(fset, dentry, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_MKDIR | 0x10); + presto_getversion(&new_dir_ver, dentry->d_inode); + if ( presto_do_kml(info, dentry->d_inode) ) + error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver, + &new_dir_ver, + dentry->d_inode->i_mode); + + presto_debug_fail_blkdev(fset, PRESTO_OP_MKDIR | 0x20); + if ( presto_do_expect(info, dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_MKDIR | 0x30); + EXIT; +exit: + unlock_kernel(); + presto_trans_commit(fset, handle); + exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + up(&dir->d_inode->i_zombie); + return error; +} + +/* + * Look out: this function may change a normal dentry + * into a directory dentry (different size).. + */ +int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info) +{ + int error; + char *pathname; + struct dentry *dentry; + struct presto_file_set *fset; + struct nameidata nd; + + ENTRY; + CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n", + name, mode, info->slot_offset, info->recno, info->flags); + pathname = getname(name); + error = PTR_ERR(pathname); + if (IS_ERR(pathname)) { + EXIT; + return error; + } + + if (path_init(pathname, LOOKUP_PARENT, &nd)) + error = path_walk(pathname, &nd); + if (error) + goto out_name; + + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto out_dput; + } + + error = presto_do_mkdir(fset, nd.dentry, dentry, + mode & S_IALLUGO, info); +out_dput: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out_name: + EXIT; + putname(pathname); + CDEBUG(D_PIOCTL, "error: %d\n", error); + 
return error; +} + +static void d_unhash(struct dentry *dentry) +{ + dget(dentry); + switch (atomic_read(&dentry->d_count)) { + default: + shrink_dcache_parent(dentry); + if (atomic_read(&dentry->d_count) != 2) + break; + case 2: + d_drop(dentry); + } +} + +int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, old_dir_ver; + struct inode_operations *iops; + void *handle; + int do_kml, do_expect; + int size; + + ENTRY; + error = may_delete(dir->d_inode, dentry, 1); + if (error) + return error; + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->rmdir) { + EXIT; + return error; + } + + size = PRESTO_REQHIGH - dentry->d_inode->i_size; + error = presto_reserve_space(fset->fset_cache, size); + if (error) { + EXIT; + return error; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + presto_getversion(&old_dir_ver, dentry->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_RMDIR); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, size); + printk("ERROR: presto_do_rmdir: no space for transaction. 
Tell Peter.\n"); + return -ENOSPC; + } + + DQUOT_INIT(dir->d_inode); + + do_kml = presto_do_kml(info, dir->d_inode); + do_expect = presto_do_expect(info, dir->d_inode); + + double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie); + d_unhash(dentry); + if (IS_DEADDIR(dir->d_inode)) + error = -ENOENT; + else if (d_mountpoint(dentry)) + error = -EBUSY; + else { + lock_kernel(); + error = iops->rmdir(dir->d_inode, dentry); + unlock_kernel(); + if (!error) { + dentry->d_inode->i_flags |= S_DEAD; + error = presto_settime(fset, dir, info, + ATTR_CTIME | ATTR_MTIME); + } + } + double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie); + if (!error) + d_delete(dentry); + dput(dentry); + + presto_debug_fail_blkdev(fset, PRESTO_OP_RMDIR | 0x10); + if ( do_kml ) + error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver, + &old_dir_ver, + dentry->d_name.len, + dentry->d_name.name); + + presto_debug_fail_blkdev(fset, PRESTO_OP_RMDIR | 0x20); + if ( do_expect ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_RMDIR | 0x30); + EXIT; + + presto_trans_commit(fset, handle); + presto_release_space(fset->fset_cache, size); + return error; +} + +int lento_rmdir(const char *pathname, struct lento_vfs_context *info) +{ + int error = 0; + char * name; + struct dentry *dentry; + struct presto_file_set *fset; + struct nameidata nd; + + ENTRY; + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); + + if (path_init(name, LOOKUP_PARENT, &nd)) + error = path_walk(name, &nd); + if (error) + goto exit; + + switch(nd.last_type) { + case LAST_DOTDOT: + error = -ENOTEMPTY; + goto exit1; + case LAST_ROOT: case LAST_DOT: + error = -EBUSY; + goto exit1; + } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit_put; + } + error = 
presto_do_rmdir(fset, nd.dentry, dentry, info); + exit_put: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +exit1: + EXIT; + path_release(&nd); +exit: + EXIT; + putname(name); + return error; +} + +int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, dev_t dev, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error = -EPERM; + struct presto_version tgt_dir_ver, new_node_ver; + struct inode_operations *iops; + void *handle; + + ENTRY; + + down(&dir->d_inode->i_zombie); + /* one KML entry */ + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + up(&dir->d_inode->i_zombie); + return error; + } + + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) { + EXIT; + goto exit_lock; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_lock; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->mknod) { + EXIT; + goto exit_lock; + } + + DQUOT_INIT(dir->d_inode); + lock_kernel(); + + error = -ENOSPC; + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, PRESTO_OP_MKNOD); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + printk("presto_do_mknod: no space for transaction\n"); + goto exit_lock2; + } + + error = iops->mknod(dir->d_inode, dentry, mode, dev); + if ( dentry->d_inode && + dentry->d_inode->i_gid != presto_excluded_gid) { + struct presto_cache *cache = fset->fset_cache; + + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + + /* if Lento does this, we won't have data */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + error = presto_settime(fset, dir, info, 
ATTR_MTIME); + if (error) { + EXIT; + } + error = presto_settime(fset, dentry, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + } + + presto_debug_fail_blkdev(fset, PRESTO_OP_MKNOD | 0x10); + presto_getversion(&new_node_ver, dentry->d_inode); + if ( presto_do_kml(info, dentry->d_inode) ) + error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver, + &new_node_ver, + dentry->d_inode->i_mode, + MAJOR(dev), MINOR(dev) ); + + presto_debug_fail_blkdev(fset, PRESTO_OP_MKNOD | 0x20); + if ( presto_do_expect(info, dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_MKNOD | 0x30); + EXIT; + presto_trans_commit(fset, handle); + exit_lock2: + unlock_kernel(); + exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + up(&dir->d_inode->i_zombie); + return error; +} + +int lento_mknod(const char *filename, int mode, dev_t dev, + struct lento_vfs_context *info) +{ + int error = 0; + char * tmp; + struct dentry * dentry; + struct nameidata nd; + struct presto_file_set *fset; + + ENTRY; + + if (S_ISDIR(mode)) + return -EPERM; + tmp = getname(filename); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + if (path_init(tmp, LOOKUP_PARENT, &nd)) + error = path_walk(tmp, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit_put; + } + switch (mode & S_IFMT) { + case 0: case S_IFREG: + error = -EOPNOTSUPP; + break; + case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + error = presto_do_mknod(fset, nd.dentry, dentry, + mode, dev, info); + break; + case S_IFDIR: + error = -EPERM; + break; + default: + error = -EINVAL; + } + exit_put: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: + putname(tmp); + + return error; +} + +static int do_rename(struct presto_file_set *fset, + struct dentry 
*old_parent, struct dentry *old_dentry, + struct dentry *new_parent, struct dentry *new_dentry, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct inode_operations *iops; + struct presto_version src_dir_ver, tgt_dir_ver; + void *handle; + int new_inode_unlink = 0; + struct inode *old_dir = old_parent->d_inode; + struct inode *new_dir = new_parent->d_inode; + + ENTRY; + presto_getversion(&src_dir_ver, old_dir); + presto_getversion(&tgt_dir_ver, new_dir); + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops || !iops->rename) { + EXIT; + return error; + } + + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + return error; + } + handle = presto_trans_start(fset, old_dir, PRESTO_OP_RENAME); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + printk("presto_do_rename: no space for transaction\n"); + return -ENOSPC; + } + if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) { + dget(new_dentry); + new_inode_unlink = 1; + } + + error = iops->rename(old_dir, old_dentry, new_dir, new_dentry); + + if (error) { + EXIT; + goto exit; + } + + if (new_inode_unlink) { + error = presto_settime(fset, old_dentry, info, ATTR_CTIME); + dput(old_dentry); + if (error) { + EXIT; + goto exit; + } + } + error = presto_settime(fset, old_parent, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + error = presto_settime(fset, new_parent, info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + /* XXX make a distinction between cross file set + * and intra file set renames here + */ + presto_debug_fail_blkdev(fset, PRESTO_OP_RENAME | 0x10); + if ( presto_do_kml(info, old_dir) ) + error = presto_journal_rename(&rec, fset, old_dentry, new_dentry, + &src_dir_ver, &tgt_dir_ver); + + presto_debug_fail_blkdev(fset, PRESTO_OP_RENAME | 0x20); + + if ( presto_do_expect(info, new_dir) ) + error = 
presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_RENAME | 0x30); + EXIT; +exit: + presto_trans_commit(fset, handle); + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + return error; +} + +static +int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent, + struct dentry *old_dentry, struct dentry *new_parent, + struct dentry *new_dentry, struct lento_vfs_context *info) +{ + int error; + struct inode *target; + struct inode *old_dir = old_parent->d_inode; + struct inode *new_dir = new_parent->d_inode; + + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + + error = may_delete(old_dir, old_dentry, 1); + if (error) + return error; + + if (new_dir->i_dev != old_dir->i_dev) + return -EXDEV; + + if (!new_dentry->d_inode) + error = may_create(new_dir, new_dentry); + else + error = may_delete(new_dir, new_dentry, 1); + if (error) + return error; + + if (!old_dir->i_op || !old_dir->i_op->rename) + return -EPERM; + + /* + * If we are going to change the parent - check write permissions, + * we'll need to flip '..'. + */ + if (new_dir != old_dir) { + error = permission(old_dentry->d_inode, MAY_WRITE); + } + if (error) + return error; + + DQUOT_INIT(old_dir); + DQUOT_INIT(new_dir); + down(&old_dir->i_sb->s_vfs_rename_sem); + error = -EINVAL; + if (is_subdir(new_dentry, old_dentry)) + goto out_unlock; + target = new_dentry->d_inode; + if (target) { /* Hastur! Hastur! Hastur! 
*/ + triple_down(&old_dir->i_zombie, + &new_dir->i_zombie, + &target->i_zombie); + d_unhash(new_dentry); + } else + double_down(&old_dir->i_zombie, + &new_dir->i_zombie); + if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) + error = -ENOENT; + else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = do_rename(fset, old_parent, old_dentry, + new_parent, new_dentry, info); + if (target) { + if (!error) + target->i_flags |= S_DEAD; + triple_up(&old_dir->i_zombie, + &new_dir->i_zombie, + &target->i_zombie); + if (d_unhashed(new_dentry)) + d_rehash(new_dentry); + dput(new_dentry); + } else + double_up(&old_dir->i_zombie, + &new_dir->i_zombie); + + if (!error) + d_move(old_dentry,new_dentry); +out_unlock: + up(&old_dir->i_sb->s_vfs_rename_sem); + return error; +} + +static +int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent, + struct dentry *old_dentry, struct dentry *new_parent, + struct dentry *new_dentry, struct lento_vfs_context *info) +{ + struct inode *old_dir = old_parent->d_inode; + struct inode *new_dir = new_parent->d_inode; + int error; + + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + + error = may_delete(old_dir, old_dentry, 0); + if (error) + return error; + + if (new_dir->i_dev != old_dir->i_dev) + return -EXDEV; + + if (!new_dentry->d_inode) + error = may_create(new_dir, new_dentry); + else + error = may_delete(new_dir, new_dentry, 0); + if (error) + return error; + + if (!old_dir->i_op || !old_dir->i_op->rename) + return -EPERM; + + DQUOT_INIT(old_dir); + DQUOT_INIT(new_dir); + double_down(&old_dir->i_zombie, &new_dir->i_zombie); + if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = do_rename(fset, old_parent, old_dentry, + new_parent, new_dentry, info); + double_up(&old_dir->i_zombie, &new_dir->i_zombie); + if (error) + return error; + /* The following d_move() should become unconditional */ + if (!(old_dir->i_sb->s_type->fs_flags & 
FS_ODD_RENAME)) { + d_move(old_dentry, new_dentry); + } + return 0; +} + +int presto_do_rename(struct presto_file_set *fset, + struct dentry *old_parent, struct dentry *old_dentry, + struct dentry *new_parent, struct dentry *new_dentry, + struct lento_vfs_context *info) +{ + if (S_ISDIR(old_dentry->d_inode->i_mode)) + return presto_rename_dir(fset, old_parent,old_dentry,new_parent, + new_dentry, info); + else + return presto_rename_other(fset, old_parent, old_dentry, + new_parent,new_dentry, info); +} + + +int lento_do_rename(const char *oldname, const char *newname, + struct lento_vfs_context *info) +{ + int error = 0; + struct dentry * old_dir, * new_dir; + struct dentry * old_dentry, *new_dentry; + struct nameidata oldnd, newnd; + struct presto_file_set *fset; + + ENTRY; + + if (path_init(oldname, LOOKUP_PARENT, &oldnd)) + error = path_walk(oldname, &oldnd); + + if (error) + goto exit; + + if (path_init(newname, LOOKUP_PARENT, &newnd)) + error = path_walk(newname, &newnd); + if (error) + goto exit1; + + error = -EXDEV; + if (oldnd.mnt != newnd.mnt) + goto exit2; + + old_dir = oldnd.dentry; + error = -EBUSY; + if (oldnd.last_type != LAST_NORM) + goto exit2; + + new_dir = newnd.dentry; + if (newnd.last_type != LAST_NORM) + goto exit2; + + double_lock(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); + error = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + goto exit3; + /* source must exist */ + error = -ENOENT; + if (!old_dentry->d_inode) + goto exit4; + fset = presto_fset(old_dentry); + error = -EINVAL; + if ( !fset ) { + printk("No fileset!\n"); + EXIT; + goto exit4; + } + /* unless the source is a directory trailing slashes give -ENOTDIR */ + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + error = -ENOTDIR; + if (oldnd.last.name[oldnd.last.len]) + goto exit4; + if (newnd.last.name[newnd.last.len]) + goto exit4; + } + new_dentry = lookup_hash(&newnd.last, new_dir); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto exit4; + + 
lock_kernel(); + error = presto_do_rename(fset, old_dir, old_dentry, + new_dir, new_dentry, info); + unlock_kernel(); + + dput(new_dentry); +exit4: + dput(old_dentry); +exit3: + double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem); +exit2: + path_release(&newnd); +exit1: + path_release(&oldnd); +exit: + return error; +} + +int lento_rename(const char * oldname, const char * newname, + struct lento_vfs_context *info) +{ + int error; + char * from; + char * to; + + from = getname(oldname); + if(IS_ERR(from)) + return PTR_ERR(from); + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + error = lento_do_rename(from,to, info); + putname(to); + } + putname(from); + return error; +} + +struct dentry *presto_iopen(struct dentry *dentry, + ino_t ino, unsigned int generation) +{ + struct presto_file_set *fset; + char name[48]; + int error; + + ENTRY; + /* see if we already have the dentry we want */ + if (dentry->d_inode && dentry->d_inode->i_ino == ino && + dentry->d_inode->i_generation == generation) { + EXIT; + return dentry; + } + + /* Make sure we have a cache beneath us. We should always find at + * least one dentry inside the cache (if it exists), otherwise not + * even the cache root exists, or we passed in a bad name. 
+ */ + fset = presto_fset(dentry); + error = -EINVAL; + if (!fset) { + printk("No fileset for %*s!\n", + dentry->d_name.len, dentry->d_name.name); + EXIT; + dput(dentry); + return ERR_PTR(error); + } + dput(dentry); + + sprintf(name, "%s%#lx%c%#x", + PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP, generation); + CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, name); + return lookup_one_len(name, fset->fset_mtpt, strlen(name)); +} + +static struct file *presto_filp_dopen(struct dentry *dentry, int flags) +{ + struct file *f; + struct inode *inode; + int flag, error; + + ENTRY; + error = -ENFILE; + f = get_empty_filp(); + if (!f) { + CDEBUG(D_PIOCTL, "error getting file pointer\n"); + EXIT; + goto out; + } + f->f_flags = flag = flags; + f->f_mode = (flag+1) & O_ACCMODE; + inode = dentry->d_inode; + if (f->f_mode & FMODE_WRITE) { + error = get_write_access(inode); + if (error) { + CDEBUG(D_PIOCTL, "error getting write access\n"); + EXIT; + goto cleanup_file; + } + } + + f->f_dentry = dentry; + f->f_pos = 0; + f->f_reada = 0; + f->f_op = NULL; + if (inode->i_op) + /* XXX should we set to presto ops, or leave at cache ops? */ + f->f_op = inode->i_fop; + if (f->f_op && f->f_op->open) { + error = f->f_op->open(inode, f); + if (error) { + CDEBUG(D_PIOCTL, "error calling cache 'open'\n"); + EXIT; + goto cleanup_all; + } + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + + return f; + +cleanup_all: + if (f->f_mode & FMODE_WRITE) + put_write_access(inode); +cleanup_file: + put_filp(f); +out: + return ERR_PTR(error); +} + + +/* Open an inode by number. We pass in the cache root name (or a subdirectory + * from the cache that is guaranteed to exist) to be able to access the cache. 
+ */ +int lento_iopen(const char *name, ino_t ino, unsigned int generation, + int flags) +{ + char * tmp; + struct dentry *dentry; + struct nameidata nd; + int fd; + int error; + + ENTRY; + CDEBUG(D_PIOCTL, + "open %s:inode %#lx (%ld), generation %x (%d), flags %d \n", + name, ino, ino, generation, generation, flags); + /* We don't allow creation of files by number only, as it would + * lead to a dangling files not in any directory. We could also + * just turn off the flag and ignore it. + */ + if (flags & O_CREAT) { + printk(KERN_WARNING __FUNCTION__ + ": create file by inode number (%ld) not allowed\n",ino); + EXIT; + return -EACCES; + } + + tmp = getname(name); + if (IS_ERR(tmp)) { + EXIT; + return PTR_ERR(tmp); + } + + lock_kernel(); +again: /* look the named file or a parent directory so we can get the cache */ + error = presto_walk(tmp, &nd); + if ( error && error != -ENOENT ) { + EXIT; + return error; + } + if (error == -ENOENT) + dentry = NULL; + else + dentry = nd.dentry; + + /* we didn't find the named file, so see if a parent exists */ + if (!dentry) { + char *slash; + + slash = strrchr(tmp, '/'); + if (slash && slash != tmp) { + *slash = '\0'; + path_release(&nd); + goto again; + } + /* we should never get here... */ + CDEBUG(D_PIOCTL, "no more path components to try!\n"); + fd = -ENOENT; + goto exit; + } + CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry); + + dentry = presto_iopen(dentry, ino, generation); + fd = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + EXIT; + goto exit; + } + + /* XXX start of code that might be replaced by something like: + * if (flags & (O_WRONLY | O_RDWR)) { + * error = get_write_access(dentry->d_inode); + * if (error) { + * EXIT; + * goto cleanup_dput; + * } + * } + * fd = open_dentry(dentry, flags); + * + * including the presto_filp_dopen() function (check dget counts!) 
+ */ + fd = get_unused_fd(); + if (fd < 0) { + EXIT; + goto cleanup_dput; + } + + { + int error; + struct file * f = presto_filp_dopen(dentry, flags); + error = PTR_ERR(f); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = error; + EXIT; + goto cleanup_dput; + } + fd_install(fd, f); + } + /* end of code that might be replaced by open_dentry */ + + EXIT; +exit: + unlock_kernel(); + path_release(&nd); + putname(tmp); + return fd; + +cleanup_dput: + putname(&nd); + goto exit; +} + +int lento_close(unsigned int fd, struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct file * filp; + struct dentry *dentry; + int do_kml, do_expect; + + ENTRY; + lock_kernel(); + + error = -EBADF; + filp = fcheck(fd); + if (filp) { + + struct files_struct * files = current->files; + dentry = filp->f_dentry; + dget(dentry); + do_kml = presto_do_kml(info, dentry->d_inode); + do_expect = presto_do_expect(info, dentry->d_inode); + files->fd[fd] = NULL; + put_unused_fd(fd); + FD_CLR(fd, files->close_on_exec); + error = filp_close(filp, files); + } else { + EXIT; + return error; + } + + if (error) { + EXIT; + goto exit; + } + + if ( do_kml ) { + struct presto_file_set *fset; + struct presto_version new_file_ver; + + fset = presto_fset(dentry); + error = -EINVAL; + if (!fset) { + printk("No fileset for %*s!\n", + dentry->d_name.len, dentry->d_name.name); + EXIT; + goto exit; + } + presto_getversion(&new_file_ver, dentry->d_inode); + error = presto_journal_close(&rec, fset, filp, dentry, + &new_file_ver); + if ( error ) { + printk("presto: close error %d!\n", error); + EXIT; + goto exit; + } + if ( do_expect ) + + error = presto_write_last_rcvd(&rec, fset, info); + } + + EXIT; +exit: + dput(dentry); + unlock_kernel(); + return error; +} + +#ifdef CONFIG_FS_EXT_ATTR + +#ifdef CONFIG_FS_POSIX_ACL +/* Posix ACL code changes i_mode without using a notify_change (or + * a mark_inode_dirty!). We need to duplicate this at the reintegrator + * which is done by this function. 
This function also takes care of + * resetting the cached posix acls in this inode. If we don't reset these + * VFS continues using the old acl information, which by now may be out of + * date. + */ +int presto_setmode(struct presto_file_set *fset, struct dentry *dentry, + mode_t mode) +{ + struct inode *inode = dentry->d_inode; + + ENTRY; + /* The extended attributes for this inode were modified. + * At this point we can not be sure if any of the ACL + * information for this inode was updated. So we will + * force VFS to reread the acls. Note that we do this + * only when called from the SETEXTATTR ioctl, which is why we + * do this while setting the mode of the file. Also note + * that mark_inode_dirty is not be needed for i_*acl only + * to force i_mode info to disk, and should be removed once + * we use notify_change to update the mode. + * XXX: is mode setting really needed? Just setting acl's should + * be enough! VFS should change the i_mode as needed? SHP + */ + if (inode->i_acl && + inode->i_acl != POSIX_ACL_NOT_CACHED) + posix_acl_release(inode->i_acl); + if (inode->i_default_acl && + inode->i_default_acl != POSIX_ACL_NOT_CACHED) + posix_acl_release(inode->i_default_acl); + inode->i_acl = POSIX_ACL_NOT_CACHED; + inode->i_default_acl = POSIX_ACL_NOT_CACHED; + inode->i_mode = mode; + /* inode should already be dirty...but just in case */ + mark_inode_dirty(inode); + return 0; + +#if 0 + /* XXX: The following code is the preferred way to set mode, + * however, I need to carefully go through possible recursion + * paths back into presto. See comments in presto_do_setattr. 
+ */ + { + int error=0; + struct super_operations *sops; + struct iattr iattr; + + iattr.ia_mode = mode; + iattr.ia_valid = ATTR_MODE|ATTR_FORCE; + + error = -EPERM; + sops = filter_c2csops(fset->fset_cache->cache_filter); + if (!sops && + !sops->notify_change) { + EXIT; + return error; + } + + error = sops->notify_change(dentry, &iattr); + + EXIT; + return error; + } +#endif +} +#endif + +/* setextattr Interface to cache filesystem */ +int presto_do_set_ext_attr(struct presto_file_set *fset, + struct dentry *dentry, + const char *name, void *buffer, + size_t buffer_len, int flags, mode_t *mode, + struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode *inode = dentry->d_inode; + struct inode_operations *iops; + int error; + struct presto_version ver; + void *handle; + char temp[PRESTO_EXT_ATTR_NAME_MAX+1]; + + ENTRY; + error = -EROFS; + if (IS_RDONLY(inode)) { + EXIT; + return -EROFS; + } + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + EXIT; + return -EPERM; + } + + presto_getversion(&ver, inode); + error = -EPERM; + /* We need to invoke different filters based on whether + * this dentry is a regular file, directory or symlink. 
+ */ + switch (inode->i_mode & S_IFMT) { + case S_IFLNK: /* symlink */ + iops = filter_c2csiops(fset->fset_cache->cache_filter); + break; + case S_IFDIR: /* directory */ + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + break; + case S_IFREG: + default: /* everything else including regular files */ + iops = filter_c2cfiops(fset->fset_cache->cache_filter); + } + + if (!iops && !iops->set_ext_attr) { + EXIT; + return error; + } + + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + return error; + } + + + handle = presto_trans_start(fset,dentry->d_inode,PRESTO_OP_SETEXTATTR); + if ( IS_ERR(handle) ) { + printk("presto_do_set_ext_attr: no space for transaction\n"); + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + return -ENOSPC; + } + + /* We first "truncate" name to the maximum allowable in presto */ + /* This simulates the strncpy_from_use code in fs/ext_attr.c */ + strncpy(temp,name,sizeof(temp)); + + /* Pass down to cache*/ + error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags); + if (error) { + EXIT; + goto exit; + } + +#ifdef CONFIG_FS_POSIX_ACL + /* Reset mode if specified*/ + /* XXX: when we do native acl support, move this code out! */ + if (mode != NULL) { + error = presto_setmode(fset, dentry, *mode); + if (error) { + EXIT; + goto exit; + } + } +#endif + + /* Reset ctime. Only inode change time (ctime) is affected */ + error = presto_settime(fset, dentry, info, ATTR_CTIME); + if (error) { + EXIT; + goto exit; + } + + if (flags & EXT_ATTR_FLAG_USER) { + printk(" USER flag passed to presto_do_set_ext_attr!\n"); + *(int *)0 = 1; + } + + /* We are here, so set_ext_attr succeeded. We no longer need to keep + * track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force + * the attribute value during log replay. 
-SHP + */ + flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE); + + presto_debug_fail_blkdev(fset, PRESTO_OP_SETEXTATTR | 0x10); + if ( presto_do_kml(info, dentry->d_inode) ) + error = presto_journal_set_ext_attr + (&rec, fset, dentry, &ver, name, buffer, + buffer_len, flags); + + presto_debug_fail_blkdev(fset, PRESTO_OP_SETEXTATTR | 0x20); + if ( presto_do_expect(info, dentry->d_inode) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, PRESTO_OP_SETEXTATTR | 0x30); + EXIT; +exit: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + presto_trans_commit(fset, handle); + + return error; +} +#endif diff --git a/fs/namespace.c b/fs/namespace.c index c15de9f71aea..45393b7c7e87 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -22,6 +22,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> #include <linux/nfs_mount.h> +#include <linux/seq_file.h> struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data); int do_remount_sb(struct super_block *sb, int flags, void * data); @@ -167,160 +168,132 @@ void __mntput(struct vfsmount *mnt) kill_super(sb); } -/* Use octal escapes, like mount does, for embedded spaces etc. 
*/ -static unsigned char need_escaping[] = { ' ', '\t', '\n', '\\' }; - -static int -mangle(const unsigned char *s, char *buf, int len) { - char *sp; - int n; - - sp = buf; - while(*s && sp-buf < len-3) { - for (n = 0; n < sizeof(need_escaping); n++) { - if (*s == need_escaping[n]) { - *sp++ = '\\'; - *sp++ = '0' + ((*s & 0300) >> 6); - *sp++ = '0' + ((*s & 070) >> 3); - *sp++ = '0' + (*s & 07); - goto next; - } - } - *sp++ = *s; - next: - s++; - } - return sp - buf; /* no trailing NUL */ +/* iterator */ +static void *m_start(struct seq_file *m, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + + down(&mount_sem); + list_for_each(p, &vfsmntlist) + if (!n--) + return list_entry(p, struct vfsmount, mnt_list); + return NULL; } -static struct proc_fs_info { - int flag; - char *str; -} fs_info[] = { - { MS_SYNCHRONOUS, ",sync" }, - { MS_MANDLOCK, ",mand" }, - { MS_NOATIME, ",noatime" }, - { MS_NODIRATIME, ",nodiratime" }, - { 0, NULL } -}; +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; + (*pos)++; + return p==&vfsmntlist ? 
NULL : list_entry(p, struct vfsmount, mnt_list); +} -static struct proc_fs_info mnt_info[] = { - { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, - { MNT_NOEXEC, ",noexec" }, - { 0, NULL } -}; +static void m_stop(struct seq_file *m, void *v) +{ + up(&mount_sem); +} -static struct proc_nfs_info { - int flag; - char *str; - char *nostr; -} nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, - { NFS_MOUNT_TCP, ",tcp", ",udp" }, - { NFS_MOUNT_NOCTO, ",nocto", "" }, - { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, - { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, - { 0, NULL, NULL } -}; +static inline void mangle(struct seq_file *m, const char *s) +{ + seq_escape(m, s, " \t\n\\"); +} -int get_filesystem_info( char *buf ) +static void show_nfs_mount(struct seq_file *m, struct vfsmount *mnt) { - struct list_head *p; - struct proc_fs_info *fs_infop; + static struct proc_nfs_info { + int flag; + char *str; + char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_TCP, ",tcp", ",udp" }, + { NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, + { 0, NULL, NULL } + }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss; - int len, prevlen; - char *path, *buffer = (char *) __get_free_page(GFP_KERNEL); - - if (!buffer) return 0; - len = prevlen = 0; - -#define FREEROOM ((int)PAGE_SIZE-200-len) -#define MANGLE(s) len += mangle((s), buf+len, FREEROOM); - - for (p = vfsmntlist.next; p != &vfsmntlist; p = p->next) { - struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list); - path = d_path(tmp->mnt_root, tmp, buffer, PAGE_SIZE); - if (!path) - continue; - MANGLE(tmp->mnt_devname ? 
tmp->mnt_devname : "none"); - buf[len++] = ' '; - MANGLE(path); - buf[len++] = ' '; - MANGLE(tmp->mnt_sb->s_type->name); - len += sprintf(buf+len, " %s", - tmp->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); - for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { - if (tmp->mnt_sb->s_flags & fs_infop->flag) - MANGLE(fs_infop->str); - } - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (tmp->mnt_flags & fs_infop->flag) - MANGLE(fs_infop->str); - } - if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) { - nfss = &tmp->mnt_sb->u.nfs_sb.s_server; - len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version); - - len += sprintf(buf+len, ",rsize=%d", nfss->rsize); - - len += sprintf(buf+len, ",wsize=%d", nfss->wsize); -#if 0 - if (nfss->timeo != 7*HZ/10) { - len += sprintf(buf+len, ",timeo=%d", - nfss->timeo*10/HZ); - } - if (nfss->retrans != 3) { - len += sprintf(buf+len, ",retrans=%d", - nfss->retrans); - } -#endif - if (nfss->acregmin != 3*HZ) { - len += sprintf(buf+len, ",acregmin=%d", - nfss->acregmin/HZ); - } - if (nfss->acregmax != 60*HZ) { - len += sprintf(buf+len, ",acregmax=%d", - nfss->acregmax/HZ); - } - if (nfss->acdirmin != 30*HZ) { - len += sprintf(buf+len, ",acdirmin=%d", - nfss->acdirmin/HZ); - } - if (nfss->acdirmax != 60*HZ) { - len += sprintf(buf+len, ",acdirmax=%d", - nfss->acdirmax/HZ); - } - for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - char *str; - if (nfss->flags & nfs_infop->flag) - str = nfs_infop->str; - else - str = nfs_infop->nostr; - MANGLE(str); - } - len += sprintf(buf+len, ",addr="); - MANGLE(nfss->hostname); - } - len += sprintf(buf + len, " 0 0\n"); - if (FREEROOM <= 3) { - len = prevlen; - len += sprintf(buf+len, "# truncated\n"); - break; - } - prevlen = len; + struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server; + + seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",rsize=%d", nfss->rsize); + seq_printf(m, ",wsize=%d", nfss->wsize); + if (nfss->acregmin != 3*HZ) + seq_printf(m, ",acregmin=%d", 
nfss->acregmin/HZ); + if (nfss->acregmax != 60*HZ) + seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + if (nfss->acdirmin != 30*HZ) + seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + if (nfss->acdirmax != 60*HZ) + seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); } + seq_puts(m, ",addr="); + mangle(m, nfss->hostname); +} - free_page((unsigned long) buffer); - return len; -#undef MANGLE -#undef FREEROOM +static int show_vfsmnt(struct seq_file *m, void *v) +{ + struct vfsmount *mnt = v; + static struct proc_fs_info { + int flag; + char *str; + } fs_info[] = { + { MS_SYNCHRONOUS, ",sync" }, + { MS_MANDLOCK, ",mand" }, + { MS_NOATIME, ",noatime" }, + { MS_NODIRATIME, ",nodiratime" }, + { 0, NULL } + }; + static struct proc_fs_info mnt_info[] = { + { MNT_NOSUID, ",nosuid" }, + { MNT_NODEV, ",nodev" }, + { MNT_NOEXEC, ",noexec" }, + { 0, NULL } + }; + struct proc_fs_info *fs_infop; + char *path_buf, *path; + + path_buf = (char *) __get_free_page(GFP_KERNEL); + if (!path_buf) + return -ENOMEM; + path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE); + + mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); + seq_putc(m, ' '); + mangle(m, path); + free_page((unsigned long) path_buf); + seq_putc(m, ' '); + mangle(m, mnt->mnt_sb->s_type->name); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? 
" ro" : " rw"); + for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { + if (mnt->mnt_sb->s_flags & fs_infop->flag) + seq_puts(m, fs_infop->str); + } + for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { + if (mnt->mnt_flags & fs_infop->flag) + seq_puts(m, fs_infop->str); + } + if (strcmp("nfs", mnt->mnt_sb->s_type->name) == 0) + show_nfs_mount(m, mnt); + seq_puts(m, " 0 0\n"); + return 0; } +struct seq_operations mounts_op = { + start: m_start, + next: m_next, + stop: m_stop, + show: show_vfsmnt +}; + /* * Doesn't take quota and stuff into account. IOW, in some cases it will * give false negatives. The main reason why it's here is that we need diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 946a5ccd7423..b0e1fd06c3f8 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -1053,3 +1053,4 @@ EXPORT_NO_SYMBOLS; module_init(init_openprom_fs) module_exit(exit_openprom_fs) +MODULE_LICENSE("GPL"); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index e1601dba7ca8..ee030ece19d2 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -35,6 +35,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/smp_lock.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -57,12 +58,10 @@ extern int get_malloc(char * buffer); #endif #ifdef CONFIG_MODULES extern int get_module_list(char *); -extern int get_ksyms_list(char *, char **, off_t, int); #endif extern int get_device_list(char *); extern int get_partition_list(char *, char **, off_t, int); extern int get_filesystem_list(char *); -extern int get_filesystem_info(char *); extern int get_exec_domain_list(char *); extern int get_irq_list(char *); extern int get_dma_list(char *); @@ -251,13 +250,17 @@ static int modules_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -static int ksyms_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +extern struct 
seq_operations ksyms_op; +static int ksyms_open(struct inode *inode, struct file *file) { - int len = get_ksyms_list(page, start, off, count); - if (len < count) *eof = 1; - return len; + return seq_open(file, &ksyms_op); } +static struct file_operations proc_ksyms_operations = { + open: ksyms_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; #endif static int kstat_read_proc(char *page, char **start, off_t off, @@ -414,13 +417,6 @@ static int locks_read_proc(char *page, char **start, off_t off, return len; } -static int mounts_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = get_filesystem_info(page); - return proc_calc_metrics(page, start, off, count, eof, len); -} - static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -505,6 +501,18 @@ static struct file_operations proc_profile_operations = { write: write_profile, }; +extern struct seq_operations mounts_op; +static int mounts_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &mounts_op); +} +static struct file_operations proc_mounts_operations = { + open: mounts_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + struct proc_dir_entry *proc_root_kcore; void __init proc_misc_init(void) @@ -530,7 +538,6 @@ void __init proc_misc_init(void) #endif #ifdef CONFIG_MODULES {"modules", modules_read_proc}, - {"ksyms", ksyms_read_proc}, #endif {"stat", kstat_read_proc}, {"devices", devices_read_proc}, @@ -546,7 +553,6 @@ void __init proc_misc_init(void) {"rtc", ds1286_read_proc}, #endif {"locks", locks_read_proc}, - {"mounts", mounts_read_proc}, {"swaps", swaps_read_proc}, {"iomem", memory_read_proc}, {"execdomains", execdomains_read_proc}, @@ -559,6 +565,12 @@ void __init proc_misc_init(void) entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); if (entry) entry->proc_fops = &proc_kmsg_operations; + entry = create_proc_entry("mounts", 0, NULL); + if 
(entry) + entry->proc_fops = &proc_mounts_operations; + entry = create_proc_entry("ksyms", 0, NULL); + if (entry) + entry->proc_fops = &proc_ksyms_operations; proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { proc_root_kcore->proc_fops = &proc_kcore_operations; diff --git a/fs/seq_file.c b/fs/seq_file.c new file mode 100644 index 000000000000..3786b3f37d70 --- /dev/null +++ b/fs/seq_file.c @@ -0,0 +1,236 @@ +/* + * linux/fs/seq_file.c + * + * helper functions for making syntetic files from sequences of records. + * initial implementation -- AV, Oct 2001. + */ + +#include <linux/malloc.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <asm/uaccess.h> + +/** + * seq_open - initialize sequential file + * @file: file we initialize + * @op: method table describing the sequence + * + * seq_open() sets @file, associating it with a sequence described + * by @op. @op->start() sets the iterator up and returns the first + * element of sequence. @op->stop() shuts it down. @op->next() + * returns the next element of sequence. @op->show() prints element + * into the buffer. In case of error ->start() and ->next() return + * ERR_PTR(error). In the end of sequence they return %NULL. ->show() + * returns 0 in case of success and negative number in case of error. + */ +int seq_open(struct file *file, struct seq_operations *op) +{ + struct seq_file *p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + memset(p, 0, sizeof(*p)); + sema_init(&p->sem, 1); + p->op = op; + file->private_data = p; + return 0; +} + +/** + * seq_read - ->read() method for sequential files. 
+ * @file, @buf, @size, @ppos: see file_operations method + * + * Ready-made ->f_op->read() + */ +ssize_t seq_read(struct file *file, char *buf, size_t size, loff_t *ppos) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + size_t copied = 0; + loff_t pos; + size_t n; + void *p; + int err = 0; + + if (ppos != &file->f_pos) + return -EPIPE; + + down(&m->sem); + /* grab buffer if we didn't have one */ + if (!m->buf) { + m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + if (!m->buf) + goto Enomem; + } + /* if not empty - flush it first */ + if (m->count) { + n = min(m->count, size); + err = copy_to_user(buf, m->buf + m->from, n); + if (err) + goto Efault; + m->count -= n; + m->from += n; + size -= n; + buf += n; + copied += n; + if (!m->count) + (*ppos)++; + if (!size) + goto Done; + } + /* we need at least one record in buffer */ + while (1) { + pos = *ppos; + p = m->op->start(m, &pos); + err = PTR_ERR(p); + if (!p || IS_ERR(p)) + break; + err = m->op->show(m, p); + if (err) + break; + if (m->count < m->size) + goto Fill; + m->op->stop(m, p); + kfree(m->buf); + m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + if (!m->buf) + goto Enomem; + } + m->op->stop(m, p); + goto Done; +Fill: + /* they want more? let's try to get some more */ + while (m->count < size) { + size_t offs = m->count; + loff_t next = pos; + p = m->op->next(m, p, &next); + if (!p || IS_ERR(p)) { + err = PTR_ERR(p); + break; + } + err = m->op->show(m, p); + if (err || m->count == m->size) { + m->count = offs; + break; + } + pos = next; + } + m->op->stop(m, p); + n = min(m->count, size); + err = copy_to_user(buf, m->buf, n); + if (err) + goto Efault; + copied += n; + m->count -= n; + if (m->count) + m->from = n; + else + pos++; + *ppos = pos; +Done: + if (!copied) + copied = err; + up(&m->sem); + return copied; +Enomem: + err = -ENOMEM; + goto Done; +Efault: + err = -EFAULT; + goto Done; +} + +/** + * seq_lseek - ->llseek() method for sequential files. 
+ * @file, @offset, @origin: see file_operations method + * + * Ready-made ->f_op->llseek() + */ +loff_t seq_lseek(struct file *file, loff_t offset, int origin) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + long long retval = -EINVAL; + + down(&m->sem); + switch (origin) { + case 1: + offset += file->f_pos; + case 0: + if (offset < 0) + break; + if (offset != file->f_pos) { + file->f_pos = offset; + m->count = 0; + } + retval = offset; + } + up(&m->sem); + return retval; +} + +/** + * seq_release - free the structures associated with sequential file. + * @file: file in question + * @inode: file->f_dentry->d_inode + * + * Frees the structures associated with sequential file; can be used + * as ->f_op->release() if you don't have private data to destroy. + */ +int seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + kfree(m->buf); + kfree(m); + return 0; +} + +/** + * seq_escape - print string into buffer, escaping some characters + * @m: target buffer + * @s: string + * @esc: set of characters that need escaping + * + * Puts string into buffer, replacing each occurence of character from + * @esc with usual octal escape. Returns 0 in case of success, -1 - in + * case of overflow. + */ +int seq_escape(struct seq_file *m, const char *s, const char *esc) +{ + char *end = m->buf + m->size; + char *p; + char c; + + for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { + if (!strchr(esc, c)) { + *p++ = c; + continue; + } + if (p + 3 < end) { + *p++ = '\\'; + *p++ = '0' + ((c & 0300) >> 6); + *p++ = '0' + ((c & 070) >> 3); + *p++ = '0' + (c & 07); + continue; + } + m->count = m->size; + return -1; + } + m->count = p - m->buf; + return 0; +} + +int seq_printf(struct seq_file *m, const char *f, ...) 
+{ + va_list args; + int len; + + if (m->count < m->size) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); + va_end(args); + if (m->count + len < m->size) { + m->count += len; + return 0; + } + } + m->count = m->size; + return -1; +} diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 28f1b7b26976..6f99b61e87f3 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -105,8 +105,20 @@ extern unsigned long empty_zero_page[1024]; #ifndef __ASSEMBLY__ #if CONFIG_X86_PAE # include <asm/pgtable-3level.h> + +/* + * Need to initialise the X86 PAE caches + */ +extern void pgtable_cache_init(void); + #else # include <asm/pgtable-2level.h> + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif #endif diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index c6279c8f7264..edc2104509d5 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -483,4 +483,9 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) #define KERNEL_TR_PAGE_NUM ((KERNEL_START - PAGE_OFFSET) / KERNEL_TR_PAGE_SIZE) +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h index 6213ab8504f2..1e2fdf53ec4d 100644 --- a/include/asm-parisc/pgtable.h +++ b/include/asm-parisc/pgtable.h @@ -334,4 +334,9 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, #define io_remap_page_range remap_page_range +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif /* _PARISC_PAGE_H */ diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index 81dbec806dcf..47aa26cdc47c 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -555,6 +555,11 @@ 
extern void kernel_set_cachemode (unsigned long address, unsigned long size, #define io_remap_page_range remap_page_range +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif /* __ASSEMBLY__ */ #endif /* _PPC_PGTABLE_H */ #endif /* __KERNEL__ */ diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h index 6bd7e49ae83c..cc962def542d 100644 --- a/include/asm-sh/pgtable.h +++ b/include/asm-sh/pgtable.h @@ -308,4 +308,9 @@ extern void update_mmu_cache(struct vm_area_struct * vma, #define io_remap_page_range remap_page_range +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif /* __ASM_SH_PAGE_H */ diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index fa318a5b581c..a2a3777f1fb4 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -454,4 +454,9 @@ extern int io_remap_page_range(unsigned long from, unsigned long to, /* We provide our own get_unmapped_area to cope with VA holes for userland */ #define HAVE_ARCH_UNMAPPED_AREA +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif /* !(_SPARC_PGTABLE_H) */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 7acc74ef4b54..22aec51acd22 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -344,4 +344,9 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, unsi #endif /* !(__ASSEMBLY__) */ +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + #endif /* !(_SPARC64_PGTABLE_H) */ diff --git a/include/linux/fsfilter.h b/include/linux/fsfilter.h new file mode 100644 index 000000000000..89e870450d3d --- /dev/null +++ b/include/linux/fsfilter.h @@ -0,0 +1,129 @@ +#ifndef __FILTER_H_ +#define __FILTER_H_ 1 + +#ifdef __KERNEL__ + +/* cachetype.c */ + +/* + * it is important that things like inode, super and 
file operations + * for intermezzo are not defined statically. If methods are NULL + * the VFS takes special action based on that. Given that different + * cache types have NULL ops at different slots, we must install opeation + * talbes for InterMezzo with NULL's in the same spot + */ + +struct filter_ops { + struct super_operations filter_sops; + + struct inode_operations filter_dir_iops; + struct inode_operations filter_file_iops; + struct inode_operations filter_sym_iops; + + struct file_operations filter_dir_fops; + struct file_operations filter_file_fops; + struct file_operations filter_sym_fops; + + struct dentry_operations filter_dentry_ops; +}; + +struct cache_ops { + /* operations on the file store */ + struct super_operations *cache_sops; + + struct inode_operations *cache_dir_iops; + struct inode_operations *cache_file_iops; + struct inode_operations *cache_sym_iops; + + struct file_operations *cache_dir_fops; + struct file_operations *cache_file_fops; + struct file_operations *cache_sym_fops; + + struct dentry_operations *cache_dentry_ops; +}; + + +#define FILTER_DID_SUPER_OPS 0x1 +#define FILTER_DID_INODE_OPS 0x2 +#define FILTER_DID_FILE_OPS 0x4 +#define FILTER_DID_DENTRY_OPS 0x8 +#define FILTER_DID_DEV_OPS 0x10 +#define FILTER_DID_SYMLINK_OPS 0x20 +#define FILTER_DID_DIR_OPS 0x40 + +struct filter_fs { + int o_flags; + struct filter_ops o_fops; + struct cache_ops o_caops; + struct journal_ops *o_trops; + struct snapshot_ops *o_snops; +}; + +#define FILTER_FS_TYPES 5 +#define FILTER_FS_EXT2 0 +#define FILTER_FS_EXT3 1 +#define FILTER_FS_REISERFS 2 +#define FILTER_FS_XFS 3 +#define FILTER_FS_OBDFS 4 +extern struct filter_fs filter_oppar[FILTER_FS_TYPES]; + +struct filter_fs *filter_get_filter_fs(const char *cache_type); +void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type); +inline struct super_operations *filter_c2usops(struct filter_fs *cache); +inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache); +inline struct 
inode_operations *filter_c2udiops(struct filter_fs *cache); +inline struct inode_operations *filter_c2usiops(struct filter_fs *cache); +inline struct file_operations *filter_c2uffops(struct filter_fs *cache); +inline struct file_operations *filter_c2udfops(struct filter_fs *cache); +inline struct file_operations *filter_c2usfops(struct filter_fs *cache); +inline struct super_operations *filter_c2csops(struct filter_fs *cache); +inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache); +inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache); +inline struct inode_operations *filter_c2csiops(struct filter_fs *cache); +inline struct file_operations *filter_c2cffops(struct filter_fs *cache); +inline struct file_operations *filter_c2cdfops(struct filter_fs *cache); +inline struct file_operations *filter_c2csfops(struct filter_fs *cache); +inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache); +inline struct dentry_operations *filter_c2udops(struct filter_fs *cache); + +void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_ops, struct super_operations *filter_sops); +void filter_setup_dir_ops(struct filter_fs *cache, struct inode *cache_inode, struct inode_operations *filter_iops, struct file_operations *ffops); +void filter_setup_file_ops(struct filter_fs *cache, struct inode *cache_inode, struct inode_operations *filter_iops, struct file_operations *filter_op); +void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *cache_inode, struct inode_operations *filter_iops, struct file_operations *filter_op); +void filter_setup_dentry_ops(struct filter_fs *cache, struct dentry_operations *cache_dop, struct dentry_operations *filter_dop); + + +#define PRESTO_DEBUG +#ifdef PRESTO_DEBUG +/* debugging masks */ +#define D_SUPER 1 /* print results returned by Venus */ +#define D_INODE 2 /* print entry and exit into procedure */ +#define D_FILE 4 +#define D_CACHE 8 /* cache debugging */ 
+#define D_MALLOC 16 /* print malloc, de-alloc information */ +#define D_JOURNAL 32 +#define D_UPCALL 64 /* up and downcall debugging */ +#define D_PSDEV 128 +#define D_PIOCTL 256 +#define D_SPECIAL 512 +#define D_TIMING 1024 +#define D_DOWNCALL 2048 + +#define FDEBUG(mask, format, a...) \ + do { \ + if (filter_debug & mask) { \ + printk("(%s,l. %d): ", __FUNCTION__, __LINE__); \ + printk(format, ##a); } \ + } while (0) + +#define FENTRY \ + if(filter_print_entry) \ + printk("Process %d entered %s\n", current->pid, __FUNCTION__) + +#define FEXIT \ + if(filter_print_entry) \ + printk("Process %d leaving %s at %d\n", current->pid, \ + __FUNCTION__,__LINE__) +#endif +#endif +#endif diff --git a/include/linux/intermezzo_fs.h b/include/linux/intermezzo_fs.h new file mode 100644 index 000000000000..012060243874 --- /dev/null +++ b/include/linux/intermezzo_fs.h @@ -0,0 +1,731 @@ +/* + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory. + * Copyright (C) 2001 Tacitus Systems, Inc. + * Copyright (C) 2001 Cluster File Systems, Inc. + */ + +#ifndef __INTERMEZZO_FS_H_ +#define __INTERMEZZO_FS_H_ 1 + +#ifdef __KERNEL__ +#include <linux/smp.h> +#include <linux/fsfilter.h> + +/* fixups for fs.h */ +#ifndef fs_down +#define fs_down(sem) down(sem) +#endif + +#ifndef fs_up +#define fs_up(sem) up(sem) +#endif + +/* We will be more tolerant than the default ea patch with attr name sizes and + * the size of value. If these come via VFS from the default ea patches, the + * corresponding character strings will be truncated anyway. During journalling- * we journal length for both name and value. 
See journal_set_ext_attr. + */ +#define PRESTO_EXT_ATTR_NAME_MAX 128 +#define PRESTO_EXT_ATTR_VALUE_MAX 8192 + +#define KML_IDLE 0 +#define KML_DECODE 1 +#define KML_OPTIMIZE 2 +#define KML_REINT 3 + +#define KML_OPEN_REINT 0x0100 +#define KML_REINT_BEGIN 0x0200 +#define KML_BACKFETCH 0x0400 +#define KML_REINT_END 0x0800 +#define KML_CLOSE_REINT 0x1000 +#define FSET_GET_KMLDATA(fset) fset->fset_kmldata +#define KML_REINT_MAXBUF (64 * 1024) + +struct kml_fsdata +{ + int kml_state; + + /* kml optimize support */ + struct list_head kml_kop_cache; + + /* kml reint support */ + int kml_reint_state; + struct list_head kml_reint_cache; + struct list_head *kml_reint_current; + int kml_maxsize; /* max buffer */ + int kml_len; + char * kml_buf; + loff_t kml_reintpos; + int kml_count; +}; + +/* super.c */ +struct presto_cache *presto_find_cache(kdev_t dev) ; +extern struct file_system_type presto_fs_type; +extern int init_intermezzo_fs(void); + +#define CACHE_TYPE_LENGTH 16 + +int presto_ispresto(struct inode *); + +#define CACHE_CLIENT_RO 0x4 +#define CACHE_LENTO_RO 0x8 +#define CACHE_FSETROOT_SET 0x10 + + +struct presto_cache { + spinlock_t cache_lock; + loff_t cache_reserved; + struct list_head cache_chain; /* for the dev/cache hash */ + + int cache_flags; + char *cache_root_fileset; /* fileset mounted on cache "/" */ + + kdev_t cache_dev; /* underlying block device */ + struct super_block *cache_sb; + struct dentry *cache_mtde; /* unix mtpt of cache XXX NOT VALID XXX */ + char *cache_mtpt; /* again */ + + char *cache_type; /* filesystem type of cache */ + struct filter_fs *cache_filter; + + struct upc_comm *cache_psdev; /* points to /dev/intermezzo? 
we use */ + struct list_head cache_psdev_chain; + + struct list_head cache_fset_list; /* filesets mounted in cache */ +}; + + + + +/* file sets */ +#define CHUNK_BITS 16 + +struct presto_log_fd { + rwlock_t fd_lock; + loff_t fd_offset; /* offset where next record should go */ + struct file *fd_file; + int fd_truncating; + unsigned int fd_recno; /* last recno written */ + struct list_head fd_reservations; +}; + +struct presto_file_set { + struct list_head fset_list; + struct presto_log_fd fset_kml; + struct presto_log_fd fset_lml; + struct file *fset_last_rcvd; + struct dentry *fset_mtpt; + struct nameidata fset_nd; + struct presto_cache *fset_cache; + + unsigned int fset_lento_recno; /* last recno mentioned to lento */ + loff_t fset_lento_off; /* last offset mentioned to lento */ + char * fset_name; + + int fset_flags; + int fset_permit_count; + int fset_permit_cookie; + int fset_chunkbits; + struct kml_fsdata *fset_kmldata; + loff_t fset_file_maxio; /* writing more than this causes a close */ +}; + +/* This is the default number of bytes written before a close is recorded*/ +#define FSET_DEFAULT_MAX_FILEIO (1024<<10) + +struct journal_ops { + loff_t (*tr_avail)(struct presto_cache *fset, struct super_block *); + void *(*tr_start)(struct presto_file_set *, struct inode *, int op); + void (*tr_commit)(struct presto_file_set *, void *handle); + void (*tr_journal_data)(struct inode *); +}; + + +extern struct journal_ops presto_ext2_journal_ops; +extern struct journal_ops presto_ext3_journal_ops; +extern struct journal_ops presto_xfs_journal_ops; +extern struct journal_ops presto_reiserfs_journal_ops; +extern struct journal_ops presto_obdfs_journal_ops; +struct lento_vfs_context { + __u32 slot_offset; + __u32 recno; + __u64 kml_offset; + __u32 flags; + __u32 updated_time; +}; + + +#define LENTO_FL_KML 0x0001 +#define LENTO_FL_EXPECT 0x0002 +#define LENTO_FL_VFSCHECK 0x0004 +#define LENTO_FL_JUSTLOG 0x0008 +#define LENTO_FL_WRITE_KML 0x0010 +#define LENTO_FL_CANCEL_LML 
0x0020 +#define LENTO_FL_WRITE_EXPECT 0x0040 +#define LENTO_FL_IGNORE_TIME 0x0080 + +struct presto_cache *presto_get_cache(struct inode *inode) ; +int presto_sprint_mounts(char *buf, int buflen, int minor); +struct presto_file_set *presto_fset(struct dentry *de); +int presto_journal(struct dentry *dentry, char *buf, size_t size); +int presto_fwrite(struct file *file, const char *str, int len, loff_t *off); + +/* psdev.c */ +int presto_psdev_init(void); +extern void presto_psdev_cleanup(void); +inline int presto_lento_up(int minor); + +/* inode.c */ +extern struct super_operations presto_super_ops; +extern int presto_excluded_gid; +#define PRESTO_EXCL_GID 4711 +void presto_set_ops(struct inode *inode, struct filter_fs *filter); +void presto_read_inode(struct inode *inode); +void presto_put_super(struct super_block *); + +/* journal.c */ +void presto_trans_commit(struct presto_file_set *fset, void *handle); +void *presto_trans_start(struct presto_file_set *fset, struct inode *inode, + int op); + +/* dcache.c */ +void presto_frob_dop(struct dentry *de) ; +char * presto_path(struct dentry *dentry, struct dentry *root, + char *buffer, int buflen); +void presto_set_dd(struct dentry *); +void presto_init_ddata_cache(void); +void presto_cleanup_ddata_cache(void); +extern struct dentry_operations presto_dentry_ops; + + + +/* dir.c */ +extern struct inode_operations presto_dir_iops; +extern struct inode_operations presto_file_iops; +extern struct inode_operations presto_sym_iops; +extern struct file_operations presto_dir_fops; +extern struct file_operations presto_file_fops; +extern struct file_operations presto_sym_fops; +int presto_setattr(struct dentry *de, struct iattr *iattr); +extern int presto_ilookup_uid; +#define PRESTO_ILOOKUP_MAGIC "...ino:" +#define PRESTO_ILOOKUP_SEP ':' + +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry); + +/* file.c */ +struct presto_reservation_data { + unsigned int ri_recno; + loff_t ri_offset; + loff_t ri_size; + 
struct list_head ri_list; +}; + + +struct presto_dentry_data { + int dd_count; /* how many dentries are using this dentry */ + struct presto_file_set *dd_fset; + loff_t dd_kml_offset; + int dd_flags; + +}; + +struct presto_file_data { + int fd_do_lml; + loff_t fd_lml_offset; + uid_t fd_fsuid; + gid_t fd_fsgid; + uid_t fd_uid; + gid_t fd_gid; + mode_t fd_mode; + int fd_ngroups; + size_t fd_bytes_written; /* Number of bytes written so far on this fd*/ + gid_t fd_groups[NGROUPS_MAX]; +}; + + +/* presto.c and Lento::Downcall */ +struct presto_version { + __u64 pv_mtime; + __u64 pv_ctime; + __u64 pv_size; +}; +inline struct presto_dentry_data *presto_d2d(struct dentry *); +int presto_walk(const char *name, struct nameidata *nd); +int presto_clear_fsetroot(char *path); +int presto_clear_all_fsetroots(char *path); +int presto_get_kmlsize(char *path, size_t *size); +int presto_get_lastrecno(char *path, off_t *size); +int presto_set_fsetroot(char *path, char *fsetname, unsigned int fsetid, + unsigned int flags); +int presto_has_all_data(struct inode *inode); +inline int presto_is_read_only(struct presto_file_set *); +int presto_truncate_lml(struct presto_file_set *fset); +int lento_write_lml(char *path, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct presto_version *remote_file_version); +int lento_reset_fset(char *path, __u64 offset, __u32 recno); +int lento_complete_closes(char *path); +int lento_cancel_lml(char *path, + __u64 lml_offset, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct lento_vfs_context *info); +inline int presto_f2m(struct presto_file_set *fset); + +/* cache.c */ +#define PRESTO_REQLOW (3 * 4096) +#define PRESTO_REQHIGH (6 * 4096) +void presto_release_space(struct presto_cache *cache, loff_t req); +int presto_reserve_space(struct presto_cache *cache, loff_t req); + +/* NOTE: PRESTO_FSETROOT MUST be 0x1: + - if this bit is set dentry->d_fsdata points to a file_set + - the address of the 
file_set if d_fsdata - 1 +*/ + +#define PRESTO_FSETROOT 0x00000001 /* dentry is fileset root */ +#define PRESTO_DATA 0x00000002 /* cached data is valid */ +#define PRESTO_ATTR 0x00000004 /* attributes cached */ + +#define EISFSETROOT 0x2001 + + +struct presto_file_set *presto_path2fileset(const char *name); +int presto_permit_downcall(const char *path, int *cookie); +int presto_chk(struct dentry *dentry, int flag); +void presto_set(struct dentry *dentry, int flag); +int presto_get_permit(struct inode *inode); +int presto_put_permit(struct inode *inode); +int presto_mark_dentry(const char *path, int and, int or, int *res); +int presto_mark_cache(const char *path, int and_bits, int or_bits, int *); +int presto_mark_fset(const char *path, int and_bits, int or_bits, int *); +void presto_getversion(struct presto_version *pv, struct inode *inode); +int presto_i2m(struct inode *inode); +int presto_c2m(struct presto_cache *cache); + +/* journal.c */ +struct rec_info { + loff_t offset; + int size; + int recno; + int is_kml; +}; +void presto_trans_commit(struct presto_file_set *fset, void *handle); +void *presto_trans_start(struct presto_file_set *fset, struct inode *inode, + int op); +int presto_clear_lml_close(struct presto_file_set *fset, + loff_t lml_offset); +int presto_write_lml_close(struct rec_info *rec, + struct presto_file_set *fset, + struct file *file, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct presto_version *new_file_ver); +int presto_complete_lml(struct presto_file_set *fset); + +/* vfs.c */ +int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry, + struct iattr *iattr, struct lento_vfs_context *info); +int presto_do_create(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info); +int presto_do_link(struct presto_file_set *fset, struct dentry *dir, + struct dentry *old_dentry, struct dentry *new_dentry, + struct lento_vfs_context *info); +int 
presto_do_unlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info); +int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, const char *name, + struct lento_vfs_context *info); +int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info); +int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info); +int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, dev_t dev, + struct lento_vfs_context *info); +int presto_do_rename(struct presto_file_set *fset, struct dentry *old_dir, + struct dentry *old_dentry, struct dentry *new_dir, + struct dentry *new_dentry, struct lento_vfs_context *info); +int presto_do_statfs (struct presto_file_set *fset, + struct statfs * buf); + +int lento_setattr(const char *name, struct iattr *iattr, + struct lento_vfs_context *info); +int lento_create(const char *name, int mode, struct lento_vfs_context *info); +int lento_link(const char *oldname, const char *newname, + struct lento_vfs_context *info); +int lento_unlink(const char *name, struct lento_vfs_context *info); +int lento_symlink(const char *oldname,const char *newname, + struct lento_vfs_context *info); +int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info); +int lento_rmdir(const char *name, struct lento_vfs_context *info); +int lento_mknod(const char *name, int mode, dev_t dev, + struct lento_vfs_context *info); +int lento_rename(const char *oldname, const char *newname, + struct lento_vfs_context *info); +int lento_iopen(const char *name, ino_t ino, unsigned int generation,int flags); +int lento_close(unsigned int fd, struct lento_vfs_context *info); + + +/* journal.c */ + +#define JOURNAL_PAGE_SZ PAGE_SIZE + +__inline__ int presto_no_journal(struct presto_file_set 
*fset); +int journal_fetch(int minor); +int presto_journal_write(struct rec_info *rec, struct presto_file_set *fset, + struct file *file); +int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *old_ver, + struct iattr *iattr); +int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_file_ver, int mode); +int presto_journal_link(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver); +int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *old_file_ver, int len, + const char *name); +int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, const char *target, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver); +int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_dir_ver, int mode); +int presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *old_dir_ver, int len, + const char *name); +int presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_node_ver, int mode, + int dmajor, int dminor); +int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *src_dir_ver, + struct presto_version *tgt_dir_ver); +int presto_journal_open(struct rec_info *rec, struct presto_file_set *fset, + 
struct dentry *dentry, struct presto_version *old_ver); +int presto_journal_close(struct rec_info *rec, struct presto_file_set *fset, + struct file *file, + struct dentry *dentry, + struct presto_version *new_ver); +int presto_close_journal_file(struct presto_file_set *fset); +void presto_log_op(void *data, int len); +int presto_write_last_rcvd(struct rec_info *recinfo, + struct presto_file_set *fset, + struct lento_vfs_context *info); + +/* journal_ext3.c */ +struct ext3_journal_data { + struct file *jd_file; +}; +extern struct ext3_journal_data e3jd; + + + + +/* sysctl.c */ +int init_intermezzo_sysctl(void); +void cleanup_intermezzo_sysctl(void); + +/* ext_attr.c */ +#ifdef CONFIG_FS_EXT_ATTR +/* XXX: Borrowed from vfs.c. Once the ea patch is into CVS + * move this prototype -SHP + */ +int presto_do_set_ext_attr(struct presto_file_set *fset, + struct dentry *dentry, + const char *name, void *buffer, + size_t buffer_len, int flags, mode_t *mode, + struct lento_vfs_context *info); +int presto_set_ext_attr(struct inode *inode, + const char *name, void *buffer, + size_t buffer_len, int flags); +int lento_set_ext_attr(const char *path, const char *name, + void *buffer, size_t buffer_len, int flags, + mode_t mode, struct lento_vfs_context *info); +/* XXX: Borrowed from journal.c. 
Once the ea patch is into CVS + * move this prototype -SHP + */ +int presto_journal_set_ext_attr (struct rec_info *rec, + struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *ver, const char *name, + const char *buffer, int buffer_len, + int flags); +#endif + + +/* global variables */ +extern int presto_debug; +extern int presto_print_entry; + +#define PRESTO_DEBUG +#ifdef PRESTO_DEBUG +/* debugging masks */ +#define D_SUPER 1 /* print results returned by Venus */ +#define D_INODE 2 /* print entry and exit into procedure */ +#define D_FILE 4 +#define D_CACHE 8 /* cache debugging */ +#define D_MALLOC 16 /* print malloc, de-alloc information */ +#define D_JOURNAL 32 +#define D_UPCALL 64 /* up and downcall debugging */ +#define D_PSDEV 128 +#define D_PIOCTL 256 +#define D_SPECIAL 512 +#define D_TIMING 1024 +#define D_DOWNCALL 2048 +#define D_KML 4096 + +#define CDEBUG(mask, format, a...) \ + do { \ + if (presto_debug & mask) { \ + printk("(%s:%s,l. %d %d): ", __FILE__, __FUNCTION__, __LINE__, current->pid); \ + printk(format, ##a); } \ + } while (0) + +#define ENTRY \ + if(presto_print_entry) \ + printk("Process %d entered %s\n", current->pid, __FUNCTION__) + +#define EXIT \ + if(presto_print_entry) \ + printk("Process %d leaving %s at %d\n", current->pid, \ + __FUNCTION__,__LINE__) + +extern long presto_kmemory; +extern long presto_vmemory; + +#define presto_kmem_inc(ptr, size) presto_kmemory += (size) +#define presto_kmem_dec(ptr, size) presto_kmemory -= (size) +#define presto_vmem_inc(ptr, size) presto_vmemory += (size) +#define presto_vmem_dec(ptr, size) presto_vmemory -= (size) +#else /* !PRESTO_DEBUG */ +#define CDEBUG(mask, format, a...) 
do {} while (0) +#define ENTRY do {} while (0) +#define EXIT do {} while (0) +#define presto_kmem_inc(ptr, size) do {} while (0) +#define presto_kmem_dec(ptr, size) do {} while (0) +#define presto_vmem_inc(ptr, size) do {} while (0) +#define presto_vmem_dec(ptr, size) do {} while (0) +#endif /* PRESTO_DEBUG */ + + +#define PRESTO_ALLOC(ptr, cast, size) \ +do { \ + if (size <= 4096) { \ + ptr = (cast)kmalloc((unsigned long) size, GFP_KERNEL); \ + CDEBUG(D_MALLOC, "kmalloced: %ld at %p.\n", (long)size, ptr); \ + presto_kmem_inc(ptr, size); \ + } else { \ + ptr = (cast)vmalloc((unsigned long) size); \ + CDEBUG(D_MALLOC, "vmalloced: %ld at %p.\n", (long)size, ptr); \ + presto_vmem_inc(ptr, size); \ + } \ + if ((ptr) == 0) \ + printk("PRESTO: out of memory at %s:%d\n", __FILE__, __LINE__); \ + else \ + memset( ptr, 0, size ); \ +} while (0) + + + +#define PRESTO_FREE(ptr,size) \ +do { \ + if (!ptr) { \ + printk("PRESTO: free NULL pointer (%ld bytes) at %s:%d\n", \ + (long)size, __FILE__, __LINE__); \ + break; \ + } \ + if (size <= 4096) { \ + CDEBUG(D_MALLOC, "kfreed: %ld at %p.\n", (long)size, ptr); \ + presto_kmem_dec(ptr, size); \ + kfree((ptr)); \ + } else { \ + CDEBUG(D_MALLOC, "vfreed: %ld at %p.\n", (long)size, ptr); \ + presto_vmem_dec(ptr, size); \ + vfree((ptr)); \ + } \ +} while (0) + +#define MYPATHLEN(buffer,path) (buffer + PAGE_SIZE - path - 1) + +#else /* __KERNEL__ */ +#include <asm/types.h> +#include <sys/ioctl.h> +struct lento_vfs_context { + __u32 slot_offset; + __u32 recno; + __u64 kml_offset; + __u32 flags; + __u32 updated_time; +}; +#endif /* __KERNEL__*/ + + +/* marking flags for fsets */ +#define FSET_CLIENT_RO 0x00000001 +#define FSET_LENTO_RO 0x00000002 +#define FSET_HASPERMIT 0x00000004 /* we have a permit to WB */ +#define FSET_INSYNC 0x00000008 /* this fileset is in sync */ +#define FSET_PERMIT_WAITING 0x00000010 /* Lento is waiting for permit */ +#define FSET_STEAL_PERMIT 0x00000020 /* take permit if Lento is dead */ +#define 
FSET_JCLOSE_ON_WRITE 0x00000040 /* Journal closes on writes */ + + +/* what to mark indicator (ioctl parameter) */ +#define MARK_DENTRY 101 +#define MARK_FSET 102 +#define MARK_CACHE 103 +#define MARK_GETFL 104 + + + +struct readmount { + int io_len; /* this is IN & OUT: true length of str is returned */ + char *io_string; +}; + +/* modeled after setsockopt */ +/* so if you have no /proc, oh well. */ +/* for now it's all ints. We may grow this later for non-ints. */ +struct psdev_opt { + int optname; + int optval; +}; + +struct lento_input { + char *name; + struct lento_vfs_context info; +}; + +struct lento_input_attr { + char *name; +#if BITS_PER_LONG < 64 + __u32 dummy; /* XXX on 64-bit platforms, this is not needed */ +#endif + __u32 valid; + __u32 mode; + __u32 uid; + __u32 gid; + __u64 size; + __s64 atime; + __s64 mtime; + __s64 ctime; + __u32 attr_flags; + struct lento_vfs_context info; +}; + +struct lento_input_mode { + char *name; + __u32 mode; + struct lento_vfs_context info; +}; + +struct lento_input_old_new { + char *oldname; + char *newname; + struct lento_vfs_context info; +}; + +struct lento_input_dev { + char *name; + __u32 mode; + __u32 major; + __u32 minor; + struct lento_vfs_context info; +}; + +struct lento_input_iopen { + char *name; +#if BITS_PER_LONG < 64 + __u32 dummy; /* XXX on 64-bit platforms, this is not needed */ +#endif + __u64 ino; + __u32 generation; + __u32 flags; + __s32 fd; +}; + +struct lento_input_close { + __u32 fd; + struct lento_vfs_context info; +}; + +/* XXX: check for alignment */ +struct lento_input_ext_attr { + char *path; + char *name; + __u32 name_len; + char *buffer; + __u32 buffer_len; + __u32 flags; + __u32 mode; + struct lento_vfs_context info; +}; + +/* XXX should PRESTO_GET_* actually be of type _IOR, since we are reading? 
*/ +#define PRESTO_GETMOUNT _IOW ('p',0x03, struct readmount *) +#define PRESTO_SETPID _IOW ('p',0x04, struct readmount *) +#define PRESTO_CLOSE_JOURNALF _IOW ('p',0x06, struct readmount *) +#define PRESTO_SET_FSETROOT _IOW ('p',0x07, struct readmount *) +#define PRESTO_CLEAR_FSETROOT _IOW ('p',0x08, struct readmount *) +#define PRESTO_SETOPT _IOW ('p',0x09, struct psdev_opt *) +#define PRESTO_GETOPT _IOW ('p',0x0a, struct psdev_opt *) +#define PRESTO_GET_KMLSIZE _IOW ('p',0x0b, struct psdev_opt *) +#define PRESTO_GET_RECNO _IOW ('p',0x0c, struct psdev_opt *) +#define PRESTO_VFS_SETATTR _IOW ('p',0x10, struct lento_input_attr *) +#define PRESTO_VFS_CREATE _IOW ('p',0x11, struct lento_input_mode *) +#define PRESTO_VFS_LINK _IOW ('p',0x12, struct lento_input_old_new *) +#define PRESTO_VFS_UNLINK _IOW ('p',0x13, struct lento_input *) +#define PRESTO_VFS_SYMLINK _IOW ('p',0x14, struct lento_input_old_new *) +#define PRESTO_VFS_MKDIR _IOW ('p',0x15, struct lento_input_mode *) +#define PRESTO_VFS_RMDIR _IOW ('p',0x16, struct lento_input *) +#define PRESTO_VFS_MKNOD _IOW ('p',0x17, struct lento_input_dev *) +#define PRESTO_VFS_RENAME _IOW ('p',0x18, struct lento_input_old_new *) +#define PRESTO_VFS_CLOSE _IOW ('p',0x1a, struct lento_input_close *) +#define PRESTO_VFS_IOPEN _IOW ('p',0x1b, struct lento_input_iopen *) +#define PRESTO_VFS_SETEXTATTR _IOW ('p',0x1c, struct lento_input_ext_attr *) +#define PRESTO_VFS_DELEXTATTR _IOW ('p',0x1d, struct lento_input_ext_attr *) + +#define PRESTO_MARK _IOW ('p',0x20, struct lento_input_open *) +#define PRESTO_RELEASE_PERMIT _IOW ('p',0x21, struct lento_input_open *) +#define PRESTO_CLEAR_ALL_FSETROOTS _IOW ('p',0x22, struct readmount *) +#define PRESTO_BACKFETCH_LML _IOW ('p',0x23, struct readmount *) +#define PRESTO_REINT _IOW ('p',0x24, struct readmount *) +#define PRESTO_CANCEL_LML _IOW ('p',0x25, struct readmount *) +#define PRESTO_RESET_FSET _IOW ('p',0x26, struct readmount *) +#define PRESTO_COMPLETE_CLOSES _IOW ('p',0x27, 
struct readmount *) + +#define PRESTO_REINT_BEGIN _IOW ('p',0x30, struct readmount *) +#define PRESTO_DO_REINT _IOW ('p',0x31, struct readmount *) +#define PRESTO_REINT_END _IOW ('p',0x32, struct readmount *) + +#endif diff --git a/include/linux/intermezzo_journal.h b/include/linux/intermezzo_journal.h new file mode 100644 index 000000000000..75ec0977e33d --- /dev/null +++ b/include/linux/intermezzo_journal.h @@ -0,0 +1,26 @@ +#ifndef __PRESTO_JOURNAL_H +#define __PRESTO_JOURNAL_H + + +#include <linux/version.h> + +struct journal_prefix { + int len; + u32 version; + int pid; + int uid; + int fsuid; + int fsgid; + int opcode; + u32 ngroups; + u32 groups[0]; +}; + +struct journal_suffix { + unsigned long prevrec; /* offset of previous record for dentry */ + int recno; + int time; + int len; +}; + +#endif diff --git a/include/linux/intermezzo_kml.h b/include/linux/intermezzo_kml.h new file mode 100644 index 000000000000..da11b5dcabdc --- /dev/null +++ b/include/linux/intermezzo_kml.h @@ -0,0 +1,261 @@ +#ifndef __INTERMEZZO_KML_H +#define __INTERMEZZO_KML_H + +#include <linux/version.h> +#include <linux/intermezzo_psdev.h> +#include <linux/fs.h> +#include <linux/intermezzo_journal.h> + +#define PRESTO_KML_MAJOR_VERSION 0x00010000 +#define PRESTO_KML_MINOR_VERSION 0x00002001 +#define PRESTO_OP_NOOP 0 +#define PRESTO_OP_CREATE 1 +#define PRESTO_OP_MKDIR 2 +#define PRESTO_OP_UNLINK 3 +#define PRESTO_OP_RMDIR 4 +#define PRESTO_OP_CLOSE 5 +#define PRESTO_OP_SYMLINK 6 +#define PRESTO_OP_RENAME 7 +#define PRESTO_OP_SETATTR 8 +#define PRESTO_OP_LINK 9 +#define PRESTO_OP_OPEN 10 +#define PRESTO_OP_MKNOD 11 +#define PRESTO_OP_WRITE 12 +#define PRESTO_OP_RELEASE 13 +#define PRESTO_OP_TRUNC 14 +#define PRESTO_OP_SETEXTATTR 15 +#define PRESTO_OP_DELEXTATTR 16 + +#define PRESTO_LML_DONE 1 /* flag to get first write to do LML */ +#define KML_KOP_MARK 0xffff + +struct presto_lml_data { + loff_t rec_offset; +}; + +struct big_journal_prefix { + u32 len; + u32 version; + u32 pid; + u32 
uid; + u32 fsuid; + u32 fsgid; + u32 opcode; + u32 ngroups; + u32 groups[NGROUPS_MAX]; +}; + +enum kml_opcode { + KML_CREATE = 1, + KML_MKDIR, + KML_UNLINK, + KML_RMDIR, + KML_CLOSE, + KML_SYMLINK, + KML_RENAME, + KML_SETATTR, + KML_LINK, + KML_OPEN, + KML_MKNOD, + KML_ENDMARK = 0xff +}; + +struct kml_create { + char *path; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int mode; + int uid; + int gid; +}; + +struct kml_open { +}; + +struct kml_mkdir { + char *path; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int mode; + int uid; + int gid; +}; + +struct kml_unlink { + char *path, + *name; + struct presto_version old_tgtv, + old_parentv, + new_parentv; +}; + +struct kml_rmdir { + char *path, + *name; + struct presto_version old_tgtv, + old_parentv, + new_parentv; +}; + +struct kml_close { + int open_mode, + open_uid, + open_gid; + char *path; + struct presto_version new_objectv; + __u64 ino; + int generation; +}; + +struct kml_symlink { + char *sourcepath, + *targetpath; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int uid; + int gid; +}; + +struct kml_rename { + char *sourcepath, + *targetpath; + struct presto_version old_objectv, + new_objectv, + old_tgtv, + new_tgtv; +}; + +struct kml_setattr { + char *path; + struct presto_version old_objectv; + struct iattr iattr; +}; + +struct kml_link { + char *sourcepath, + *targetpath; + struct presto_version new_objectv, + old_parentv, + new_parentv; +}; + +struct kml_mknod { + char *path; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int mode; + int uid; + int gid; + int major; + int minor; +}; + +/* kml record items for optimizing */ +struct kml_kop_node +{ + u32 kml_recno; + u32 kml_flag; + u32 kml_op; + nlink_t i_nlink; + u32 i_ino; +}; + +struct kml_kop_lnode +{ + struct list_head chains; + struct kml_kop_node node; +}; + +struct kml_endmark { + u32 total; + struct kml_kop_node *kop; +}; + +/* kml_flag */ +#define 
KML_REC_DELETE 1 +#define KML_REC_EXIST 0 + +struct kml_optimize { + struct list_head kml_chains; + u32 kml_flag; + u32 kml_op; + nlink_t i_nlink; + u32 i_ino; +}; + +struct kml_rec { + /* attribute of this record */ + int rec_size; + int rec_kml_offset; + + struct big_journal_prefix rec_head; + union { + struct kml_create create; + struct kml_open open; + struct kml_mkdir mkdir; + struct kml_unlink unlink; + struct kml_rmdir rmdir; + struct kml_close close; + struct kml_symlink symlink; + struct kml_rename rename; + struct kml_setattr setattr; + struct kml_mknod mknod; + struct kml_link link; + struct kml_endmark endmark; + } rec_kml; + struct journal_suffix rec_tail; + + /* for kml optimize only */ + struct kml_optimize kml_optimize; +}; + +/* kml record items for optimizing */ +extern void kml_kop_init (struct presto_file_set *fset); +extern void kml_kop_addrec (struct presto_file_set *fset, + struct inode *ino, u32 op, u32 flag); +extern int kml_kop_flush (struct presto_file_set *fset); + +/* defined in kml_setup.c */ +extern int kml_init (struct presto_file_set *fset); +extern int kml_cleanup (struct presto_file_set *fset); + +/* defined in kml.c */ +extern int begin_kml_reint (struct file *file, unsigned long arg); +extern int do_kml_reint (struct file *file, unsigned long arg); +extern int end_kml_reint (struct file *file, unsigned long arg); + +/* kml_utils.c */ +extern char *dlogit (void *tbuf, const void *sbuf, int size); +extern char * bdup_printf (char *format, ...); + +/* defined in kml_decode.c */ +/* printop */ +#define PRINT_KML_PREFIX 0x1 +#define PRINT_KML_SUFFIX 0x2 +#define PRINT_KML_REC 0x4 +#define PRINT_KML_OPTIMIZE 0x8 +#define PRINT_KML_EXIST 0x10 +#define PRINT_KML_DELETE 0x20 +extern void kml_printrec (struct kml_rec *rec, int printop); +extern int print_allkmlrec (struct list_head *head, int printop); +extern int delete_kmlrec (struct list_head *head); +extern int kml_decoderec (char *buf, int pos, int buflen, int *size, + struct kml_rec 
**newrec); +extern int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen); +extern void kml_freerec (struct kml_rec *rec); + +/* defined in kml_reint.c */ +#define KML_CLOSE_BACKFETCH 1 +extern int kml_reintbuf (struct kml_fsdata *kml_fsdata, + char *mtpt, struct kml_rec **rec); + +/* defined in kml_setup.c */ +extern int kml_init (struct presto_file_set *fset); +extern int kml_cleanup (struct presto_file_set *fset); + +#endif + diff --git a/include/linux/intermezzo_psdev.h b/include/linux/intermezzo_psdev.h new file mode 100644 index 000000000000..9b1e557bdd7a --- /dev/null +++ b/include/linux/intermezzo_psdev.h @@ -0,0 +1,72 @@ +#ifndef __PRESTO_PSDEV_H +#define __PRESTO_PSDEV_H + +#ifdef PRESTO_DEVEL +# define PRESTO_FS_NAME "izofs" +# define PRESTO_PSDEV_NAME "/dev/izo" +# define PRESTO_PSDEV_MAJOR 186 +#else +# define PRESTO_FS_NAME "InterMezzo" +# define PRESTO_PSDEV_NAME "/dev/intermezzo" +# define PRESTO_PSDEV_MAJOR 185 +#endif + +#define MAX_PRESTODEV 16 + +#include <linux/version.h> + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) +#define wait_queue_head_t struct wait_queue * +#define DECLARE_WAITQUEUE(name,task) \ + struct wait_queue name = { task, NULL } +#define init_waitqueue_head(arg) +#else +#ifndef __initfunc +#define __initfunc(arg) arg +#endif +#endif + + +/* represents state of a /dev/presto */ +/* communication pending & processing queues */ +struct upc_comm { + unsigned int uc_seq; + wait_queue_head_t uc_waitq; /* Lento wait queue */ + struct list_head uc_pending; + struct list_head uc_processing; + int uc_pid; /* Lento's pid */ + int uc_hard; /* allows signals during upcalls */ + int uc_no_filter; + int uc_no_journal; + int uc_no_upcall; + int uc_timeout; /* . 
sec: signals will dequeue upc */ + long uc_errorval; /* for testing I/O failures */ + struct list_head uc_cache_list; + int uc_minor; + char * uc_devname; +}; + +#define ISLENTO(minor) (current->pid == upc_comms[minor].uc_pid \ + || current->p_pptr->pid == upc_comms[minor].uc_pid) + +extern struct upc_comm upc_comms[MAX_PRESTODEV]; + +/* messages between presto filesystem in kernel and Venus */ +#define REQ_READ 1 +#define REQ_WRITE 2 +#define REQ_ASYNC 4 +#define REQ_DEAD 8 + +struct upc_req { + struct list_head rq_chain; + caddr_t rq_data; + u_short rq_flags; + u_short rq_bufsize; + u_short rq_rep_size; + u_short rq_opcode; /* copied from data to save lookup */ + int rq_unique; + wait_queue_head_t rq_sleep; /* process' wait queue */ + unsigned long rq_posttime; +}; + +#endif diff --git a/include/linux/intermezzo_upcall.h b/include/linux/intermezzo_upcall.h new file mode 100644 index 000000000000..0b3e6ff74e3a --- /dev/null +++ b/include/linux/intermezzo_upcall.h @@ -0,0 +1,146 @@ +/* + * Based on cfs.h from Coda, but revamped for increased simplicity. + * Linux modifications by Peter Braam, Aug 1996 + * Rewritten for InterMezzo + */ + +#ifndef _PRESTO_HEADER_ +#define _PRESTO_HEADER_ + + +/* upcall.c */ +#define SYNCHRONOUS 0 +#define ASYNCHRONOUS 1 + +int lento_permit(int minor, int pathlen, int fsetnamelen, char *path, char *fset); +int lento_opendir(int minor, int pathlen, char *path, int async); +int lento_kml(int minor, unsigned int offset, unsigned int first_recno, + unsigned int length, unsigned int last_recno, int namelen, + char *fsetname); +int lento_open(int minor, int pathlen, char *path); +int lento_journal(int minor, char *page, int async); +int lento_release_permit(int minor, int cookie); + +/* + * Kernel <--> Lento communications. 
+ */ +/* upcalls */ +#define LENTO_PERMIT 1 +#define LENTO_JOURNAL 2 +#define LENTO_OPENDIR 3 +#define LENTO_OPEN 4 +#define LENTO_SIGNAL 5 +#define LENTO_KML 6 +#define LENTO_COOKIE 7 + +/* Lento <-> Presto RPC arguments */ +struct lento_up_hdr { + unsigned int opcode; + unsigned int unique; /* Keep multiple outstanding msgs distinct */ + u_short pid; /* Common to all */ + u_short uid; +}; + +/* This structure _must_ sit at the beginning of the buffer */ +struct lento_down_hdr { + unsigned int opcode; + unsigned int unique; + unsigned int result; +}; + +/* lento_permit: */ +struct lento_permit_in { + struct lento_up_hdr uh; + int pathlen; + int fsetnamelen; + char path[0]; +}; +struct lento_permit_out { + struct lento_down_hdr dh; +}; + + +/* lento_opendir: */ +struct lento_opendir_in { + struct lento_up_hdr uh; + int async; + int pathlen; + char path[0]; +}; +struct lento_opendir_out { + struct lento_down_hdr dh; +}; + + +/* lento_kml: */ +struct lento_kml_in { + struct lento_up_hdr uh; + unsigned int offset; + unsigned int first_recno; + unsigned int length; + unsigned int last_recno; + int namelen; + char fsetname[0]; +}; + +struct lento_kml_out { + struct lento_down_hdr dh; +}; + + +/* lento_open: */ +struct lento_open_in { + struct lento_up_hdr uh; + int pathlen; + char path[0]; +}; +struct lento_open_out { + struct lento_down_hdr dh; +}; + +/* lento_response_cookie */ +struct lento_response_cookie_in { + struct lento_up_hdr uh; + int cookie; +}; + +struct lento_response_cookie_out { + struct lento_down_hdr dh; +}; + + +struct lento_mknod { + struct lento_down_hdr dh; + int major; + int minor; + int mode; + char path[0]; +}; + + +/* NB: every struct below begins with an up_hdr */ +union up_args { + struct lento_up_hdr uh; + struct lento_permit_in lento_permit; + struct lento_open_in lento_open; + struct lento_opendir_in lento_opendir; + struct lento_kml_in lento_kml; + struct lento_response_cookie_in lento_response_cookie; +}; + +union down_args { + struct 
lento_down_hdr dh; + struct lento_permit_out lento_permit; + struct lento_open_out lento_open; + struct lento_opendir_out lento_opendir; + struct lento_kml_out lento_kml; + struct lento_response_cookie_out lento_response_cookie; +}; + +#include "intermezzo_psdev.h" + +int lento_upcall(int minor, int read_size, int *rep_size, + union up_args *buffer, int async, + struct upc_req *rq ); +#endif + diff --git a/include/linux/lvm.h b/include/linux/lvm.h index b1bc98c045a9..b3e68a6db266 100644 --- a/include/linux/lvm.h +++ b/include/linux/lvm.h @@ -52,6 +52,7 @@ * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit * 01/01/2000 - extended lv_v2 core structure by wait_queue member * 12/02/2000 - integrated Andrea Arcagnelli's snapshot work + * 14/02/2001 - changed LVM_SNAPSHOT_MIN_CHUNK to 1 page * 18/02/2000 - seperated user and kernel space parts by * #ifdef them with __KERNEL__ * 08/03/2000 - implemented cluster/shared bits for vg_access @@ -60,6 +61,11 @@ * 12/11/2000 - removed unneeded timestamp definitions * 24/12/2000 - removed LVM_TO_{CORE,DISK}*, use cpu_{from, to}_le* * instead - Christoph Hellwig + * 01/03/2001 - Rename VG_CREATE to VG_CREATE_OLD and add new VG_CREATE + * 08/03/2001 - new lv_t (in core) version number 5: changed page member + * to (struct kiobuf *) to use for COW exception table io + * 23/03/2001 - Change a (presumably) mistyped pv_t* to an lv_t* + * 26/03/2001 - changed lv_v4 to lv_v5 in structure definition [HM] * */ @@ -67,9 +73,11 @@ #ifndef _LVM_H_INCLUDE #define _LVM_H_INCLUDE -#define _LVM_KERNEL_H_VERSION "LVM 0.9.1_beta2 (18/01/2001)" +#define LVM_RELEASE_NAME "1.0.1-rc4(ish)" +#define LVM_RELEASE_DATE "03/10/2001" + +#define _LVM_KERNEL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")" -#include <linux/config.h> #include <linux/version.h> /* @@ -127,24 +135,11 @@ #define SECTOR_SIZE 512 #endif -#define LVM_STRUCT_VERSION 1 /* structure version */ +/* structure version */ +#define LVM_STRUCT_VERSION 1 #define 
LVM_DIR_PREFIX "/dev/" -/* set the default structure version */ -#if ( LVM_STRUCT_VERSION == 1) -#define pv_t pv_v2_t -#define lv_t lv_v4_t -#define vg_t vg_v3_t -#define pv_disk_t pv_disk_v2_t -#define lv_disk_t lv_disk_v3_t -#define vg_disk_t vg_disk_v2_t -#define lv_block_exception_t lv_block_exception_v1_t -#define lv_COW_table_disk_t lv_COW_table_disk_v1_t -#endif - - - /* * i/o protocol version * @@ -194,67 +189,6 @@ /* - * VGDA: default disk spaces and offsets - * - * there's space after the structures for later extensions. - * - * offset what size - * --------------- ---------------------------------- ------------ - * 0 physical volume structure ~500 byte - * - * 1K volume group structure ~200 byte - * - * 6K namelist of physical volumes 128 byte each - * - * 6k + n * ~300byte n logical volume structures ~300 byte each - * - * + m * 4byte m physical extent alloc. structs 4 byte each - * - * End of disk - first physical extent typically 4 megabyte - * PE total * - * PE size - * - * - */ - -/* DONT TOUCH THESE !!! 
*/ -/* base of PV structure in disk partition */ -#define LVM_PV_DISK_BASE 0L - -/* size reserved for PV structure on disk */ -#define LVM_PV_DISK_SIZE 1024L - -/* base of VG structure in disk partition */ -#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE - -/* size reserved for VG structure */ -#define LVM_VG_DISK_SIZE ( 9 * 512L) - -/* size reserved for timekeeping */ -#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE) -#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */ - -/* name list of physical volumes on disk */ -#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ - LVM_TIMESTAMP_DISK_SIZE) - -/* now for the dynamically calculated parts of the VGDA */ -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \ - sizeof ( lv_disk_t) * b) -#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ - (pv)->pe_on_disk.size) -#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ - ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE)) -#define LVM_PE_ON_DISK_BASE(pv) \ - { int rest; \ - pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \ - if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \ - pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \ - } -/* END default disk spaces and offsets for PVs */ - - -/* * LVM_PE_T_MAX corresponds to: * * 8KB PE size can map a ~512 MB logical volume at the cost of 1MB memory, @@ -283,9 +217,8 @@ #define LVM_MAX_STRIPES 128 /* max # of stripes */ #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ #define LVM_MAX_MIRRORS 2 /* future use */ -#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ -#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */ -#define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */ +#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */ +#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */ #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ 
#define LVM_PARTITION 0xfe /* LVM partition id */ #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ @@ -296,28 +229,15 @@ #define LVM_SNAPSHOT_MIN_CHUNK (PAGE_SIZE/1024) /* 4 or 8 KB */ #define UNDEF -1 -#define FALSE 0 -#define TRUE 1 - - -#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \ - vg->pe_size / lv->lv_chunk_size) - -#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \ -{ \ - int COW_table_entries_per_PE; \ - int COW_table_chunks_per_PE; \ -\ - COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \ - COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \ - COW_table_entries_per_PE - COW_table_chunks_per_PE;}) - /* * ioctls + * FIXME: the last parameter to _IO{W,R,WR} is a data type. The macro will + * expand this using sizeof(), so putting "1" there is misleading + * because sizeof(1) = sizeof(int) = sizeof(2) = 4 on a 32-bit machine! */ /* volume group */ -#define VG_CREATE _IOW ( 0xfe, 0x00, 1) +#define VG_CREATE_OLD _IOW ( 0xfe, 0x00, 1) #define VG_REMOVE _IOW ( 0xfe, 0x01, 1) #define VG_EXTEND _IOW ( 0xfe, 0x03, 1) @@ -330,6 +250,8 @@ #define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) #define VG_RENAME _IOW ( 0xfe, 0x09, 1) +/* Since 0.9beta6 */ +#define VG_CREATE _IOW ( 0xfe, 0x0a, 1) /* logical volume */ #define LV_CREATE _IOW ( 0xfe, 0x20, 1) @@ -412,6 +334,9 @@ #define PV_ALLOCATABLE 0x02 /* pv_allocatable */ +/* misc */ +#define LVM_SNAPSHOT_DROPPED_SECTOR 1 + /* * Structure definitions core/disk follow * @@ -424,21 +349,21 @@ #define UUID_LEN 32 /* don't change!!! 
*/ /* copy on write tables in disk format */ -typedef struct { +typedef struct lv_COW_table_disk_v1 { uint64_t pv_org_number; uint64_t pv_org_rsector; uint64_t pv_snap_number; uint64_t pv_snap_rsector; -} lv_COW_table_disk_v1_t; +} lv_COW_table_disk_t; /* remap physical sector/rdev pairs including hash */ -typedef struct { +typedef struct lv_block_exception_v1 { struct list_head hash; - ulong rsector_org; - kdev_t rdev_org; - ulong rsector_new; - kdev_t rdev_new; -} lv_block_exception_v1_t; + uint32_t rsector_org; + kdev_t rdev_org; + uint32_t rsector_new; + kdev_t rdev_new; +} lv_block_exception_t; /* disk stored pe information */ typedef struct { @@ -454,37 +379,11 @@ typedef struct { /* - * Structure Physical Volume (PV) Version 1 + * physical volume structures */ /* core */ -typedef struct { - char id[2]; /* Identifier */ - unsigned short version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - char pv_name[NAME_LEN]; - char vg_name[NAME_LEN]; - char system_id[NAME_LEN]; /* for vgexport/vgimport */ - kdev_t pv_dev; - uint pv_number; - uint pv_status; - uint pv_allocatable; - uint pv_size; /* HM */ - uint lv_cur; - uint pe_size; - uint pe_total; - uint pe_allocated; - uint pe_stale; /* for future use */ - pe_disk_t *pe; /* HM */ - struct inode *inode; /* HM */ -} pv_v1_t; - -/* core */ -typedef struct { +typedef struct pv_v2 { char id[2]; /* Identifier */ unsigned short version; /* HM lvm version */ lvm_disk_data_t pv_on_disk; @@ -506,36 +405,17 @@ typedef struct { uint pe_allocated; uint pe_stale; /* for future use */ pe_disk_t *pe; /* HM */ - struct inode *inode; /* HM */ + struct block_device *bd; char pv_uuid[UUID_LEN+1]; -} pv_v2_t; +#ifndef __KERNEL__ + uint32_t pe_start; /* in sectors */ +#endif +} pv_t; -/* disk */ -typedef struct { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - 
lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - uint32_t pv_major; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; -} pv_disk_v1_t; /* disk */ -typedef struct { +typedef struct pv_disk_v2 { uint8_t id[2]; /* Identifier */ uint16_t version; /* HM lvm version */ lvm_disk_data_t pv_on_disk; @@ -555,7 +435,11 @@ typedef struct { uint32_t pe_size; uint32_t pe_total; uint32_t pe_allocated; -} pv_disk_v2_t; + + /* new in struct version 2 */ + uint32_t pe_start; /* in sectors */ + +} pv_disk_t; /* @@ -565,17 +449,17 @@ typedef struct { /* core PE information */ typedef struct { kdev_t dev; - ulong pe; /* to be changed if > 2TB */ - ulong reads; - ulong writes; + uint32_t pe; /* to be changed if > 2TB */ + uint32_t reads; + uint32_t writes; } pe_t; typedef struct { char lv_name[NAME_LEN]; kdev_t old_dev; kdev_t new_dev; - ulong old_pe; - ulong new_pe; + uint32_t old_pe; + uint32_t new_pe; } le_remap_req_t; typedef struct lv_bmap { @@ -588,7 +472,7 @@ typedef struct lv_bmap { */ /* core */ -typedef struct lv_v4 { +typedef struct lv_v5 { char lv_name[NAME_LEN]; char vg_name[NAME_LEN]; uint lv_access; @@ -611,9 +495,9 @@ typedef struct lv_v4 { uint lv_read_ahead; /* delta to version 1 starts here */ - struct lv_v4 *lv_snapshot_org; - struct lv_v4 *lv_snapshot_prev; - struct lv_v4 *lv_snapshot_next; + struct lv_v5 *lv_snapshot_org; + struct lv_v5 *lv_snapshot_prev; + struct lv_v5 *lv_snapshot_next; lv_block_exception_t *lv_block_exception; uint lv_remap_ptr; uint lv_remap_end; @@ -621,23 +505,23 @@ typedef struct lv_v4 { uint lv_snapshot_minor; #ifdef __KERNEL__ struct kiobuf *lv_iobuf; - struct semaphore 
lv_snapshot_sem; + struct kiobuf *lv_COW_table_iobuf; + struct rw_semaphore lv_lock; struct list_head *lv_snapshot_hash_table; - ulong lv_snapshot_hash_table_size; - ulong lv_snapshot_hash_mask; - struct page *lv_COW_table_page; + uint32_t lv_snapshot_hash_table_size; + uint32_t lv_snapshot_hash_mask; wait_queue_head_t lv_snapshot_wait; int lv_snapshot_use_rate; - void *vg; + struct vg_v3 *vg; uint lv_allocated_snapshot_le; #else char dummy[200]; #endif -} lv_v4_t; +} lv_t; /* disk */ -typedef struct { +typedef struct lv_disk_v3 { uint8_t lv_name[NAME_LEN]; uint8_t vg_name[NAME_LEN]; uint32_t lv_access; @@ -659,36 +543,14 @@ typedef struct { uint32_t lv_allocation; uint32_t lv_io_timeout; /* for future use */ uint32_t lv_read_ahead; /* HM */ -} lv_disk_v3_t; +} lv_disk_t; /* * Structure Volume Group (VG) Version 1 */ /* core */ -typedef struct { - char vg_name[NAME_LEN]; /* volume group name */ - uint vg_number; /* volume group number */ - uint vg_access; /* read/write */ - uint vg_status; /* active or not */ - uint lv_max; /* maximum logical volumes */ - uint lv_cur; /* current logical volumes */ - uint lv_open; /* open logical volumes */ - uint pv_max; /* maximum physical volumes */ - uint pv_cur; /* current physical volumes FU */ - uint pv_act; /* active physical volumes */ - uint dummy; /* was obsolete max_pe_per_pv */ - uint vgda; /* volume group descriptor arrays FU */ - uint pe_size; /* physical extent size in sectors */ - uint pe_total; /* total of physical extents */ - uint pe_allocated; /* allocated physical extents */ - uint pvg_total; /* physical volume groups FU */ - struct proc_dir_entry *proc; - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ -} vg_v1_t; - -typedef struct { +typedef struct vg_v3 { char vg_name[NAME_LEN]; /* volume group name */ uint vg_number; /* volume group number */ uint vg_access; /* read/write */ @@ -716,30 +578,11 @@ typedef struct { #else char 
dummy1[200]; #endif -} vg_v3_t; +} vg_t; /* disk */ -typedef struct { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* active physical volumes */ - uint32_t dummy; - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ -} vg_disk_v1_t; - -typedef struct { +typedef struct vg_disk_v2 { uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */ uint32_t vg_number; /* volume group number */ @@ -757,7 +600,7 @@ typedef struct { uint32_t pe_total; /* total of physical extents */ uint32_t pe_allocated; /* allocated physical extents */ uint32_t pvg_total; /* physical volume groups FU */ -} vg_disk_v2_t; +} vg_disk_t; /* @@ -785,7 +628,7 @@ typedef struct { struct { kdev_t lv_dev; kdev_t pv_dev; - ulong pv_offset; + uint32_t pv_offset; } data; } pe_lock_req_t; @@ -798,7 +641,7 @@ typedef struct { /* Request structure LV_STATUS_BYINDEX */ typedef struct { - ulong lv_index; + uint32_t lv_index; lv_t *lv; /* Transfer size because user space and kernel space differ */ ushort size; @@ -807,7 +650,7 @@ typedef struct { /* Request structure LV_STATUS_BYDEV... 
*/ typedef struct { dev_t dev; - pv_t *lv; + lv_t *lv; } lv_status_bydev_req_t; @@ -817,4 +660,37 @@ typedef struct { int rate; } lv_snapshot_use_rate_req_t; + +/* useful inlines */ +static inline ulong round_up(ulong n, ulong size) { + size--; + return (n + size) & ~size; +} + +static inline ulong div_up(ulong n, ulong size) { + return round_up(n, size) / size; +} + +static int inline LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg_t *vg, lv_t *lv) { + return vg->pe_size / lv->lv_chunk_size; +} + +static int inline LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg_t *vg, lv_t *lv) { + ulong chunks = vg->pe_size / lv->lv_chunk_size; + ulong entry_size = sizeof(lv_COW_table_disk_t); + ulong chunk_size = lv->lv_chunk_size * SECTOR_SIZE; + ulong entries = (vg->pe_size * SECTOR_SIZE) / + (entry_size + chunk_size); + + if(chunks < 2) + return 0; + + for(; entries; entries--) + if((div_up(entries * entry_size, chunk_size) + entries) <= + chunks) + break; + + return entries; +} + #endif /* #ifndef _LVM_H_INCLUDE */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 171bf9e410d1..7737d585b0a2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -43,7 +43,8 @@ extern struct list_head inactive_list; struct vm_area_struct { struct mm_struct * vm_mm; /* The address space we belong to. */ unsigned long vm_start; /* Our start address within vm_mm. */ - unsigned long vm_end; /* Our end address within vm_mm. */ + unsigned long vm_end; /* The first byte after our end address + within vm_mm. 
*/ /* linked list of VM areas per task, sorted by address */ struct vm_area_struct *vm_next; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h new file mode 100644 index 000000000000..1cf66cd69e6c --- /dev/null +++ b/include/linux/seq_file.h @@ -0,0 +1,55 @@ +#ifndef _LINUX_SEQ_FILE_H +#define _LINUX_SEQ_FILE_H +#ifdef __KERNEL__ + +struct seq_operations; + +struct seq_file { + char *buf; + size_t size; + size_t from; + size_t count; + loff_t index; + struct semaphore sem; + struct seq_operations *op; +}; + +struct seq_operations { + void * (*start) (struct seq_file *m, loff_t *pos); + void (*stop) (struct seq_file *m, void *v); + void * (*next) (struct seq_file *m, void *v, loff_t *pos); + int (*show) (struct seq_file *m, void *v); +}; + +int seq_open(struct file *, struct seq_operations *); +ssize_t seq_read(struct file *, char *, size_t, loff_t *); +loff_t seq_lseek(struct file *, loff_t, int); +int seq_release(struct inode *, struct file *); +int seq_escape(struct seq_file *, const char *, const char *); + +static inline int seq_putc(struct seq_file *m, char c) +{ + if (m->count < m->size) { + m->buf[m->count++] = c; + return 0; + } + return -1; +} + +static inline int seq_puts(struct seq_file *m, const char *s) +{ + int len = strlen(s); + if (m->count + len < m->size) { + memcpy(m->buf + m->count, s, len); + m->count += len; + return 0; + } + m->count = m->size; + return -1; +} + +int seq_printf(struct seq_file *, const char *, ...) 
+ __attribute__ ((format (printf,2,3))); + +#endif +#endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index de85345183a7..938560387354 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -62,7 +62,8 @@ enum CTL_DEBUG=6, /* Debugging */ CTL_DEV=7, /* Devices */ CTL_BUS=8, /* Busses */ - CTL_ABI=9 /* Binary emulation */ + CTL_ABI=9, /* Binary emulation */ + CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ }; /* CTL_BUS names: */ diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index e8c8e946397b..cb474cee9941 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -77,7 +77,6 @@ static struct exec_domain * lookup_exec_domain(u_long personality) { struct exec_domain * ep; - char buffer[30]; u_long pers = personality(personality); read_lock(&exec_domains_lock); @@ -89,8 +88,11 @@ lookup_exec_domain(u_long personality) #ifdef CONFIG_KMOD read_unlock(&exec_domains_lock); - sprintf(buffer, "personality-%ld", pers); - request_module(buffer); + { + char buffer[30]; + sprintf(buffer, "personality-%ld", pers); + request_module(buffer); + } read_lock(&exec_domains_lock); for (ep = exec_domains; ep; ep = ep->next) { diff --git a/kernel/module.c b/kernel/module.c index e246f49d0818..34bb9edaa2b3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/kmod.h> +#include <linux/seq_file.h> /* * Originally by Anonymous (as far as I know...) @@ -1156,52 +1157,84 @@ fini: * Called by the /proc file system to return a current list of ksyms. 
*/ -int -get_ksyms_list(char *buf, char **start, off_t offset, int length) -{ +struct mod_sym { struct module *mod; - char *p = buf; - int len = 0; /* code from net/ipv4/proc.c */ - off_t pos = 0; - off_t begin = 0; + int index; +}; - for (mod = module_list; mod; mod = mod->next) { - unsigned i; - struct module_symbol *sym; +/* iterator */ - if (!MOD_CAN_QUERY(mod)) - continue; +static void *s_start(struct seq_file *m, loff_t *pos) +{ + struct mod_sym *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct module *v; + loff_t n = *pos; - for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) { - p = buf + len; - if (*mod->name) { - len += sprintf(p, "%0*lx %s\t[%s]\n", - (int)(2*sizeof(void*)), - sym->value, sym->name, - mod->name); - } else { - len += sprintf(p, "%0*lx %s\n", - (int)(2*sizeof(void*)), - sym->value, sym->name); - } - pos = begin + len; - if (pos < offset) { - len = 0; - begin = pos; - } - pos = begin + len; - if (pos > offset+length) - goto leave_the_loop; + if (!p) + return ERR_PTR(-ENOMEM); + lock_kernel(); + for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) { + if (n < v->nsyms) { + p->mod = v; + p->index = n; + return p; } } -leave_the_loop: - *start = buf + (offset - begin); - len -= (offset - begin); - if (len > length) - len = length; - return len; + unlock_kernel(); + kfree(p); + return NULL; } +static void *s_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct mod_sym *v = p; + (*pos)++; + if (++v->index >= v->mod->nsyms) { + do { + v->mod = v->mod->next; + if (!v->mod) { + unlock_kernel(); + kfree(p); + return NULL; + } + } while (!v->mod->nsyms); + v->index = 0; + } + return p; +} + +static void s_stop(struct seq_file *m, void *p) +{ + if (p && !IS_ERR(p)) { + unlock_kernel(); + kfree(p); + } +} + +static int s_show(struct seq_file *m, void *p) +{ + struct mod_sym *v = p; + struct module_symbol *sym; + + if (!MOD_CAN_QUERY(v->mod)) + return 0; + sym = &v->mod->syms[v->index]; + if (*v->mod->name) + seq_printf(m, "%0*lx 
%s\t[%s]\n", (int)(2*sizeof(void*)), + sym->value, sym->name, v->mod->name); + else + seq_printf(m, "%0*lx %s\n", (int)(2*sizeof(void*)), + sym->value, sym->name); + return 0; +} + +struct seq_operations ksyms_op = { + start: s_start, + next: s_next, + stop: s_stop, + show: s_show +}; + #else /* CONFIG_MODULES */ /* Dummy syscalls for people who don't want modules */ diff --git a/kernel/printk.c b/kernel/printk.c index d2d1a1a257da..4505465f2dd7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -535,6 +535,18 @@ void console_print(const char *s) } EXPORT_SYMBOL(console_print); +void console_unblank(void) +{ + struct console *c; + + acquire_console_sem(); + for (c = console_drivers; c != NULL; c = c->next) + if ((c->flags & CON_ENABLED) && c->unblank) + c->unblank(); + release_console_sem(); +} +EXPORT_SYMBOL(console_unblank); + /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to diff --git a/kernel/sched.c b/kernel/sched.c index 250f11d0147f..15e96ada76dd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1250,11 +1250,6 @@ void reparent_to_init(void) SET_LINKS(this_task); /* Set the exit signal to SIGCHLD so we signal init on exit */ - if (this_task->exit_signal != 0) { - printk(KERN_ERR "task `%s' exit_signal %d in " - __FUNCTION__ "\n", - this_task->comm, this_task->exit_signal); - } this_task->exit_signal = SIGCHLD; /* We also take the runqueue_lock while altering task fields diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c index a21f5b2bff13..6489ad787110 100644 --- a/net/ipv4/netfilter/ip_fw_compat.c +++ b/net/ipv4/netfilter/ip_fw_compat.c @@ -78,20 +78,12 @@ fw_in(unsigned int hooknum, { int ret = FW_BLOCK; u_int16_t redirpt; - struct sk_buff *nskb; /* Assume worse case: any hook could change packet */ (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; if ((*pskb)->ip_summed == CHECKSUM_HW) (*pskb)->ip_summed = CHECKSUM_NONE; - /* 
Firewall rules can alter TOS: raw socket may have clone of - skb: don't disturb it --RR */ - nskb = skb_unshare(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - *pskb = nskb; - switch (hooknum) { case NF_IP_PRE_ROUTING: if (fwops->fw_acct_in) diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 6a973372901c..79d533aae138 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -734,15 +734,6 @@ do_bindings(struct ip_conntrack *ct, synchronize_bh()) can vanish. */ READ_LOCK(&ip_nat_lock); for (i = 0; i < info->num_manips; i++) { - struct sk_buff *nskb; - /* raw socket may have clone of skb: don't disturb it --RR */ - nskb = skb_unshare(*pskb, GFP_ATOMIC); - if (!nskb) { - READ_UNLOCK(&ip_nat_lock); - return NF_DROP; - } - *pskb = nskb; - if (info->manips[i].direction == dir && info->manips[i].hooknum == hooknum) { DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 9219006abe0b..ced0a9dced52 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -48,13 +48,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, u_int16_t tcplen, newtotlen, oldval, newmss; unsigned int i; u_int8_t *opt; - struct sk_buff *nskb; - - /* raw socket may have clone of skb: don't disturb it --RR */ - nskb = skb_unshare(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - *pskb = nskb; tcplen = (*pskb)->len - iph->ihl*4; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 8c335db43e6c..e6061d707ac5 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -19,15 +19,8 @@ target(struct sk_buff **pskb, const struct ipt_tos_target_info *tosinfo = targinfo; if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { - struct sk_buff *nskb; u_int16_t diffs[2]; - /* raw socket may have clone of skb: don't disturb it --RR */ - nskb = skb_unshare(*pskb, GFP_ATOMIC); - if (!nskb) - return 
NF_DROP; - *pskb = nskb; - diffs[0] = htons(iph->tos) ^ 0xFFFF; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; diffs[1] = htons(iph->tos); diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 1bb82ed57e23..1571ee4d008d 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -130,6 +130,9 @@ int __init irda_device_init( void) #ifdef CONFIG_SA1100_FIR sa1100_irda_init(); #endif +#ifdef CONFIG_SA1100_FIR + sa1100_irda_init(); +#endif #ifdef CONFIG_NSC_FIR nsc_ircc_init(); #endif @@ -160,6 +163,9 @@ int __init irda_device_init( void) #ifdef CONFIG_EP7211_IR ep7211_ir_init(); #endif +#ifdef CONFIG_EP7211_IR + ep7211_ir_init(); +#endif return 0; } |
