diff -Naur linux/arch/i386/config.in linux.numa/arch/i386/config.in
--- linux/arch/i386/config.in Fri Apr 20 20:26:15 2001
+++ linux.numa/arch/i386/config.in Wed Aug 1 17:45:03 2001
@@ -169,6 +169,9 @@
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
+if [ "$CONFIG_SMP" = "y" ]; then
+   bool 'NUMA Aware Spinlocks' CONFIG_NUMA_LOCKS
+fi
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'APIC and IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC
    if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then
diff -Naur linux/arch/i386/kernel/Makefile linux.numa/arch/i386/kernel/Makefile
--- linux/arch/i386/kernel/Makefile Fri Dec 29 17:35:47 2000
+++ linux.numa/arch/i386/kernel/Makefile Wed Aug 1 17:45:03 2001
@@ -37,6 +37,7 @@
 obj-$(CONFIG_MICROCODE) += microcode.o
 obj-$(CONFIG_APM) += apm.o
 obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_NUMA_LOCKS) += numalock.o
 obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
 obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
 obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o
diff -Naur linux/arch/i386/kernel/numalock.c linux.numa/arch/i386/kernel/numalock.c
--- linux/arch/i386/kernel/numalock.c Wed Dec 31 18:00:00 1969
+++ linux.numa/arch/i386/kernel/numalock.c Wed Aug 1 18:14:20 2001
@@ -0,0 +1,272 @@
+/*
+ * IBM NUMA Locking routines.
+ *
+ * (c) 2001 Jack F. Vogel (jfv@us.ibm.com),
+ *          Swaminathan Sivasubramanian (ssivasu@us.ibm.com)
+ *
+ * This code is released under the GNU General Public License
+ * version 2 or later.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+** The per-cpu structure for spinning
+*/
+
+union numaspins numa_spins[32] __cacheline_aligned;
+
+#define numa_wake(cpu,level) numa_spins[(cpu)].wakeup[(level)]
+
+
+/*
+** Atomic exclusive add, returns the old value
+*/
+ulong
+_xadd(volatile ulong *ptr, ulong const val)
+{
+	ulong ret;
+
+	__asm__ __volatile__(
+		"lock; xaddl %%eax, (%%ecx) ;\n"
+		: "=m" (ptr), "=a" (ret)
+		: "c" (ptr), "a" (val)
+		: "memory");
+
+	return ret;
+}
+
+/*
+** This is the function called by numa_lock(); it finds the next
+** empty wakeup pointer and returns its index into the array of
+** global per-cpu wakeup pointers
+*/
+
+int find_next_wakeup(int cpu)
+{
+	int i=0;
+
+	for (;i<MAX_INT_LEVELS;i++) {
+		if (numa_wake(cpu,i) == 0)
+			return i;
+	}
+	return -1;
+}
+
+/*
+** Find the level at which the given cpu parked its wakeup pointer
+** for this lock, or -1 if it has not been set yet
+*/
+
+int find_wakeup(volatile numalock_t *numa, int cpu)
+{
+	int i;
+
+	for (i=0;i<MAX_INT_LEVELS;i++) {
+		if (numa_wake(cpu,i) == numa)
+			return i;
+	}
+	return -1;
+}
+
+/*
+** Called by the unlock paths to hand the lock to a waiting cpu,
+** returns 1 if a new owner was woken, 0 otherwise
+*/
+
+int numa_wakeup(volatile numalock_t *numa, ulong wanted)
+{
+	ulong self = 1U<<smp_processor_id();
+	ulong mask = self | (self - 1);	/* our bit and every cpu below us */
+	ulong fwdcpus, backcpus;
+	int index, level;
+
+	fwdcpus = numa->lock & ~(mask);
+	if (fwdcpus != 0) {
+		index = ffs(fwdcpus) - 1;
+	} else {
+		backcpus = numa->lock & mask;
+		if (!backcpus) return 0;
+		index = ffs(backcpus) - 1;
+	}
+
+	/*
+	** If level is -1, it implies we are in the middle of a situation
+	** wherein a CPU that wants the lock had set its lock bit but
+	** had not set its wakeup pointer yet
+	*/
+
+	level = find_wakeup(numa,index);
+
+	if (level == -1) return 0;
+
+	if (cmpxchg((ulong *)&numa_wake(index,level), (ulong)numa, 0U))
+		return 1U;
+
+	return 0U;
+}
+
+/*
+** A one shot attempt to get the spinlock
+*/
+int numa_trylock(numalock_t *numa)
+{
+	register ulong mask = 1U<<smp_processor_id();
+
+	if (!cmpxchg((ulong *)&numa->lock, 0U, mask))
+		return 1U;
+
+	return 0U;
+}
+
+/*
+** The traditional spin till you get it...
+** this version spins with interrupts enabled.
+*/
+void numa_lock(numalock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong flags, mask = 1U<<cpu;
+	numalock_t *volatile *spin;
+	int i;
+
+	local_irq_save(flags);
+
+	if (numa->lock & mask) BUG();
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		local_irq_restore(flags);
+		return;
+	} else {
+		i = find_next_wakeup(cpu);
+		if (i == -1) BUG();
+		spin = &numa_wake(cpu,i);
+
+		*spin = numa;
+		local_irq_restore(flags);
+
+		for (;;){
+			if (!*spin) {
+				// We have the lock...
+				return;
+			}
+		}
+	}
+}
+
+void numa_unlock(numalock_t *numa)
+{
+	register ulong flags, wanted, mask = 1U<<smp_processor_id();
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit(smp_processor_id(), (ulong *)&numa->lock);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+					mask, 0) == mask){
+				return;
+			}
+		}
+	}
+}
+
+/*
+** This pair of routines is called by
+** macros in numalock.h that manipulate
+** interrupts.
+*/
+void __numa_lock(numalock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong mask = 1U<<cpu;
+	numalock_t *volatile *spin;
+	int i;
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		return;
+	} else {
+		i = find_next_wakeup(cpu);
+		if ( i == -1 ) BUG();
+		spin = &numa_wake(cpu,i);
+
+		*spin = numa;
+		/*
+		** Here is the spin; when we exit the loop we
+		** own the lock. We spin with interrupts enabled.
+		*/
+		for (;;){
+			if (!*spin) {
+				// We have the lock...
+				return;
+			}
+		}
+	}
+}
+
+
+void __numa_unlock(numalock_t *numa)
+{
+	register ulong wanted, mask = 1U<<smp_processor_id();
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit(smp_processor_id(), (ulong *)&numa->lock);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+					mask, 0) == mask){
+				return;
+			}
+		}
+	}
+}
+
diff -Naur linux/arch/ia64/config.in linux.numa/arch/ia64/config.in
--- linux/arch/ia64/config.in Tue Apr 17 19:19:24 2001
+++ linux.numa/arch/ia64/config.in Wed Aug 1 17:51:28 2001
@@ -114,6 +114,10 @@
 tristate '/proc/pal support' CONFIG_IA64_PALINFO
 tristate '/proc/efi support' CONFIG_IA64_EFIVARS
 
+if [ "$CONFIG_SMP" = "y" ]; then
+  bool 'NUMA Aware Spinlocks' CONFIG_NUMA_LOCKS
+fi
+
 bool 'Networking support' CONFIG_NET
 bool 'System V IPC' CONFIG_SYSVIPC
 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
diff -Naur linux/arch/ia64/kernel/Makefile linux.numa/arch/ia64/kernel/Makefile
--- linux/arch/ia64/kernel/Makefile Thu Apr 5 14:51:47 2001
+++ linux.numa/arch/ia64/kernel/Makefile Wed Aug 1 17:52:03 2001
@@ -14,7 +14,7 @@
 export-objs := ia64_ksyms.o
 
 obj-y := acpi.o entry.o gate.o efi.o efi_stub.o ia64_ksyms.o irq.o irq_ia64.o irq_sapic.o ivt.o \
-	machvec.o pal.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \
+	machvec.o numalock.o pal.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \
 	signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o
 obj-$(CONFIG_IA64_GENERIC) += machvec.o iosapic.o
 obj-$(CONFIG_IA64_DIG) += iosapic.o
diff -Naur linux/arch/ia64/kernel/numalock.c linux.numa/arch/ia64/kernel/numalock.c
--- linux/arch/ia64/kernel/numalock.c Wed Dec 31 18:00:00 1969
+++ linux.numa/arch/ia64/kernel/numalock.c Wed Aug 1 18:14:42 2001
@@ -0,0 +1,287 @@
+/*
+ * IBM NUMA Locking routines.
+ *
+ * (c) 2001 Jack F. Vogel (jfv@us.ibm.com),
+ *          Swaminathan Sivasubramanian (ssivasu@us.ibm.com)
+ *
+ * This code is released under the GNU General Public License
+ * version 2 or later.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+** The per-cpu structure for spinning
+*/
+
+union numaspins numa_spins[32] __cacheline_aligned;
+
+#define numa_wake(cpu,level) numa_spins[(cpu)].wakeup[(level)]
+
+/*
+** The 64-bit clear bit manipulation routine
+*/
+
+void clear_bit_64 (ulong nr, volatile ulong *addr)
+{
+	__u64 mask, old, new;
+
+	mask = ~( 1UL << nr );
+	do {
+		old = *addr;
+		new = old & mask;
+	} while (cmpxchg_acq(addr, old, new) != old);
+}
+
+/*
+** Atomic exclusive add, returns the old value
+*/
+
+ulong _xadd(volatile ulong *addr, ulong const val)
+{
+	__u64 mask, old, new;
+
+	do {
+		old = *addr;
+		new = old + val;
+	} while (cmpxchg_acq(addr, old, new) != old);
+	return old;
+}
+
+/*
+** This is the function called by numa_lock(); it finds the next
+** empty wakeup pointer and returns its index into the array of
+** global per-cpu wakeup pointers
+*/
+
+int find_next_wakeup(int cpu)
+{
+	int i=0;
+
+	for(;i<MAX_INT_LEVELS;i++) {
+		if (numa_wake(cpu,i) == 0)
+			return i;
+	}
+	return -1;
+}
+
+/*
+** Find the level at which the given cpu parked its wakeup pointer
+** for this lock, or -1 if it has not been set yet
+*/
+
+int find_wakeup(volatile numalock_t *numa, int cpu)
+{
+	int i;
+
+	for(i=0;i<MAX_INT_LEVELS;i++) {
+		if (numa_wake(cpu,i) == numa)
+			return i;
+	}
+	return -1;
+}
+
+/*
+** Called by the unlock paths to hand the lock to a waiting cpu,
+** returns 1 if a new owner was woken, 0 otherwise
+*/
+
+int numa_wakeup(volatile numalock_t *numa, ulong wanted)
+{
+	ulong self = 1U<<smp_processor_id();
+	ulong mask = self | (self - 1);	/* our bit and every cpu below us */
+	ulong fwdcpus, backcpus;
+	int index, level;
+
+	fwdcpus = numa->lock & ~(mask);
+	if (fwdcpus != 0) {
+		index = ffs(fwdcpus) - 1;
+	} else {
+		backcpus = numa->lock & mask;
+		if (!backcpus) return 0;
+		index = ffs(backcpus) - 1;
+	}
+
+	/*
+	** If level is -1, it implies we are in the middle of a situation
+	** wherein a CPU that wants the lock had set its lock bit but
+	** had not set its wakeup pointer yet
+	*/
+
+	level = find_wakeup(numa,index);
+
+	if ( level == -1 ) return 0;
+
+	if (cmpxchg((ulong *)&numa_wake(index,level), (ulong)numa, 0U))
+		return 1U;
+
+	return 0U;
+}
+
+/*
+** A one shot attempt to get the spinlock
+*/
+
+int numa_trylock(numalock_t *numa)
+{
+	register ulong mask = 1U<<smp_processor_id();
+
+	if (!cmpxchg((ulong *)&numa->lock, 0U, mask))
+		return 1U;
+
+	return 0U;
+}
+
+/*
+** The traditional spin till you get it...
+** this version spins with interrupts enabled.
+*/
+
+void numa_lock(numalock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong flags, mask = 1U<<cpu;
+	numalock_t *volatile *spin;
+	int i;
+
+	local_irq_save(flags);
+
+	if (numa->lock & mask) BUG();
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		local_irq_restore(flags);
+		return;
+	} else {
+		i = find_next_wakeup(cpu);
+		if(i == -1) BUG();
+		spin = &numa_wake(cpu,i);
+
+		*spin = numa;
+		local_irq_restore(flags);
+
+		for (;;){
+			if (!*spin) {
+				// We have the lock...
+				return;
+			}
+		}
+	}
+}
+
+void numa_unlock(numalock_t *numa)
+{
+	register ulong flags, wanted, mask = 1U<<smp_processor_id();
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit_64(smp_processor_id(), (ulong *)&numa->lock);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+					mask, 0) == mask){
+				return;
+			}
+		}
+	}
+}
+
+/*
+** This pair of routines is called by
+** macros in numalock.h that manipulate
+** interrupts.
+*/
+
+void __numa_lock(numalock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong mask = 1U<<cpu;
+	numalock_t *volatile *spin;
+	int i;
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		return;
+	} else {
+		i = find_next_wakeup(cpu);
+		if ( i == -1 ) BUG();
+		spin = &numa_wake(cpu,i);
+
+		*spin = numa;
+		/*
+		** Here is the spin; when we exit the loop we
+		** own the lock. We spin with interrupts enabled.
+		*/
+		for (;;){
+			if (!*spin) {
+				// We have the lock...
+				return;
+			}
+		}
+	}
+}
+
+
+void __numa_unlock(numalock_t *numa)
+{
+	register ulong wanted, mask = 1U<<smp_processor_id();
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit_64(smp_processor_id(), (ulong *)&numa->lock);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+					mask, 0) == mask){
+				return;
+			}
+		}
+	}
+}
+
diff -Naur linux/include/asm-i386/numalock.h linux.numa/include/asm-i386/numalock.h
--- linux/include/asm-i386/numalock.h Wed Dec 31 18:00:00 1969
+++ linux.numa/include/asm-i386/numalock.h Wed Aug 1 17:49:04 2001
@@ -0,0 +1,49 @@
+/*
+ * IBM NUMA Aware Locking Primitives
+ *
+ *
+ */
+
+#ifndef _NUMA_LOCKS_H
+#define _NUMA_LOCKS_H
+
+#define MAX_INT_LEVELS 5
+
+struct numalock_t {
+	unsigned long lock;
+};
+
+typedef struct numalock_t numalock_t;
+
+union numaspins {
+	numalock_t *volatile wakeup[MAX_INT_LEVELS];
+	char __pad[SMP_CACHE_BYTES];
+};
+
+extern union numaspins numa_spins[];
+
+uint nspin[32];	// Interrupt spin counters
+
+#define NUMA_LOCK_UNLOCKED (numalock_t) {0}
+#define numa_lock_init(x) do { *(x) = NUMA_LOCK_UNLOCKED; } while(0)
+#define numa_is_locked(x) (*(volatile char *)(&(x)->lock) != 0)
+#define numa_unlock_wait(x) do { barrier(); } while(numa_is_locked(x))
+
+#define numa_lock_irq(x) \
+	do { local_irq_disable(); __numa_lock(x); } while(0)
+#define numa_unlock_irq(x) \
+	do { __numa_unlock(x); local_irq_enable(); } while(0)
+#define numa_lock_irqsave(x,flags) \
+	do { local_irq_save(flags); __numa_lock(x); } while(0)
+#define numa_unlock_irqrestore(x,flags) \
+	do { __numa_unlock(x); local_irq_restore(flags); } while(0)
+
+extern ulong _xadd(volatile ulong *ptr, ulong const val);
+extern int numa_wakeup(volatile numalock_t *numa, ulong wanted);
+extern int numa_trylock(numalock_t *numa);
+extern void numa_lock(numalock_t *numa);
+extern void numa_unlock(numalock_t *numa);
+extern void __numa_lock(numalock_t *numa);
+extern void __numa_unlock(numalock_t *numa);
+
+#endif
diff -Naur linux/include/asm-ia64/numalock.h linux.numa/include/asm-ia64/numalock.h
--- linux/include/asm-ia64/numalock.h Wed Dec 31 18:00:00 1969
+++ linux.numa/include/asm-ia64/numalock.h Wed Aug 1 17:52:28 2001
@@ -0,0 +1,48 @@
+/*
+ * IBM NUMA Aware Locking Primitives
+ *
+ *
+ */
+
+#ifndef _NUMA_LOCKS_H
+#define _NUMA_LOCKS_H
+
+#define MAX_INT_LEVELS 5
+
+struct numalock_t {
+	unsigned long lock;
+};
+
+typedef struct numalock_t numalock_t;
+
+union numaspins {
+	numalock_t *volatile wakeup[MAX_INT_LEVELS];
+	char __pad[SMP_CACHE_BYTES];
+};
+
+uint nspin[32];	// Interrupt spin counters
+
+#define NUMA_LOCK_UNLOCKED (numalock_t) {0}
+#define numa_lock_init(x) do { *(x) = NUMA_LOCK_UNLOCKED; } while(0)
+#define numa_is_locked(x) (*(volatile char *)(&(x)->lock) != 0)
+#define numa_unlock_wait(x) do { barrier(); } while(numa_is_locked(x))
+
+#define numa_lock_irq(x) \
+	do { local_irq_disable(); __numa_lock(x); } while(0)
+#define numa_unlock_irq(x) \
+	do { __numa_unlock(x); local_irq_enable(); } while(0)
+#define numa_lock_irqsave(x,flags) \
+	do { local_irq_save(flags); __numa_lock(x); } while(0)
+#define numa_unlock_irqrestore(x,flags) \
+	do { __numa_unlock(x); local_irq_restore(flags); } while(0)
+
+extern ulong _xadd(volatile ulong *ptr, ulong const val);
+extern void clear_bit_64(ulong nr, volatile ulong *addr);
+extern int numa_wakeup(volatile numalock_t *numa, ulong wanted);
+extern int numa_trylock(numalock_t *numa);
+extern void numa_lock(numalock_t *numa);
+extern void numa_unlock(numalock_t *numa);
+extern void __numa_lock(numalock_t *numa);
+extern void __numa_unlock(numalock_t *numa);
+
+#endif
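
For anyone wanting to try the interface, here is a minimal usage sketch (not part of
the patch). The lock, counter and function names below are made up for illustration;
it assumes CONFIG_NUMA_LOCKS is enabled so that <asm/numalock.h> provides the
primitives added above.

	#include <asm/numalock.h>

	static numalock_t demo_lock = NUMA_LOCK_UNLOCKED;	/* hypothetical lock */
	static unsigned long demo_count;			/* hypothetical shared data */

	void demo_update(void)
	{
		unsigned long flags;

		/* Process context: numa_lock() spins with interrupts enabled. */
		numa_lock(&demo_lock);
		demo_count++;
		numa_unlock(&demo_lock);

		/*
		** If the data is also touched from interrupt context, use the
		** irq-safe wrappers, which disable interrupts around __numa_lock().
		*/
		numa_lock_irqsave(&demo_lock, flags);
		demo_count++;
		numa_unlock_irqrestore(&demo_lock, flags);
	}

The wrappers mirror the existing spin_lock_irqsave()/spin_unlock_irqrestore() pairs,
so converting an existing spinlock user is mostly a matter of changing the lock type
and the lock/unlock calls.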