diff -Naur linux.gen/arch/i386/config.in linux.numa/arch/i386/config.in
--- linux.gen/arch/i386/config.in	Wed Jun 20 17:47:39 2001
+++ linux.numa/arch/i386/config.in	Mon Jul 16 13:41:06 2001
@@ -169,6 +169,9 @@
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
+if [ "$CONFIG_SMP" = "y" ]; then
+   bool 'NUMA Aware Spinlocks' CONFIG_NUMA_LOCKS
+fi
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'APIC and IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC
    if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then
diff -Naur linux.gen/arch/i386/kernel/Makefile linux.numa/arch/i386/kernel/Makefile
--- linux.gen/arch/i386/kernel/Makefile	Fri Dec 29 14:35:47 2000
+++ linux.numa/arch/i386/kernel/Makefile	Mon Jul 16 13:41:51 2001
@@ -37,6 +37,7 @@
 obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o trampoline.o
+obj-$(CONFIG_NUMA_LOCKS)	+= numalock.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o mpparse.o
 obj-$(CONFIG_X86_VISWS_APIC)	+= visws_apic.o
diff -Naur linux.gen/arch/i386/kernel/irq.c linux.numa/arch/i386/kernel/irq.c
--- linux.gen/arch/i386/kernel/irq.c	Wed Jun 20 11:06:38 2001
+++ linux.numa/arch/i386/kernel/irq.c	Mon Jul 16 13:45:55 2001
@@ -575,6 +575,9 @@
 	struct irqaction * action;
 	unsigned int status;
 
+#if defined(CONFIG_NUMA_LOCKS)
+	numa_unspin();
+#endif
 	kstat.irqs[cpu][irq]++;
 	spin_lock(&desc->lock);
 	desc->handler->ack(irq);
@@ -636,6 +639,9 @@
 	if (softirq_pending(cpu))
 		do_softirq();
 
+#if defined(CONFIG_NUMA_LOCKS)
+	numa_respin();
+#endif
 	return 1;
 }
 
diff -Naur linux.gen/arch/i386/kernel/numalock.c linux.numa/arch/i386/kernel/numalock.c
--- linux.gen/arch/i386/kernel/numalock.c	Wed Dec 31 16:00:00 1969
+++ linux.numa/arch/i386/kernel/numalock.c	Mon Jul 16 16:46:46 2001
@@ -0,0 +1,335 @@
+/*
+ * IBM NUMA Locking routines.
+ *
+ * (c) 2001 Jack F. Vogel (jfv@us.ibm.com),
+ *          Swaminathan Sivasubramanian (ssivasu@us.ibm.com)
+ *
+ * This code is released under the GNU General Public License
+ * version 2 or later.
+ */
+
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <asm/bitops.h>
+#include <asm/numalock.h>
+
+/*
+** The per-cpu structure for spinning
+*/
+
+static union {
+	struct numa_spin {
+		spinlock_t *volatile wakeup;
+		spinlock_t *volatile irq[3];
+	} numa_spin;
+	char __pad[SMP_CACHE_BYTES];
+} numa_spins[32] __cacheline_aligned;
+
+#define numa_wake(index)	numa_spins[(index)].numa_spin.wakeup
+#define numa_irq(index,level)	numa_spins[(index)].numa_spin.irq[(level)]
+
+
+/*
+** Atomic exclusive add, returns the old value
+*/
+ulong
+_xadd(volatile ulong *ptr, ulong const val)
+{
+	ulong ret;
+
+	__asm__ __volatile__(
+		"lock; xaddl %%eax, (%%ecx) ;\n"
+		: "=m" (ptr), "=a" (ret)
+		: "c" (ptr), "a" (val)
+		: "memory");
+
+	return ret;
+}
+
+/*
+** numa_wakeup - this is the logic to decide upon
+**	release of the spinlock who to pass
+**	off to.
+*/
+
+int numa_wakeup(volatile spinlock_t *numa, ulong wanted)
+{
+	register ulong mask, fwdcpus, backcpus;
+	int index;
+
+	/*
+	** For the time being the selection logic is
+	** fairly simple-minded, basically round robin.
+	** Making the code NUMA fair will simply
+	** be a change to the selection logic here.
+	*/
+	if (!wanted) return 0;
+
+	mask = (1 << (1 + smp_processor_id())) - 1;
+	fwdcpus = numa->lock & ~(mask);
+	if (fwdcpus != 0) {
+		index = ffs(fwdcpus) - 1;
+	} else {
+		backcpus = numa->lock & mask;
+		if (!backcpus) return 0;
+		index = ffs(backcpus) - 1;
+	}
+
+	if (cmpxchg((ulong *)&numa_wake(index), (ulong)numa, 0U))
+		return 1U;
+
+	return 0U;	// shouldn't happen
+}
+
+/*
+** A one shot attempt to get the spinlock
+*/
+int numa_trylock(spinlock_t *numa)
+{
+	register ulong mask = 1U<<smp_processor_id();
+
+	if (!cmpxchg((ulong *)&numa->lock, 0U, mask))
+		return 1U;
+
+	return 0U;
+}
+
+/*
+** The traditional spin till you get it...
+** this version spins with interrupts enabled.
+*/
+void numa_lock(spinlock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong flags, mask = 1U<<cpu;
+	spinlock_t *volatile *spin;
+	spinlock_t *volatile *irqsave;
+
+	local_irq_save(flags);
+
+	// We must not already hold this lock
+	if (numa->lock & mask) BUG();
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		local_irq_restore(flags);
+		return;
+	} else {
+		ulong spincnt = 0;
+		spin = &numa_wake(cpu);
+
+		// Save the lock address for this nesting...
+		irqsave = &numa_irq(cpu,nspin[cpu]);
+
+		*spin = *irqsave = numa;
+
+		/*
+		** Here is the spin, when we exit the loop we
+		** own the lock. We spin with interrupts enabled.
+		*/
+		local_irq_restore(flags);
+		for (;;){
+			local_irq_save(flags);
+			if (!*spin) {
+				// We have the lock...
+				*irqsave = 0U;
+				local_irq_restore(flags);
+				return;
+			}
+			local_irq_restore(flags);
+			++spincnt;
+		}
+	}
+}
+
+void numa_unlock(spinlock_t *numa)
+{
+	register ulong flags, wanted, mask = 1U<<smp_processor_id();
+
+	local_irq_save(flags);
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit(smp_processor_id(), (ulong *)&numa->lock);
+			local_irq_restore(flags);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+			    mask, 0) == mask){
+				local_irq_restore(flags);
+				return;
+			}
+		}
+	}
+}
+
+/*
+** Test to see if we hold a lock,
+** this assumes we are not in the
+** middle of a spin for the lock.
+*/
+int numa_held(spinlock_t *numa)
+{
+	register ulong mask = 1U<<smp_processor_id();
+
+	return ((numa->lock & mask) != 0);
+}
+
+/*
+** This routine is called in do_IRQ().
+** Called with interrupts disabled.
+*/
+void
+numa_unspin(void)
+{
+	spinlock_t *volatile *saved;
+	spinlock_t volatile *numa;
+	register ulong cpu = smp_processor_id();
+	register ulong mask = 1U << cpu;
+	ulong wanted;
+
+	saved = &numa_irq(cpu,nspin[cpu]);	// saved lock address
+	if (*saved) {				// Were we spinning for lock?
+		/* Clear ourselves from being a spin candidate */
+		numa = *saved;
+		if (cmpxchg((ulong *)&numa_wake(cpu), numa, 0U)) {
+			// Disabled from being woken up, clear our bit
+			clear_bit(cpu, (ulong *)&numa->lock);
+			++nspin[cpu];
+			return;
+		}
+		/*
+		** Apparently another process granted us the lock
+		** but we don't want it now, unlock it. This code
+		** reproduces numa_unlock, we can't just call it because
+		** we do not want interrupts enabled...
+		*/
+		for (;;) {
+			wanted = numa->lock & ~mask;
+			if (wanted && numa_wakeup(numa, wanted)) {
+				/*
+				** We found a new owner, drop
+				** our bit from the lock.
+				*/
+				clear_bit(cpu, (ulong *)&numa->lock);
+				++nspin[cpu];
+				return;
+			} else if (!wanted) {
+				/*
+				** No one waiting, attempt to zero it.
+				** Could have a race and fail but then
+				** we just go round again...
+				*/
+				if (cmpxchg((ulong *)&numa->lock, mask,
+				    0) == mask){
+					++nspin[cpu];
+					return;
+				}
+			}
+			// go round again...
+		}
+	} else
+		++nspin[cpu];
+}
+
+/*
+** Called at the return in do_IRQ, here
+** we reset our spinning state...
+*/
+void
+numa_respin(void)
+{
+	register ulong cpu = smp_processor_id();
+	register ulong mask = 1U << cpu;
+	spinlock_t *volatile *save;
+	spinlock_t *volatile *wake;
+
+	if (--nspin[cpu] < 0)
+		BUG();
+	save = &numa_irq(cpu,nspin[cpu]);
+
+	if (*save == 0U)
+		return;
+
+	wake = &numa_wake(cpu);
+	*wake = *save;
+	if (_xadd((ulong *)*wake, mask) == 0) {
+		// We have the lock, null the wakeup pointer
+		*wake = *save = 0U;
+		return;
+	}
+	// We will return to the spin normally
+}
+
+/*
+** This pair of routines is called
+** by the macros in numalock.h that
+** manipulate interrupts.
+*/
+void __numa_lock(spinlock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong mask = 1U<<cpu;
+	spinlock_t *volatile *spin;
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		return;
+	} else {
+		ulong spincnt = 0;
+		spin = &numa_wake(cpu);
+
+		*spin = numa;
+		/*
+		** Here is the spin, when we exit the loop we
+		** own the lock. The numalock.h macros have
+		** already disabled interrupts at this point.
+		*/
+		for (;;){
+			if (!*spin) {
+				// We have the lock...
+				return;
+			}
+		}
+	}
+}
+
+
+void __numa_unlock(spinlock_t *numa)
+{
+	register ulong wanted, mask = 1U<<smp_processor_id();
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit(smp_processor_id(), (ulong *)&numa->lock);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+			    mask, 0) == mask){
+				return;
+			}
+		}
+	}
+}
+
diff -Naur linux.gen/include/asm-i386/numalock.h linux.numa/include/asm-i386/numalock.h
--- linux.gen/include/asm-i386/numalock.h	Wed Dec 31 16:00:00 1969
+++ linux.numa/include/asm-i386/numalock.h	Mon Jul 16 16:03:46 2001
@@ -0,0 +1,37 @@
+/*
+ * IBM NUMA Aware Locking Primitives
+ *
+ *
+ */
+
+#ifndef _NUMA_LOCKS_H
+#define _NUMA_LOCKS_H
+
+int nspin[32];		// Interrupt spin counters (signed: numa_respin() checks for underflow)
+
+#define NUMA_LOCK_UNLOCKED	(spinlock_t) {0}
+#define numa_lock_init(x)	do { *(x) = NUMA_LOCK_UNLOCKED; } while(0)
+#define numa_is_locked(x)	(*(volatile char *)(&(x)->lock) != 0)
+#define numa_unlock_wait(x)	do { barrier(); } while(numa_is_locked(x))
+
+#define numa_lock_irq(x) \
+	do { local_irq_disable(); __numa_lock(x); } while(0)
+#define numa_unlock_irq(x) \
+	do { __numa_unlock(x); local_irq_enable(); } while(0)
+#define numa_lock_irqsave(x,flags) \
+	do { local_irq_save(flags); __numa_lock(x); } while(0)
+#define numa_unlock_irqrestore(x,flags) \
+	do { __numa_unlock(x); local_irq_restore(flags); } while(0)
+
+extern ulong _xadd(volatile ulong *ptr, ulong const val);
+extern int numa_wakeup(volatile spinlock_t *numa, ulong wanted);
+extern int numa_trylock(spinlock_t *numa);
+extern void numa_lock(spinlock_t *numa);
+extern void numa_unlock(spinlock_t *numa);
+extern void __numa_lock(spinlock_t *numa);
+extern void __numa_unlock(spinlock_t *numa);
+extern int numa_held(spinlock_t *numa);
+extern void numa_unspin(void);
+extern void numa_respin(void);
+
+#endif
diff -Naur linux.gen/include/asm-i386/smplock.h linux.numa/include/asm-i386/smplock.h
--- linux.gen/include/asm-i386/smplock.h	Tue Jul 3 15:42:55 2001
+++ linux.numa/include/asm-i386/smplock.h	Mon Jul 16 13:48:02 2001
@@ -8,6 +8,10 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 
+#if defined (CONFIG_NUMA_LOCKS)
+#include <asm/numalock.h>
+#endif
+
 extern spinlock_t kernel_flag;
 
 #define kernel_locked()		spin_is_locked(&kernel_flag)
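
A note on the handoff logic for readers following numa_wakeup(): each CPU owns one bit in the lock word (mask = 1U<<cpu), so the release path only has to pick some other set bit, preferring CPUs numbered above the releaser and wrapping to the lowest-numbered waiter otherwise. Below is a minimal user-space sketch of that selection, for illustration only; pick_next_cpu() is an invented name, not part of the patch, and unlike numa_wakeup() it strips the releaser's own bit explicitly rather than relying on ffs() ordering.

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Round-robin choice of the next lock owner: first waiter numbered
** above 'cpu', otherwise the lowest-numbered waiter below it.
** Returns -1 if no other CPU has its bit set in the lock word. */
static int pick_next_cpu(unsigned int lockword, int cpu)
{
	unsigned int mask = (1U << (1 + cpu)) - 1;		/* bits 0..cpu */
	unsigned int fwd  = lockword & ~mask;			/* waiters above us */
	unsigned int back = lockword & mask & ~(1U << cpu);	/* waiters below us */

	if (fwd)
		return ffs(fwd) - 1;
	if (back)
		return ffs(back) - 1;
	return -1;
}

int main(void)
{
	/* CPUs 1, 3 and 5 hold bits in the word; CPU 3 is releasing. */
	unsigned int lockword = (1U << 1) | (1U << 3) | (1U << 5);

	printf("next owner: CPU %d\n", pick_next_cpu(lockword, 3));			/* prints 5 */
	printf("next owner: CPU %d\n", pick_next_cpu((1U << 1) | (1U << 3), 3));	/* prints 1 */
	return 0;
}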
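
For context on how the new primitives would be used: the patch only adds the lock routines and the do_IRQ() hooks, it does not convert any existing kernel lock. Under that assumption, a caller protecting a shared structure with a NUMA-aware lock would look roughly like the sketch below; the counter and both function names are invented for illustration.

/* Hypothetical user of the API declared in <asm/numalock.h>. */
#include <asm/numalock.h>

static spinlock_t stats_lock = NUMA_LOCK_UNLOCKED;
static ulong event_count;

/* Process context: numa_lock() spins with interrupts enabled and
** restores the caller's interrupt state before returning. */
void count_event(void)
{
	numa_lock(&stats_lock);
	event_count++;
	numa_unlock(&stats_lock);
}

/* Critical section that must run with interrupts off: the numalock.h
** macros disable interrupts first and then spin via __numa_lock(). */
void count_event_irqsave(void)
{
	ulong flags;

	numa_lock_irqsave(&stats_lock, flags);
	event_count++;
	numa_unlock_irqrestore(&stats_lock, flags);
}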