diff -Naur linux.gen/arch/i386/config.in linux.numa/arch/i386/config.in
--- linux.gen/arch/i386/config.in	Wed Jun 20 17:47:39 2001
+++ linux.numa/arch/i386/config.in	Mon Jul 16 13:41:06 2001
@@ -169,6 +169,9 @@
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
+if [ "$CONFIG_SMP" = "y" ]; then
+   bool 'NUMA Aware Spinlocks' CONFIG_NUMA_LOCKS
+fi
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'APIC and IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC
    if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then
diff -Naur linux.gen/arch/i386/kernel/Makefile linux.numa/arch/i386/kernel/Makefile
--- linux.gen/arch/i386/kernel/Makefile	Fri Dec 29 14:35:47 2000
+++ linux.numa/arch/i386/kernel/Makefile	Mon Jul 16 13:41:51 2001
@@ -37,6 +37,7 @@
 obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o trampoline.o
+obj-$(CONFIG_NUMA_LOCKS)	+= numalock.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o mpparse.o
 obj-$(CONFIG_X86_VISWS_APIC)	+= visws_apic.o
diff -Naur linux.gen/arch/i386/kernel/irq.c linux.numa/arch/i386/kernel/irq.c
--- linux.gen/arch/i386/kernel/irq.c	Wed Jun 20 11:06:38 2001
+++ linux.numa/arch/i386/kernel/irq.c	Mon Jul 16 13:45:55 2001
@@ -575,6 +575,9 @@
 	struct irqaction * action;
 	unsigned int status;
 
+#if defined(CONFIG_NUMA_LOCKS)
+	numa_unspin();
+#endif
 	kstat.irqs[cpu][irq]++;
 	spin_lock(&desc->lock);
 	desc->handler->ack(irq);
@@ -636,6 +639,9 @@
 	if (softirq_pending(cpu))
 		do_softirq();
 
+#if defined(CONFIG_NUMA_LOCKS)
+	numa_respin();
+#endif
 	return 1;
 }
 
diff -Naur linux.gen/arch/i386/kernel/numalock.c linux.numa/arch/i386/kernel/numalock.c
--- linux.gen/arch/i386/kernel/numalock.c	Wed Dec 31 16:00:00 1969
+++ linux.numa/arch/i386/kernel/numalock.c	Mon Jul 16 16:46:46 2001
@@ -0,0 +1,335 @@
+/*
+ * IBM NUMA Locking routines.
+ *
+ * (c) 2001 Jack F. Vogel (jfv@us.ibm.com),
+ *          Swaminathan Sivasubramanian (ssivasu@us.ibm.com)
+ *
+ * This code is released under the GNU General Public License
+ * version 2 or later.
+ */
+
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <asm/bitops.h>
+#include <asm/numalock.h>
+
+/*
+** The per-cpu structure for spinning
+*/
+
+static union {
+	struct numa_spin {
+		spinlock_t *volatile wakeup;
+		spinlock_t *volatile irq[3];
+	} numa_spin;
+	char __pad[SMP_CACHE_BYTES];
+} numa_spins[32] __cacheline_aligned;
+
+#define numa_wake(index)	numa_spins[(index)].numa_spin.wakeup
+#define numa_irq(index,level)	numa_spins[(index)].numa_spin.irq[(level)]
+
+
+/*
+** Atomic exclusive add, returns the old value
+*/
+ulong
+_xadd(volatile ulong *ptr, ulong const val)
+{
+	ulong ret;
+
+	__asm__ __volatile__(
+		"lock; xaddl %%eax, (%%ecx) ;\n"
+		: "=m" (ptr), "=a" (ret)
+		: "c" (ptr), "a" (val)
+		: "memory");
+
+	return ret;
+}
+
+/*
+** numa_wakeup - this is the logic to decide upon
+**	release of the spinlock who to pass
+**	off to.
+*/
+
+int numa_wakeup(volatile spinlock_t *numa, ulong wanted)
+{
+	register ulong mask, fwdcpus, backcpus;
+	int index;
+
+	/*
+	** For the time being the selection logic is
+	** fairly simple-minded, basically round robin.
+	** Making the code NUMA fair will simply
+	** be a change to the selection logic here.
+	*/
+	if (!wanted) return 0;
+
+	mask = (1 << (1 + smp_processor_id())) - 1;
+	fwdcpus = numa->lock & ~(mask);
+	if (fwdcpus != 0) {
+		index = ffs(fwdcpus) - 1;
+	} else {
+		backcpus = numa->lock & mask;
+		if (!backcpus) return 0;
+		index = ffs(backcpus) - 1;
+	}
+
+	if (cmpxchg((ulong *)&numa_wake(index), (ulong)numa, 0U))
+		return 1U;
+
+	return 0U;	// shouldn't happen
+}
+
+/*
+** A one shot attempt to get the spinlock
+*/
+int numa_trylock(spinlock_t *numa)
+{
+	register ulong mask = 1U<<smp_processor_id();
+
+	if (!cmpxchg((ulong *)&numa->lock, 0U, mask))
+		return 1U;
+
+	return 0U;
+}
+
+/*
+** The traditional spin till you get it...
+** this version spins with interrupts enabled.
+*/
+void numa_lock(spinlock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong flags, mask = 1U<<cpu;
+	spinlock_t *volatile *spin;
+	spinlock_t *volatile *irqsave;
+
+	local_irq_save(flags);
+
+	// We must not already hold this lock
+	if (numa->lock & mask) BUG();
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		local_irq_restore(flags);
+		return;
+	} else {
+		ulong spincnt = 0;
+		spin = &numa_wake(cpu);
+
+		// Save the lock address for this nesting...
+		irqsave = &numa_irq(cpu,nspin[cpu]);
+
+		*spin = *irqsave = numa;
+
+		/*
+		** Here is the spin, when we exit the loop we
+		** own the lock. We spin with interrupts enabled.
+		*/
+		local_irq_restore(flags);
+		for (;;){
+			local_irq_save(flags);
+			if (!*spin) {
+				// We have the lock...
+				*irqsave = 0U;
+				local_irq_restore(flags);
+				return;
+			}
+			local_irq_restore(flags);
+			++spincnt;
+		}
+	}
+}
+
+void numa_unlock(spinlock_t *numa)
+{
+	register ulong flags, wanted, mask = 1U<<smp_processor_id();
+
+	local_irq_save(flags);
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit(smp_processor_id(), (ulong *)&numa->lock);
+			local_irq_restore(flags);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+			    mask, 0) == mask){
+				local_irq_restore(flags);
+				return;
+			}
+		}
+	}
+}
+
+/*
+** Test to see if we hold a lock,
+** this assumes we are not in the
+** middle of a spin for the lock.
+*/
+int numa_held(spinlock_t *numa)
+{
+	register ulong mask = 1U<<smp_processor_id();
+
+	return ((numa->lock & mask) != 0);
+}
+
+/*
+** This routine is called in do_IRQ().
+** Called with interrupts disabled.
+*/
+void
+numa_unspin(void)
+{
+	spinlock_t *volatile *saved;
+	spinlock_t volatile *numa;
+	register ulong cpu = smp_processor_id();
+	register ulong mask = 1U << cpu;
+	ulong wanted;
+
+	saved = &numa_irq(cpu,nspin[cpu]);	// saved lock address
+	if (*saved) {				// Were we spinning for lock?
+		/* Clear ourselves from being a spin candidate */
+		numa = *saved;
+		if (cmpxchg((ulong *)&numa_wake(cpu), numa, 0U)) {
+			// Disabled from being woken up, clear our bit
+			clear_bit(cpu, (ulong *)&numa->lock);
+			++nspin[cpu];
+			return;
+		}
+		/*
+		** Apparently another process granted us the lock
+		** but we don't want it now, unlock it. This code
+		** reproduces numa_unlock, we can't just call it because
+		** we do not want interrupts enabled...
+		*/
+		for (;;) {
+			wanted = numa->lock & ~mask;
+			if (wanted && numa_wakeup(numa, wanted)) {
+				/*
+				** We found a new owner, drop
+				** our bit from the lock.
+				*/
+				clear_bit(cpu, (ulong *)&numa->lock);
+				++nspin[cpu];
+				return;
+			} else if (!wanted) {
+				/*
+				** No one waiting, attempt to zero it.
+				** Could have a race and fail but then
+				** we just go round again...
+				*/
+				if (cmpxchg((ulong *)&numa->lock, mask,
+				    0) == mask){
+					++nspin[cpu];
+					return;
+				}
+			}
+			// go round again...
+		}
+	} else
+		++nspin[cpu];
+}
+
+/*
+** Called at the return in do_IRQ, here
+** we reset our spinning state...
+*/
+void
+numa_respin(void)
+{
+	register ulong cpu = smp_processor_id();
+	register ulong mask = 1U << cpu;
+	spinlock_t *volatile *save;
+	spinlock_t *volatile *wake;
+
+	if (--nspin[cpu] < 0)
+		BUG();
+	save = &numa_irq(cpu,nspin[cpu]);
+
+	if (*save == 0U)
+		return;
+
+	wake = &numa_wake(cpu);
+	*wake = *save;
+	if (_xadd((ulong *)*wake, mask) == 0) {
+		// We have the lock, null the wakeup pointer
+		*wake = *save = 0U;
+		return;
+	}
+	// We will return to the spin normally
+}
+
+/*
+** This pair of routines is called
+** by the macros in numalock.h that
+** manipulate interrupts.
+*/
+void __numa_lock(spinlock_t *numa)
+{
+	ulong cpu = smp_processor_id();
+	ulong mask = 1U<<cpu;
+	spinlock_t *volatile *spin;
+
+	if (_xadd((ulong *)&numa->lock, mask) == 0) {
+		// We have the lock
+		return;
+	} else {
+		ulong spincnt = 0;
+		spin = &numa_wake(cpu);
+
+		*spin = numa;
+		/*
+		** Here is the spin, when we exit the loop we
+		** own the lock. The numalock.h macros have
+		** already disabled interrupts at this point.
+		*/
+		for (;;){
+			if (!*spin) {
+				// We have the lock...
+				return;
+			}
+		}
+	}
+}
+
+
+void __numa_unlock(spinlock_t *numa)
+{
+	register ulong wanted, mask = 1U<<smp_processor_id();
+
+	for (;;) {
+		wanted = numa->lock & ~mask;
+		if (wanted && numa_wakeup(numa, wanted)) {
+			/*
+			** We found a new owner, drop
+			** our bit from the lock.
+			*/
+			clear_bit(smp_processor_id(), (ulong *)&numa->lock);
+			return;
+		} else if (!wanted) {
+			/*
+			** No one waiting, attempt to zero it.
+			** Could have a race and fail but then
+			** we just go round again...
+			*/
+			if (cmpxchg((ulong *)&numa->lock,
+			    mask, 0) == mask){
+				return;
+			}
+		}
+	}
+}
+
diff -Naur linux.gen/include/asm-i386/numalock.h linux.numa/include/asm-i386/numalock.h
--- linux.gen/include/asm-i386/numalock.h	Wed Dec 31 16:00:00 1969
+++ linux.numa/include/asm-i386/numalock.h	Mon Jul 16 16:03:46 2001
@@ -0,0 +1,37 @@
+/*
+ * IBM NUMA Aware Locking Primitives
+ *
+ *
+ */
+
+#ifndef _NUMA_LOCKS_H
+#define _NUMA_LOCKS_H
+
+int nspin[32];		// Interrupt spin counters (signed: numa_respin() checks for underflow)
+
+#define NUMA_LOCK_UNLOCKED	(spinlock_t) {0}
+#define numa_lock_init(x)	do { *(x) = NUMA_LOCK_UNLOCKED; } while(0)
+#define numa_is_locked(x)	(*(volatile char *)(&(x)->lock) != 0)
+#define numa_unlock_wait(x)	do { barrier(); } while(numa_is_locked(x))
+
+#define numa_lock_irq(x) \
+	do { local_irq_disable(); __numa_lock(x); } while(0)
+#define numa_unlock_irq(x) \
+	do { __numa_unlock(x); local_irq_enable(); } while(0)
+#define numa_lock_irqsave(x,flags) \
+	do { local_irq_save(flags); __numa_lock(x); } while(0)
+#define numa_unlock_irqrestore(x,flags) \
+	do { __numa_unlock(x); local_irq_restore(flags); } while(0)
+
+extern ulong _xadd(volatile ulong *ptr, ulong const val);
+extern int numa_wakeup(volatile spinlock_t *numa, ulong wanted);
+extern int numa_trylock(spinlock_t *numa);
+extern void numa_lock(spinlock_t *numa);
+extern void numa_unlock(spinlock_t *numa);
+extern void __numa_lock(spinlock_t *numa);
+extern void __numa_unlock(spinlock_t *numa);
+extern int numa_held(spinlock_t *numa);
+extern void numa_unspin(void);
+extern void numa_respin(void);
+
+#endif
diff -Naur linux.gen/include/asm-i386/smplock.h linux.numa/include/asm-i386/smplock.h
--- linux.gen/include/asm-i386/smplock.h	Tue Jul 3 15:42:55 2001
+++ linux.numa/include/asm-i386/smplock.h	Mon Jul 16 13:48:02 2001
@@ -8,6 +8,10 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 
+#if defined (CONFIG_NUMA_LOCKS)
+#include <asm/numalock.h>
+#endif
+
 extern spinlock_t kernel_flag;
 
 #define kernel_locked()		spin_is_locked(&kernel_flag)
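
A note on the handoff logic for readers following numa_wakeup(): each CPU owns one bit in the lock word (mask = 1U<<cpu), so the release path only has to pick some other set bit, preferring CPUs numbered above the releaser and wrapping to the lowest-numbered waiter otherwise. Below is a minimal user-space sketch of that selection, for illustration only; pick_next_cpu() is an invented name, not part of the patch, and unlike numa_wakeup() it strips the releaser's own bit explicitly rather than relying on ffs() ordering.

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Round-robin choice of the next lock owner: first waiter numbered
** above 'cpu', otherwise the lowest-numbered waiter below it.
** Returns -1 if no other CPU has its bit set in the lock word. */
static int pick_next_cpu(unsigned int lockword, int cpu)
{
	unsigned int mask = (1U << (1 + cpu)) - 1;		/* bits 0..cpu */
	unsigned int fwd  = lockword & ~mask;			/* waiters above us */
	unsigned int back = lockword & mask & ~(1U << cpu);	/* waiters below us */

	if (fwd)
		return ffs(fwd) - 1;
	if (back)
		return ffs(back) - 1;
	return -1;
}

int main(void)
{
	/* CPUs 1, 3 and 5 hold bits in the word; CPU 3 is releasing. */
	unsigned int lockword = (1U << 1) | (1U << 3) | (1U << 5);

	printf("next owner: CPU %d\n", pick_next_cpu(lockword, 3));			/* prints 5 */
	printf("next owner: CPU %d\n", pick_next_cpu((1U << 1) | (1U << 3), 3));	/* prints 1 */
	return 0;
}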
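
For context on how the new primitives would be used: the patch only adds the lock routines and the do_IRQ() hooks, it does not convert any existing kernel lock. Under that assumption, a caller protecting a shared structure with a NUMA-aware lock would look roughly like the sketch below; the counter and both function names are invented for illustration.

/* Hypothetical user of the API declared in <asm/numalock.h>. */
#include <asm/numalock.h>

static spinlock_t stats_lock = NUMA_LOCK_UNLOCKED;
static ulong event_count;

/* Process context: numa_lock() spins with interrupts enabled and
** restores the caller's interrupt state before returning. */
void count_event(void)
{
	numa_lock(&stats_lock);
	event_count++;
	numa_unlock(&stats_lock);
}

/* Critical section that must run with interrupts off: the numalock.h
** macros disable interrupts first and then spin via __numa_lock(). */
void count_event_irqsave(void)
{
	ulong flags;

	numa_lock_irqsave(&stats_lock, flags);
	event_count++;
	numa_unlock_irqrestore(&stats_lock, flags);
}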