/* -*- C++ -*- */

/*

  Heap Layers: An Extensible Memory Allocation Infrastructure
  
  Copyright (C) 2000-2004 by Emery Berger
  http://www.cs.umass.edu/~emery
  emery@cs.umass.edu
  
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/

#ifndef _SPINLOCK_H_
#define _SPINLOCK_H_

#if defined(unix)
#include <sched.h>
#endif

#if defined(__SVR4)
#include <thread.h>
#endif

#if defined(__sgi)
#include <mutex.h>
#endif

#include "hldefines.h"
#include "cpuinfo.h"

// Inlining control for Microsoft compilers.
//
// When NO_INLINE has not already been defined, this section:
//   - raises the inline depth to 255,
//   - defines NO_INLINE / INLINE as the MSVC noinline / forceinline
//     attributes, and
//   - redefines the `inline` keyword itself as __forceinline, so every
//     `inline` that follows this point in the translation unit is
//     force-inlined.
//
// Nothing is defined here for other compilers (the #else branch is
// empty); SpinLockType uses NO_INLINE, so on those compilers it has to
// be defined before this point (presumably by hldefines.h -- TODO confirm).
#if defined(_MSC_VER)

#if !defined(NO_INLINE)
#pragma inline_depth(255)
#define NO_INLINE __declspec(noinline)
#define INLINE __forceinline
#define inline __forceinline
#endif

#else

#endif


// SPARC with a non-GNU compiler: the atomic exchange is an external
// C-linkage function that must be supplied at link time.  On every other
// configuration the exchange is the static inline member
// HL::SpinLockType::MyInterlockedExchange, which this header only
// declares/defines when !defined(__sparc) || defined(__GNUC__).
#if defined(__sparc) && !defined(__GNUC__)
extern "C" unsigned long MyInterlockedExchange (unsigned long * oldval,
					      unsigned long newval);
#endif

#if defined(_WIN32)
#define _WIN32_WINNT 0x0500
#include <windows.h>
#endif

// NOTE: _MM_PAUSE expands to the x86 "pause" instruction (bytes F3 90,
// i.e. "rep; nop"), which is innocuous for previous architectures, but
// crucial for Intel chips with hyperthreading.  See
// http://www.usenix.org/events/wiess02/tech/full_papers/nakajima/nakajima.pdf
// for discussion.
//
// The spelling is selected by compiler and CPU rather than by operating
// system alone:
//   - 32-bit x86 MSVC: the bytes are emitted with MSVC inline assembly
//     (MSVC offers inline assembly only when targeting x86).
//   - other Windows builds: the YieldProcessor() macro is used when
//     <windows.h> provides one.
//   - GCC-compatible compilers targeting x86 / x86-64: "rep; nop" via
//     GNU inline assembly.
//   - anything else: _MM_PAUSE expands to nothing.
// _MM_PAUSE is meant to be used as a statement: `_MM_PAUSE;`.

#if defined(_MSC_VER) && defined(_M_IX86)
#define _MM_PAUSE {__asm{_emit 0xf3};__asm {_emit 0x90}}
#elif defined(_WIN32) && defined(YieldProcessor)
#define _MM_PAUSE YieldProcessor()
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#define _MM_PAUSE { __asm__ __volatile__ ("rep; nop" : : : "memory"); }
#else
#define _MM_PAUSE
#endif

// Global flag, defined outside this header.  SpinLockType::lock() and
// SpinLockType::unlock() read it on every call: while it is zero they
// skip the atomic exchange / barrier and touch the lock word with plain
// stores only.
extern volatile int anyThreadCreated;

namespace HL {

/**
 * @class SpinLockType
 * @brief A spin-then-yield lock built on an atomic exchange.
 *
 * The lock word holds either UNLOCKED (0) or LOCKED (1).  lock() tries
 * one atomic exchange and, if that fails, falls into contendedLock(),
 * which retries with exponential back-off and periodically yields the
 * processor.
 *
 * While the global flag anyThreadCreated is zero, lock() and unlock()
 * bypass the atomic exchange and use plain stores to the lock word.
 */
class SpinLockType {
public:
  
  /// Creates the lock in the UNLOCKED state.
  SpinLockType (void)
    : mutex (UNLOCKED)
  {}
  
  ~SpinLockType (void)
  {}

  /// Acquires the lock, spinning (and eventually yielding) until it is
  /// obtained.
  inline void lock (void) {
    // A yielding lock (with an initial spin).
    if (anyThreadCreated) {
      // Fast path: a single exchange.  Seeing UNLOCKED as the previous
      // value means this caller now owns the lock.
      if (MyInterlockedExchange (const_cast<unsigned long *>(&mutex), LOCKED)
	  != UNLOCKED) {
	contendedLock();
      }
    } else {
      // anyThreadCreated is zero: a plain store is used instead of the
      // atomic exchange.
      mutex = LOCKED;
    }
  }

 
  /// Releases the lock by storing UNLOCKED into the lock word.
  inline void unlock (void) {
    if (anyThreadCreated) {
      // Compiler-level barrier placed before the releasing store.  The
      // spelling is chosen by *compiler*, not by OS or CPU macro: MSVC
      // inline assembly exists only for 32-bit x86 targets, and the GNU
      // form is only understood by GCC-compatible compilers.  Any other
      // compiler gets no explicit barrier and relies on the volatile
      // store below.
      // NOTE(review): this constrains the compiler only; whether a
      // hardware release fence is also required on weakly-ordered CPUs
      // (e.g. the __ppc path of MyInterlockedExchange) should be confirmed.
#if defined(_MSC_VER) && defined(_M_IX86)
      __asm {}
#elif defined(__GNUC__)
      asm volatile ("" : : : "memory");
#endif
      // SFENCE here?
      // MyInterlockedExchange (const_cast<unsigned long *>(&mutex), UNLOCKED);
    }
    mutex = UNLOCKED;
  }


  // Atomic exchange primitive: stores the second argument into the word
  // the first argument points at and returns the previous contents.
  // Declared here only when the inline definition is available; on SPARC
  // with a non-GNU compiler the calls above resolve to the global
  // extern "C" function of the same name instead.
#if !defined(__sparc) || defined(__GNUC__)
  inline static volatile unsigned long MyInterlockedExchange (unsigned long *,unsigned long); 
#endif

private:

  /// Slow path of lock(): keeps retrying the exchange until it succeeds.
  /// Kept out of line (NO_INLINE) so that lock() itself stays small.
  NO_INLINE
  void contendedLock (void) {
    int spinCount = 1;
    do {
      if (MyInterlockedExchange (const_cast<unsigned long *>(&mutex), LOCKED)
	  == UNLOCKED) {
	// We got the lock.
	return;
      }
      _MM_PAUSE;
      // Exponential back-off protocol: busy-wait for spinCount
      // iterations (the volatile counter keeps the empty loop from
      // being optimized away), then double the wait.
      for (volatile int q = 0; q < spinCount; q++) {
      }
      spinCount <<= 1;
      if (spinCount > MAX_SPIN_LIMIT) {
	// Spun long enough: give up the processor and start the
	// back-off sequence over.
	yieldProcessor();
	spinCount = 1;
      }
    } while (1);
  }

  /// Is this system a multiprocessor?  True when CPUInfo reports more
  /// than one processor.
  inline bool onMultiprocessor (void) {
    CPUInfo cpuInfo;
    return (cpuInfo.getNumProcessors() > 1);
  }

  /// Gives up the processor: Sleep(0) on Windows, thr_yield() on SVR4
  /// systems, sched_yield() everywhere else.
  inline void yieldProcessor (void) {
#if defined(_WIN32)
    Sleep(0);
#else
#if defined(__SVR4)
    thr_yield();
#else
    sched_yield();
#endif
#endif
  }

  /// Values stored in the lock word.
  enum { UNLOCKED = 0, LOCKED = 1 };
  
  /// contendedLock() yields once its spin count grows past this value.
  enum { MAX_SPIN_LIMIT = 1024 };

  /// The lock word.  It shares storage with a double; presumably the
  /// double is there to force size/alignment -- TODO confirm.
  union {
    double _dummy;
    volatile unsigned long mutex;
  };

};

}

// Atomically:
//   retval = *oldval;
//   *oldval = newval;
//   return retval;

#if !defined(__sparc) || defined(__GNUC__)

/**
 * @brief Atomic exchange used to implement the spin lock.
 *
 * Stores @p newval into the word @p oldval points at and returns the
 * value that word held beforehand.
 *
 * @param oldval  Address of the word to exchange (despite the name, this
 *                is the target location, not a value).
 * @param newval  Value to store.
 * @return The previous contents of *oldval.
 *
 * One implementation is selected per platform; compilation fails with
 * an #error when none applies.  On the __ppc path the store is skipped
 * when the word already equals @p newval; the returned value is still
 * the value that was loaded.
 */
inline volatile unsigned long 
HL::SpinLockType::MyInterlockedExchange (unsigned long * oldval,
				   unsigned long newval)
{
#if defined(_WIN32) && defined(_MSC_VER)
  
  return InterlockedExchange ((volatile LONG *) oldval, newval);
#if 0
  __asm
    {
      mov ecx, oldval
      mov eax, newval
      lock xchg dword ptr[ecx], eax
    }
#endif

#elif defined(__sparc)
  asm volatile ("swap [%1],%0"
		:"=r" (newval)
		:"r" (oldval), "0" (newval)
		: "memory");
  
#elif defined(__i386__)
  asm volatile ("lock; xchgl %0, %1"
		: "=r" (newval)
		: "m" (*oldval), "0" (newval)
		: "memory");
#elif defined(__x86_64__)
  // 64-bit x86.  The mnemonic carries no size suffix on purpose: the
  // assembler takes the operand width from the register chosen for
  // newval, so the whole unsigned long is exchanged whatever its size
  // is under the target ABI.
  asm volatile ("lock; xchg %0, %1"
		: "=r" (newval)
		: "m" (*oldval), "0" (newval)
		: "memory");
#elif defined(__sgi)
  newval = test_and_set (oldval, newval);
#elif defined(__ppc)
  // Contributed by Maged Michael.
  // Load-reserve / store-conditional loop: retry until the conditional
  // store succeeds, or leave early (without storing) when the loaded
  // value already equals newval.
  int ret; 
  asm volatile ( 
		"La..%=0:    lwarx %0,0,%1 ;" 
		"      cmpw  %0,%2;" 
		"      beq La..%=1;" 
		"      stwcx. %2,0,%1;" 
		"      bne- La..%=0;" 
		"La..%=1:    isync;" 
                : "=&r"(ret) 
                : "r"(oldval), "r"(newval) 
                : "cr0", "memory"); 
  return ret;
#else
#error "No spin lock implementation is available for this platform."
#endif
  // The inline-asm paths leave the previous value in newval.
  return newval;
}

#endif


#endif
