/*
    Copyright 2005-2009 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

#include "TypeDefinitions.h" /* Also includes customization layer Customize.h */

#if USE_PTHREAD
    // Some pthreads documentation says that <pthreads.h> must be first header.
    #include <pthread.h>
    #define TlsSetValue_func pthread_setspecific
    #define TlsGetValue_func pthread_getspecific
    typedef pthread_key_t tls_key_t;
    #include <sched.h>
    inline void do_yield() {sched_yield();}

#elif USE_WINTHREAD
    #define _WIN32_WINNT 0x0400
    #include <windows.h>
    #define TlsSetValue_func TlsSetValue
    #define TlsGetValue_func TlsGetValue
    typedef DWORD tls_key_t;
    inline void do_yield() {SwitchToThread();}

#else
    #error Must define USE_PTHREAD or USE_WINTHREAD

#endif

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#if MALLOC_CHECK_RECURSION
#include <new>        /* for placement new */
#endif /* MALLOC_CHECK_RECURSION */

extern "C" {
    void * scalable_malloc(size_t size);
    void   scalable_free (void *object);
    void mallocThreadShutdownNotification(void*);
}

/********* Various compile-time options **************/

#define MALLOC_TRACE 0

#if MALLOC_TRACE
#define TRACEF(x) printf x
#else
#define TRACEF(x) ((void)0)
#endif /* MALLOC_TRACE */

#define ASSERT_TEXT NULL

//! Define the main synchronization method
/** It should be specified before including LifoQueue.h */
#define FINE_GRAIN_LOCKS
#include "LifoQueue.h"

#define COLLECT_STATISTICS MALLOC_DEBUG && defined(MALLOCENV_COLLECT_STATISTICS)
#include "Statistics.h"

#define FREELIST_NONBLOCKING 1

// If USE_MALLOC_FOR_LARGE_OBJECT is nonzero, then large allocations are done via malloc.
// Otherwise large allocations are done using the scalable allocator's block allocator.
// As of 06.Jun.17, using malloc is about 10x faster on Linux.
#if !_WIN32
#define USE_MALLOC_FOR_LARGE_OBJECT 1
#endif

/********* End compile-time options **************/

namespace rml {

namespace internal {

/******* A helper class to support overriding malloc with scalable_malloc *******/
#if MALLOC_CHECK_RECURSION

inline bool isMallocInitialized();

class RecursiveMallocCallProtector {
    // pointer to an automatic data of holding thread
    static void       *autoObjPtr;
    static MallocMutex rmc_mutex;
    static pthread_t   owner_thread;
    /* Under FreeBSD 8.0, the first call to any pthread function, including
       pthread_self, triggers pthread initialization, which in turn calls malloc.
       Since the first use of RecursiveMallocCallProtector may happen before
       pthread is initialized, pthread calls cannot be used in the first
       instance of RecursiveMallocCallProtector.
       RecursiveMallocCallProtector is used for the first time in checkInitialization(),
       so there is a guarantee that by the second usage pthread is initialized.
       No such situation has been observed with other supported OSes. */
#if __FreeBSD__
    static bool        canUsePthread;
#else
    static const bool  canUsePthread = true;
#endif
    /* The variable is modified only in checkInitialization,
       so it can be read without memory barriers. */
    static bool mallocRecursionDetected;

    MallocMutex::scoped_lock* lock_acquired;
    char scoped_lock_space[sizeof(MallocMutex::scoped_lock)+1];

    static uintptr_t absDiffPtr(void *x, void *y) {
        uintptr_t xi = (uintptr_t)x, yi = (uintptr_t)y;
        return xi > yi ? xi - yi : yi - xi;
    }
public:

    RecursiveMallocCallProtector() : lock_acquired(NULL) {
        lock_acquired = new (scoped_lock_space) MallocMutex::scoped_lock( rmc_mutex );
        if (canUsePthread)
            owner_thread = pthread_self();
        autoObjPtr = &scoped_lock_space;
    }
    ~RecursiveMallocCallProtector() {
        if (lock_acquired) {
            autoObjPtr = NULL;
            lock_acquired->~scoped_lock();
        }
    }
    static bool sameThreadActive() {
        if (!autoObjPtr) // fast path
            return false;
        // Some thread has an active recursive call protector; check if it is the current one.
        // Exact pthread_self based test
        if (canUsePthread)
            if (pthread_equal( owner_thread, pthread_self() )) {
                mallocRecursionDetected = true;
                return true;
            } else
                return false;
        // inexact stack size based test
        const uintptr_t threadStackSz = 2*1024*1024;
        int dummy;
        return absDiffPtr(autoObjPtr, &dummy)<threadStackSz;
    }
    static bool noRecursion() {
        MALLOC_ASSERT(isMallocInitialized(),
                      "Recursion status can be checked only when initialization was done.");
        return !mallocRecursionDetected;
    }
    /* The function is called on the 1st scalable_malloc call to check if malloc itself
       calls scalable_malloc (a nested call must set mallocRecursionDetected). */
    static void detectNaiveOverload() {
        if (!malloc_proxy) {
#if __FreeBSD__
            /* If !canUsePthread, we could not call pthread_self() before, but now pthread
               is already up, so we can. False positives here lead to silent switching
               from malloc to mmap for all large allocations, with a bad performance impact. */
            if (!canUsePthread) {
                canUsePthread = true;
                owner_thread = pthread_self();
            }
#endif
            free(malloc(1));
        }
    }
};

MallocMutex RecursiveMallocCallProtector::rmc_mutex;
pthread_t   RecursiveMallocCallProtector::owner_thread;
void       *RecursiveMallocCallProtector::autoObjPtr;
bool        RecursiveMallocCallProtector::mallocRecursionDetected;
#if __FreeBSD__
bool        RecursiveMallocCallProtector::canUsePthread;
#endif

#else

class RecursiveMallocCallProtector {
public:
    RecursiveMallocCallProtector() {}
    ~RecursiveMallocCallProtector() {}
};

#endif /* MALLOC_CHECK_RECURSION */
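
/* Illustrative usage sketch (an addition for clarity, not part of the original
   TBB source; the entry-point name is hypothetical): code paths that may be
   re-entered when malloc is overridden typically combine the protector's
   pieces like this:

       void* entry_point_example(size_t size) {
           if (RecursiveMallocCallProtector::sameThreadActive())
               // malloc re-entered scalable_malloc on this thread: take a
               // fallback path that cannot call malloc again (e.g. startupAlloc).
               return NULL;
           RecursiveMallocCallProtector scoped; // marks this thread as active
           // ... proceed with normal allocation work ...
           return NULL;
       }
*/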

/*********** Code to provide thread ID and a thread-local void pointer **********/

typedef intptr_t ThreadId;

static ThreadId ThreadIdCount;

static tls_key_t TLS_pointer_key;
static tls_key_t Tid_key;

static inline ThreadId getThreadId(void)
{
    ThreadId result;
    result = reinterpret_cast<ThreadId>(TlsGetValue_func(Tid_key));
    if( !result ) {
        RecursiveMallocCallProtector scoped;
        // Thread-local value is zero -> first call from this thread,
        // need to initialize with next ID value (IDs start from 1)
        result = AtomicIncrement(ThreadIdCount); // returned new value!
        TlsSetValue_func( Tid_key, reinterpret_cast<void*>(result) );
    }
    return result;
}

static inline void* getThreadMallocTLS() {
    void *result;
    result = TlsGetValue_func( TLS_pointer_key );
    // The assert below is incorrect: with lazy initialization, it fails on the first call of the function.
    // MALLOC_ASSERT( result, "Memory allocator not initialized" );
    return result;
}

static inline void setThreadMallocTLS( void * newvalue ) {
    RecursiveMallocCallProtector scoped;
    TlsSetValue_func( TLS_pointer_key, newvalue );
}

/*********** End code to provide thread ID and a TLS pointer **********/

/*
 * The identifier to make sure that memory is allocated by scalable_malloc.
 */
const uint64_t theMallocUniqueID=0xE3C7AF89A1E2D8C1ULL;

/*
 * The number of bins in the TLS that lead to blocks we can allocate from.
 */
const uint32_t numBlockBinLimit = 32;

/*
 * The number of bins used to cache large objects.
 */
const uint32_t numLargeObjectBins = 1024; // for 1024, max cached size is near 8MB

/********* The data structures and global objects **************/

struct FreeObject {
    FreeObject  *next;
};

/*
 * The following constant is used to define the size of struct Block, the block header.
 * The intent is to have the size of a Block a multiple of the cache line size; this allows us to
 * get good alignment at the cost of some overhead equal to the amount of padding included in the Block.
 */

const int blockHeaderAlignment = 64; // a common size of a cache line

struct Block;

/* The 'next' field in the block header has to maintain some invariants:
 *   it needs to be on a 16K boundary and the first field in the block.
 *   Any value stored there needs to have the lower 14 bits set to 0
 *   so that various asserts work. This means that if you want to smash this memory
 *   for debugging purposes you will need to obey this invariant.
 * The total size of the header needs to be a power of 2 to simplify
 * the alignment requirements. For now it is a 128 byte structure.
 * To avoid false sharing, the fields changed only locally are separated
 * from the fields changed by foreign threads.
 * Changing the size of the block header would require changing
 * some bin allocation sizes, in particular the "fitting" sizes (see below).
 */

struct LocalBlockFields {
    Block       *next;            /* This field needs to be on a 16K boundary and the first field in the block
                                     for LIFO lists to work. */
    uint64_t     mallocUniqueID;  /* The field to identify memory allocated by scalable_malloc */
    Block       *previous;        /* Use double linked list to speed up removal */
    unsigned int objectSize;
    unsigned int owner;
    FreeObject  *bumpPtr;         /* Bump pointer moves from the end to the beginning of a block */
    FreeObject  *freeList;
    unsigned int allocatedCount;  /* Number of objects allocated (obviously by the owning thread) */
    unsigned int isFull;
};

struct Block : public LocalBlockFields {
    size_t       __pad_local_fields[(blockHeaderAlignment-sizeof(LocalBlockFields))/sizeof(size_t)];
    FreeObject  *publicFreeList;
    Block       *nextPrivatizable;
    size_t       __pad_public_fields[(blockHeaderAlignment-2*sizeof(void*))/sizeof(size_t)];
};
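
/* Layout check for illustration (assuming a typical LP64 build; not asserted
   here): LocalBlockFields holds five pointer/64-bit fields (40 bytes) plus
   four unsigned ints (16 bytes) = 56 bytes, so __pad_local_fields adds one
   size_t to reach blockHeaderAlignment (64). publicFreeList and
   nextPrivatizable take 16 bytes, padded by six size_t to another 64 bytes,
   giving sizeof(Block) == 128 == 2*blockHeaderAlignment, which
   initMemoryManager() asserts below. */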

struct Bin {
    Block      *activeBlk;
    Block      *mailbox;
    MallocMutex mailLock;
};

/*
 * This is a LIFO linked list that one can init, push or pop from
 */
static LifoQueue freeBlockList;

/*
 * When a block that is not completely free is returned for reuse by other threads,
 * this is where the block goes.
 *
 * LifoQueue assumes zero initialization, so its constructors below are omitted
 * to avoid linking with C++ libraries on Linux.
 */
static char globalBinSpace[sizeof(LifoQueue)*numBlockBinLimit];
static LifoQueue* globalSizeBins = (LifoQueue*)globalBinSpace;

static struct LargeObjectCacheStat {
    uintptr_t age;
    size_t cacheSize;
} loCacheStat;

struct CachedObject {
    CachedObject *next,
                 *prev;
    uintptr_t     age;
    bool          fromMapMemory;
};

class CachedObjectsList {
    CachedObject *first,
                 *last;
    /* age of the oldest object in the list; equal to last->age, if last is defined;
       used for quickly checking it without acquiring the lock. */
    uintptr_t     oldest;
    /* currAge when something was last excluded from the list because of its age,
       not because of a cache hit */
    uintptr_t     lastCleanedAge;
    /* Current threshold value for the objects of a particular size.
       Set on cache miss. */
    uintptr_t     ageThreshold;

    MallocMutex   lock;
    /* CachedObjectsList should be placed in zero-initialized memory,
       so a ctor is not needed. */
    CachedObjectsList();
public:
    inline void push(void *buf, bool fromMapMemory, uintptr_t currAge);
    inline CachedObject* pop(uintptr_t currAge);
    void releaseLastIfOld(uintptr_t currAge, size_t size);
};

/*
 * Array of bins with lists of recently freed objects cached for re-use.
 */
static char globalCachedObjectBinsSpace[sizeof(CachedObjectsList)*numLargeObjectBins];
static CachedObjectsList* globalCachedObjectBins = (CachedObjectsList*)globalCachedObjectBinsSpace;

/********* End of the data structures **************/

/********** Various numeric parameters controlling allocations ********/

/*
 * The size of the TLS should be large enough to hold numBlockBinLimit bins.
 */
const uint32_t tlsSize = numBlockBinLimit * sizeof(Bin);

/*
 * blockSize - the size of a block; it must be larger than maxSegregatedObjectSize.
 */
const uintptr_t blockSize = 16*1024;

/*
 * There are bins for all 8-byte aligned objects smaller than this segregated size; 8 bins in total
 */
const uint32_t minSmallObjectIndex = 0;
const uint32_t numSmallObjectBins = 8;
const uint32_t maxSmallObjectSize = 64;

/*
 * There are 4 bins between each pair of powers of 2 [64-128-256-...]
 * from maxSmallObjectSize up to this size; 16 bins in total
 */
const uint32_t minSegregatedObjectIndex = minSmallObjectIndex+numSmallObjectBins;
const uint32_t numSegregatedObjectBins = 16;
const uint32_t maxSegregatedObjectSize = 1024;

/*
 * And there are 5 bins with the following allocation sizes: 1792, 2688, 3968, 5376, 8064.
 * They are selected to fit 9, 6, 4, 3, and 2 objects per block, and are also multiples of 128.
 * If sizeof(Block) changes from 128, these sizes require close attention!
 */
const uint32_t minFittingIndex = minSegregatedObjectIndex+numSegregatedObjectBins;
const uint32_t numFittingBins = 5;

const uint32_t fittingAlignment = 128;

#define SET_FITTING_SIZE(N) ( (blockSize-sizeof(Block))/N ) & ~(fittingAlignment-1)
const uint32_t fittingSize1 = SET_FITTING_SIZE(9);
const uint32_t fittingSize2 = SET_FITTING_SIZE(6);
const uint32_t fittingSize3 = SET_FITTING_SIZE(4);
const uint32_t fittingSize4 = SET_FITTING_SIZE(3);
const uint32_t fittingSize5 = SET_FITTING_SIZE(2);
#undef SET_FITTING_SIZE
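
/* Worked example for illustration (derived from the constants above, with
   blockSize==16384 and sizeof(Block)==128):
     SET_FITTING_SIZE(9) = (16256/9) & ~127 = 1806 & ~127 = 1792
     SET_FITTING_SIZE(6) = (16256/6) & ~127 = 2709 & ~127 = 2688
     SET_FITTING_SIZE(4) = (16256/4) & ~127 = 4064 & ~127 = 3968
     SET_FITTING_SIZE(3) = (16256/3) & ~127 = 5418 & ~127 = 5376
     SET_FITTING_SIZE(2) = (16256/2) & ~127 = 8128 & ~127 = 8064
   matching the sizes 1792, 2688, 3968, 5376, 8064 listed above. */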

/*
 * The total number of thread-specific Block-based bins
 */
const uint32_t numBlockBins = minFittingIndex+numFittingBins;

/*
 * Objects of this size and larger are considered large objects.
 */
const uint32_t minLargeObjectSize = fittingSize5 + 1;

/*
 * Block::objectSize value used to mark blocks allocated by startupAlloc
 */
const unsigned int startupAllocObjSizeMark = ~(unsigned int)0;

/*
 * Difference between object sizes in large object bins
 */
const uint32_t largeObjectCacheStep = 8*1024;

/*
 * Object cache cleanup frequency.
 * It should be a power of 2 for fast checking.
 */
const unsigned cacheCleanupFreq = 256;

/*
 * Get virtual memory in pieces of this size: 0x0100000 is 1 MB
 */
static size_t mmapRequestSize = 0x0100000;

/********** End of numeric parameters controlling allocations *********/

#if !MALLOC_DEBUG
#if __INTEL_COMPILER || _MSC_VER
#define NOINLINE(decl) __declspec(noinline) decl
#define ALWAYSINLINE(decl) __forceinline decl
#elif __GNUC__
#define NOINLINE(decl) decl __attribute__ ((noinline))
#define ALWAYSINLINE(decl) decl __attribute__ ((always_inline))
#else
#define NOINLINE(decl) decl
#define ALWAYSINLINE(decl) decl
#endif

static NOINLINE( Block* getPublicFreeListBlock(Bin* bin) );
static NOINLINE( void moveBlockToBinFront(Block *block) );
static NOINLINE( void processLessUsedBlock(Block *block) );

static ALWAYSINLINE( Bin* getAllocationBin(size_t size) );
static ALWAYSINLINE( void checkInitialization() );

#undef ALWAYSINLINE
#undef NOINLINE
#endif /* !MALLOC_DEBUG */

/*********** Code to acquire memory from the OS or other executive ****************/

#if USE_DEFAULT_MEMORY_MAPPING
#include "MapMemory.h"
#else
/* assume MapMemory and UnmapMemory are customized */
#endif

#if USE_MALLOC_FOR_LARGE_OBJECT

// (get|free)RawMemory is only necessary for the USE_MALLOC_FOR_LARGE_OBJECT case
static inline void* getRawMemory (size_t size, bool alwaysUseMap = false)
{
    void *object;

    if (alwaysUseMap)
        object = MapMemory(size);
    else
#if MALLOC_CHECK_RECURSION
    if (RecursiveMallocCallProtector::noRecursion())
        object = malloc(size);
    else if ( rml::internal::original_malloc_found )
        object = (*rml::internal::original_malloc_ptr)(size);
    else
        object = MapMemory(size);
#else
    object = malloc(size);
#endif /* MALLOC_CHECK_RECURSION */
    return object;
}

static inline void freeRawMemory (void *object, size_t size, bool alwaysUseMap)
{
    if (alwaysUseMap)
        UnmapMemory(object, size);
    else
#if MALLOC_CHECK_RECURSION
    if (RecursiveMallocCallProtector::noRecursion())
        free(object);
    else if ( rml::internal::original_malloc_found )
        (*rml::internal::original_free_ptr)(object);
    else
        UnmapMemory(object, size);
#else
    free(object);
#endif /* MALLOC_CHECK_RECURSION */
}

#else /* USE_MALLOC_FOR_LARGE_OBJECT */

static inline void* getRawMemory (size_t size, bool = false) { return MapMemory(size); }

static inline void freeRawMemory (void *object, size_t size, bool) {
    UnmapMemory(object, size);
}

#endif /* USE_MALLOC_FOR_LARGE_OBJECT */

/********* End memory acquisition code ********************************/

/********* Now some rough utility code to deal with indexing the size bins. **************/

/*
 * Given a number, return the highest non-zero bit in it. It is intended to work with 32-bit values only.
 * Moreover, on IPF, for the sake of simplicity and performance, it is narrowed to only serve 64 to 1023.
 * This is enough for the current algorithm of distributing sizes among bins.
 */
#if _WIN64 && _MSC_VER>=1400 && !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline unsigned int highestBitPos(unsigned int n)
{
    unsigned int pos;
#if __ARCH_x86_32||__ARCH_x86_64

# if __linux__||__APPLE__||__FreeBSD__||__sun||__MINGW32__
    __asm__ ("bsr %1,%0" : "=r"(pos) : "r"(n));
# elif (_WIN32 && (!_WIN64 || __INTEL_COMPILER))
    __asm
    {
        bsr eax, n
        mov pos, eax
    }
# elif _WIN64 && _MSC_VER>=1400
    _BitScanReverse((unsigned long*)&pos, (unsigned long)n);
# else
#   error highestBitPos() not implemented for this platform
# endif

#elif __ARCH_ipf || __ARCH_other
    static unsigned int bsr[16] = {0,6,7,7,8,8,8,8,9,9,9,9,9,9,9,9};
    MALLOC_ASSERT( n>=64 && n<1024, ASSERT_TEXT );
    pos = bsr[ n>>6 ];
#else
#   error highestBitPos() not implemented for this platform
#endif /* __ARCH_* */
    return pos;
}

/*
 * Depending on indexRequest, for a given size return either the index into the bin
 * for objects of this size, or the actual size of objects in this bin.
 */
template<bool indexRequest>
static unsigned int getIndexOrObjectSize (unsigned int size)
{
    if (size <= maxSmallObjectSize) { // selection from 4/8/16/24/32/40/48/56/64
        /* Index 0 holds up to 8 bytes, Index 1 16 and so forth */
        return indexRequest ? (size - 1) >> 3 : alignUp(size,8);
    }
    else if (size <= maxSegregatedObjectSize ) { // 80/96/112/128 / 160/192/224/256 / 320/384/448/512 / 640/768/896/1024
        unsigned int order = highestBitPos(size-1); // which group of bin sizes?
        MALLOC_ASSERT( 6<=order && order<=9, ASSERT_TEXT );
        if (indexRequest)
            return minSegregatedObjectIndex - (4*6) - 4 + (4*order) + ((size-1)>>(order-2));
        else {
            unsigned int alignment = 128 >> (9-order); // alignment in the group
            MALLOC_ASSERT( alignment==16 || alignment==32 || alignment==64 || alignment==128, ASSERT_TEXT );
            return alignUp(size,alignment);
        }
    }
    else {
        if( size <= fittingSize3 ) {
            if( size <= fittingSize2 ) {
                if( size <= fittingSize1 )
                    return indexRequest ? minFittingIndex : fittingSize1;
                else
                    return indexRequest ? minFittingIndex+1 : fittingSize2;
            } else
                return indexRequest ? minFittingIndex+2 : fittingSize3;
        } else {
            if( size <= fittingSize5 ) {
                if( size <= fittingSize4 )
                    return indexRequest ? minFittingIndex+3 : fittingSize4;
                else
                    return indexRequest ? minFittingIndex+4 : fittingSize5;
            } else {
                MALLOC_ASSERT( 0,ASSERT_TEXT ); // this should not happen
                return ~0U;
            }
        }
    }
}

static unsigned int getIndex (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest*/true>(size);
}

static unsigned int getObjectSize (unsigned int size)
{
    return getIndexOrObjectSize</*indexRequest*/false>(size);
}
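
/* Worked example for illustration (derived from the code above): for a request
   of 100 bytes, highestBitPos(99)==6 puts it in the 80/96/112/128 group;
   getIndex(100) = 8 - 24 - 4 + 4*6 + (99>>4) = 10, and getObjectSize(100)
   rounds up with the group alignment 128>>(9-6)==16 to 112 bytes. */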

/*
 * Initialization code.
 */

/*
 * Big Blocks are the blocks we get from the OS or some similar place using getRawMemory above.
 * They are placed on the freeBlockList once they are acquired.
 */

static inline void *alignBigBlock(void *unalignedBigBlock)
{
    void *alignedBigBlock;
    /* align the entireHeap so all blocks are aligned. */
    alignedBigBlock = alignUp(unalignedBigBlock, blockSize);
    return alignedBigBlock;
}

/* Divide the big block into smaller bigBlocks that hold this many blocks.
 * This is done since we really need a lot of blocks on the freeBlockList, or there will be
 * contention problems.
 */
const unsigned int blocksPerBigBlock = 16;

/* Returns 0 if unsuccessful, otherwise 1. */
static int mallocBigBlock()
{
    void *unalignedBigBlock;
    void *alignedBigBlock;
    void *bigBlockCeiling;
    Block *splitBlock;
    void *splitEdge;
    size_t bigBlockSplitSize;

    unalignedBigBlock = getRawMemory(mmapRequestSize, /*alwaysUseMap=*/true);

    if (!unalignedBigBlock) {
        TRACEF(( "[ScalableMalloc trace] in mallocBigBlock, getMemory returns 0\n" ));
        /* We can't get any more memory from the OS or executive, so return 0 */
        return 0;
    }

    alignedBigBlock = alignBigBlock(unalignedBigBlock);
    bigBlockCeiling = (void*)((uintptr_t)unalignedBigBlock + mmapRequestSize);

    bigBlockSplitSize = blocksPerBigBlock * blockSize;

    splitBlock = (Block*)alignedBigBlock;

    while ( ((uintptr_t)splitBlock + blockSize) <= (uintptr_t)bigBlockCeiling ) {
        splitEdge = (void*)((uintptr_t)splitBlock + bigBlockSplitSize);
        if( splitEdge > bigBlockCeiling) {
            splitEdge = alignDown(bigBlockCeiling, blockSize);
        }
        splitBlock->bumpPtr = (FreeObject*)splitEdge;
        freeBlockList.push((void**) splitBlock);
        splitBlock = (Block*)splitEdge;
    }

    TRACEF(( "[ScalableMalloc trace] in mallocBigBlock returning 1\n" ));
    return 1;
}
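
/* For illustration (derived from the constants above): with mmapRequestSize of
   1 MB and blockSize of 16 KB, one OS request yields up to 64 blocks; the loop
   pushes them as roughly four "big blocks" of blocksPerBigBlock*blockSize =
   256 KB each, so concurrent threads pulling from freeBlockList contend less
   on a single list head. */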

/*
 * The malloc routines themselves need to be able to occasionally malloc some space,
 * in order to set up the structures used by the thread-local structures. This
 * routine performs that function.
 */

/*
 * Forward Refs
 */
static void initEmptyBlock(Block *block, size_t size);
static Block *getEmptyBlock(size_t size);

static MallocMutex bootStrapLock;

static Block      *bootStrapBlock = NULL;
static Block      *bootStrapBlockUsed = NULL;
static FreeObject *bootStrapObjectList = NULL;

static void *bootStrapMalloc(size_t size)
{
    FreeObject *result;

    MALLOC_ASSERT( size == tlsSize, ASSERT_TEXT );

    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);

        if( bootStrapObjectList) {
            result = bootStrapObjectList;
            bootStrapObjectList = bootStrapObjectList->next;
        } else {
            if (!bootStrapBlock) {
                bootStrapBlock = getEmptyBlock(size);
                if (!bootStrapBlock) return NULL;
            }
            result = bootStrapBlock->bumpPtr;
            bootStrapBlock->bumpPtr = (FreeObject *)((uintptr_t)bootStrapBlock->bumpPtr - bootStrapBlock->objectSize);
            if ((uintptr_t)bootStrapBlock->bumpPtr < (uintptr_t)bootStrapBlock+sizeof(Block)) {
                bootStrapBlock->bumpPtr = NULL;
                bootStrapBlock->next = bootStrapBlockUsed;
                bootStrapBlockUsed = bootStrapBlock;
                bootStrapBlock = NULL;
            }
        }
    } // Unlock with release

    memset (result, 0, size);
    return (void*)result;
}

static void bootStrapFree(void* ptr)
{
    MALLOC_ASSERT( ptr, ASSERT_TEXT );
    { // Lock with acquire
        MallocMutex::scoped_lock scoped_cs(bootStrapLock);
        ((FreeObject*)ptr)->next = bootStrapObjectList;
        bootStrapObjectList = (FreeObject*)ptr;
    } // Unlock with release
}

/********* End rough utility code **************/

/********* Thread and block related code *************/

#if MALLOC_DEBUG>1
/* The debug version verifies the TLSBin as needed */
static void verifyTLSBin (Bin* bin, size_t size)
{
    Block* temp;
    Bin*   tls;
    uint32_t index = getIndex(size);
    uint32_t objSize = getObjectSize(size);

    tls = (Bin*)getThreadMallocTLS();
    MALLOC_ASSERT( bin == tls+index, ASSERT_TEXT );

    if (tls[index].activeBlk) {
        MALLOC_ASSERT( tls[index].activeBlk->mallocUniqueID==theMallocUniqueID, ASSERT_TEXT );
        MALLOC_ASSERT( tls[index].activeBlk->owner == getThreadId(), ASSERT_TEXT );
        MALLOC_ASSERT( tls[index].activeBlk->objectSize == objSize, ASSERT_TEXT );

        for (temp = tls[index].activeBlk->next; temp; temp=temp->next) {
            MALLOC_ASSERT( temp!=tls[index].activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->mallocUniqueID==theMallocUniqueID, ASSERT_TEXT );
            MALLOC_ASSERT( temp->owner == getThreadId(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            if (temp->next) {
                MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            }
        }
        for (temp = tls[index].activeBlk->previous; temp; temp=temp->previous) {
            MALLOC_ASSERT( temp!=tls[index].activeBlk, ASSERT_TEXT );
            MALLOC_ASSERT( temp->mallocUniqueID==theMallocUniqueID, ASSERT_TEXT );
            MALLOC_ASSERT( temp->owner == getThreadId(), ASSERT_TEXT );
            MALLOC_ASSERT( temp->objectSize == objSize, ASSERT_TEXT );
            MALLOC_ASSERT( temp->next->previous == temp, ASSERT_TEXT );
            if (temp->previous) {
                MALLOC_ASSERT( temp->previous->next == temp, ASSERT_TEXT );
            }
        }
    }
}
#else
inline static void verifyTLSBin (Bin*, size_t) {}
#endif /* MALLOC_DEBUG>1 */

/*
 * Add a block to the start of this tls bin list.
 */
static void pushTLSBin (Bin* bin, Block* block)
{
    /* The objectSize should be defined and not a parameter,
       because the function is applied to partially filled blocks as well */
    unsigned int size = block->objectSize;
    Block* activeBlk;

    MALLOC_ASSERT( block->owner == getThreadId(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
    MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );

    MALLOC_ASSERT( bin, ASSERT_TEXT );
    verifyTLSBin(bin, size);
    activeBlk = bin->activeBlk;

    block->next = activeBlk;
    if( activeBlk ) {
        block->previous = activeBlk->previous;
        activeBlk->previous = block;
        if( block->previous )
            block->previous->next = block;
    } else {
        bin->activeBlk = block;
    }

    verifyTLSBin(bin, size);
}

/*
 * Take a block out of its tls bin (e.g. before removal).
 */
static void outofTLSBin (Bin* bin, Block* block)
{
    unsigned int size = block->objectSize;

    MALLOC_ASSERT( block->owner == getThreadId(), ASSERT_TEXT );
    MALLOC_ASSERT( block->objectSize != 0, ASSERT_TEXT );

    MALLOC_ASSERT( bin, ASSERT_TEXT );
    verifyTLSBin(bin, size);

    if (block == bin->activeBlk) {
        bin->activeBlk = block->previous? block->previous : block->next;
    }
    /* Unlink the block */
    if (block->previous) {
        MALLOC_ASSERT( block->previous->next == block, ASSERT_TEXT );
        block->previous->next = block->next;
    }
    if (block->next) {
        MALLOC_ASSERT( block->next->previous == block, ASSERT_TEXT );
        block->next->previous = block->previous;
    }
    block->next = NULL;
    block->previous = NULL;

    verifyTLSBin(bin, size);
}

/*
 * Return the bin for the given size. If the TLS bin structure is absent, create it.
 */
static Bin* getAllocationBin(size_t size)
{
    Bin* tls = (Bin*)getThreadMallocTLS();
    if( !tls ) {
        MALLOC_ASSERT( tlsSize >= sizeof(Bin) * numBlockBins, ASSERT_TEXT );
        tls = (Bin*) bootStrapMalloc(tlsSize);
        if ( !tls ) return NULL;
        /* the block contains zeroes after bootStrapMalloc, so bins are initialized */
#if MALLOC_DEBUG
        for (int i = 0; i < numBlockBinLimit; i++) {
            MALLOC_ASSERT( tls[i].activeBlk == 0, ASSERT_TEXT );
            MALLOC_ASSERT( tls[i].mailbox == 0, ASSERT_TEXT );
        }
#endif
        setThreadMallocTLS(tls);
    }
    MALLOC_ASSERT( tls, ASSERT_TEXT );
    return tls+getIndex(size);
}

const float emptyEnoughRatio = 1.0 / 4.0; /* "Reactivate" a block if this share of its objects is free. */

static unsigned int emptyEnoughToUse (Block *mallocBlock)
{
    const float threshold = (blockSize - sizeof(Block)) * (1-emptyEnoughRatio);

    if (mallocBlock->bumpPtr) {
        /* If we are still using a bump ptr for this block, it is empty enough to use. */
        STAT_increment(mallocBlock->owner, getIndex(mallocBlock->objectSize), examineEmptyEnough);
        mallocBlock->isFull = 0;
        return 1;
    }

    /* allocatedCount shows how many objects in the block are in use; however it still counts
       objects freed by other threads, so a prior call to privatizePublicFreeList() is recommended */
    mallocBlock->isFull = (mallocBlock->allocatedCount*mallocBlock->objectSize > threshold)? 1: 0;
#if COLLECT_STATISTICS
    if (mallocBlock->isFull)
        STAT_increment(mallocBlock->owner, getIndex(mallocBlock->objectSize), examineNotEmpty);
    else
        STAT_increment(mallocBlock->owner, getIndex(mallocBlock->objectSize), examineEmptyEnough);
#endif
    return 1-mallocBlock->isFull;
}
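
/* Worked example for illustration (assuming sizeof(Block)==128): threshold =
   (16384-128)*(1-1/4) = 12192 bytes, so a block of 112-byte objects is marked
   full again once allocatedCount*112 exceeds 12192, i.e. at 109 live objects;
   below that, less than three quarters of the usable space is occupied and
   the block is considered empty enough to reuse. */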

/* Restore the bump pointer for an empty block that is planned to be used */
static void restoreBumpPtr (Block *block)
{
    MALLOC_ASSERT( block->allocatedCount == 0, ASSERT_TEXT );
    MALLOC_ASSERT( block->publicFreeList == NULL, ASSERT_TEXT );
    STAT_increment(block->owner, getIndex(block->objectSize), freeRestoreBumpPtr);
    block->bumpPtr = (FreeObject *)((uintptr_t)block + blockSize - block->objectSize);
    block->freeList = NULL;
    block->isFull = 0;
}

#if !(FREELIST_NONBLOCKING)
static MallocMutex publicFreeListLock; // lock for changes of publicFreeList
#endif

const uintptr_t UNUSABLE = 0x1;
inline bool isSolidPtr( void* ptr )
{
    return (UNUSABLE|(uintptr_t)ptr)!=UNUSABLE;
}
inline bool isNotForUse( void* ptr )
{
    return (uintptr_t)ptr==UNUSABLE;
}
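
/* Summary of publicFreeList states, restating the predicates above for clarity:
     NULL            - no foreign frees have arrived since the last privatization;
     UNUSABLE (0x1)  - the block was returned to the global partial-block bins
                       (isNotForUse), so foreign threads must leave it alone;
     anything else   - a "solid" pointer (isSolidPtr): the head of the list of
                       objects freed into this block by other threads. */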

static void freePublicObject (Block *block, FreeObject *objectToFree)
{
    Bin* theBin;
    FreeObject *publicFreeList;

#if FREELIST_NONBLOCKING
    FreeObject *temp = block->publicFreeList;
    MALLOC_ITT_SYNC_RELEASING(&block->publicFreeList);
    do {
        publicFreeList = objectToFree->next = temp;
        temp = (FreeObject*)AtomicCompareExchange(
                                (intptr_t&)block->publicFreeList,
                                (intptr_t)objectToFree, (intptr_t)publicFreeList );
        // no backoff necessary because trying to make change, not waiting for a change
    } while( temp != publicFreeList );
#else
    STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        publicFreeList = objectToFree->next = block->publicFreeList;
        block->publicFreeList = objectToFree;
    }
#endif

    if( publicFreeList==NULL ) {
        // if the block is abandoned, its nextPrivatizable pointer should be UNUSABLE;
        // otherwise, it should point to the bin the block belongs to.
        // reading nextPrivatizable is thread-safe below, because:
        // 1) the executing thread atomically got publicFreeList==NULL and changed it to non-NULL;
        // 2) only the owning thread can change it back to NULL,
        // 3) but that cannot be done until the block is put into the mailbox.
        // So the executing thread is now the only one that can change nextPrivatizable
        if( !isNotForUse(block->nextPrivatizable) ) {
            MALLOC_ASSERT( block->nextPrivatizable!=NULL, ASSERT_TEXT );
            MALLOC_ASSERT( block->owner!=0, ASSERT_TEXT );
            theBin = (Bin*) block->nextPrivatizable;
            MallocMutex::scoped_lock scoped_cs(theBin->mailLock);
            block->nextPrivatizable = theBin->mailbox;
            theBin->mailbox = block;
        } else {
            MALLOC_ASSERT( block->owner==0, ASSERT_TEXT );
        }
    }
    STAT_increment(getThreadId(), ThreadCommonCounters, freeToOtherThread);
    STAT_increment(block->owner, getIndex(block->objectSize), freeByOtherThread);
}

static void privatizePublicFreeList (Block *mallocBlock)
{
    FreeObject *temp, *publicFreeList;

    MALLOC_ASSERT( mallocBlock->owner == getThreadId(), ASSERT_TEXT );
#if FREELIST_NONBLOCKING
    temp = mallocBlock->publicFreeList;
    do {
        publicFreeList = temp;
        temp = (FreeObject*)AtomicCompareExchange(
                                (intptr_t&)mallocBlock->publicFreeList,
                                0, (intptr_t)publicFreeList);
        // no backoff necessary because trying to make change, not waiting for a change
    } while( temp != publicFreeList );
    MALLOC_ITT_SYNC_ACQUIRED(&mallocBlock->publicFreeList);
#else
    STAT_increment(mallocBlock->owner, ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
        publicFreeList = mallocBlock->publicFreeList;
        mallocBlock->publicFreeList = NULL;
    }
    temp = publicFreeList;
#endif

    MALLOC_ASSERT( publicFreeList && publicFreeList==temp, ASSERT_TEXT ); // there should be something in publicFreeList!
    if( !isNotForUse(temp) ) { // return/getPartialBlock could set it to UNUSABLE
        MALLOC_ASSERT( mallocBlock->allocatedCount <= (blockSize-sizeof(Block))/mallocBlock->objectSize, ASSERT_TEXT );
        /* other threads did not change the counter freeing our blocks */
        mallocBlock->allocatedCount--;
        while( isSolidPtr(temp->next) ){ // the list will end with either NULL or UNUSABLE
            temp = temp->next;
            mallocBlock->allocatedCount--;
        }
        MALLOC_ASSERT( mallocBlock->allocatedCount < (blockSize-sizeof(Block))/mallocBlock->objectSize, ASSERT_TEXT );
        /* merge with local freeList */
        temp->next = mallocBlock->freeList;
        mallocBlock->freeList = publicFreeList;
        STAT_increment(mallocBlock->owner, getIndex(mallocBlock->objectSize), allocPrivatized);
    }
}

static Block* getPublicFreeListBlock (Bin* bin)
{
    Block* block;
    MALLOC_ASSERT( bin, ASSERT_TEXT );
    // the counter should be changed  STAT_increment(getThreadId(), ThreadCommonCounters, lockPublicFreeList);
    {
        MallocMutex::scoped_lock scoped_cs(bin->mailLock);
        block = bin->mailbox;
        if( block ) {
            MALLOC_ASSERT( block->owner == getThreadId(), ASSERT_TEXT );
            MALLOC_ASSERT( !isNotForUse(block->nextPrivatizable), ASSERT_TEXT );
            bin->mailbox = block->nextPrivatizable;
            block->nextPrivatizable = (Block*) bin;
        }
    }
    if( block ) {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList), ASSERT_TEXT );
        privatizePublicFreeList(block);
    }
    return block;
}

static Block *getPartialBlock(Bin* bin, unsigned int size)
{
    Block *result;
    MALLOC_ASSERT( bin, ASSERT_TEXT );
    unsigned int index = getIndex(size);
    result = (Block *) globalSizeBins[index].pop();
    if (result) {
        MALLOC_ASSERT( result->mallocUniqueID==theMallocUniqueID, ASSERT_TEXT );
        result->next = NULL;
        result->previous = NULL;
        MALLOC_ASSERT( result->publicFreeList!=NULL, ASSERT_TEXT );
        /* There is no race here since no other thread owns this block */
        MALLOC_ASSERT( result->owner == 0, ASSERT_TEXT );
        result->owner = getThreadId();
        // It is safe to change nextPrivatizable, as publicFreeList is not null
        MALLOC_ASSERT( isNotForUse(result->nextPrivatizable), ASSERT_TEXT );
        result->nextPrivatizable = (Block*)bin;
        // the next call is required to change publicFreeList to 0
        privatizePublicFreeList(result);
        if( result->allocatedCount ) {
            // check its fullness and set result->isFull
            emptyEnoughToUse(result);
        } else {
            restoreBumpPtr(result);
        }
        MALLOC_ASSERT( !isNotForUse(result->publicFreeList), ASSERT_TEXT );
        STAT_increment(result->owner, index, allocBlockPublic);
    }
    return result;
}

static void returnPartialBlock(Bin* bin, Block *block)
{
    unsigned int index = getIndex(block->objectSize);
    MALLOC_ASSERT( bin, ASSERT_TEXT );
    MALLOC_ASSERT( block->owner==getThreadId(), ASSERT_TEXT );
    STAT_increment(block->owner, index, freeBlockPublic);
    // need to set publicFreeList to non-zero, so other threads
    // will not change nextPrivatizable and it can be zeroed.
    if ((intptr_t)block->nextPrivatizable==(intptr_t)bin) {
        void* oldval;
#if FREELIST_NONBLOCKING
        oldval = (void*)AtomicCompareExchange((intptr_t&)block->publicFreeList, (intptr_t)UNUSABLE, 0);
#else
        STAT_increment(block->owner, ThreadCommonCounters, lockPublicFreeList);
        {
            MallocMutex::scoped_lock scoped_cs(publicFreeListLock);
            if ( (oldval=block->publicFreeList)==NULL )
                (uintptr_t&)(block->publicFreeList) = UNUSABLE;
        }
#endif
        if ( oldval!=NULL ) {
            // another thread freed an object; we need to wait until it finishes.
            // I believe there is no need for exponential backoff, as the wait here is not for a lock;
            // but we need to yield, so the thread we wait for has a chance to run.
            int count = 256;
            while( (intptr_t)const_cast<Block* volatile &>(block->nextPrivatizable)==(intptr_t)bin ) {
                if (--count==0) {
                    do_yield();
                    count = 256;
                }
            }
        }
    } else {
        MALLOC_ASSERT( isSolidPtr(block->publicFreeList), ASSERT_TEXT );
    }
    MALLOC_ASSERT( block->publicFreeList!=NULL, ASSERT_TEXT );
    // now it is safe to change our data
    block->previous = NULL;
    block->owner = 0;
    // it is the caller's responsibility to ensure that the list of blocks
    // formed by nextPrivatizable pointers is kept consistent if required.
    // if only called from thread shutdown code, it does not matter.
    (uintptr_t&)(block->nextPrivatizable) = UNUSABLE;
    globalSizeBins[index].push((void **)block);
}

static void cleanBlockHeader(Block *block)
{
#if MALLOC_DEBUG
    memset (block, 0x0e5, blockSize);
#endif
    block->next = NULL;
    block->previous = NULL;
    block->freeList = NULL;
    block->allocatedCount = 0;
    block->isFull = 0;

    block->publicFreeList = NULL;
}

static void initEmptyBlock(Block *block, size_t size)
{
    // Having getIndex and getObjectSize called next to each other
    // allows better compiler optimization as they basically share the code.
    unsigned int index = getIndex(size);
    unsigned int objectSize = getObjectSize(size);
    Bin* tls = (Bin*)getThreadMallocTLS();

    cleanBlockHeader(block);
    block->mallocUniqueID = theMallocUniqueID;
    block->objectSize = objectSize;
    block->owner = getThreadId();
    // the bump pointer should be prepared for the first allocation - thus move it down by objectSize
    block->bumpPtr = (FreeObject *)((uintptr_t)block + blockSize - objectSize);

    // each block should have the address where the head of the list of "privatizable" blocks is kept;
    // the only exception is a block for bootstrap, which is initialized when TLS is still NULL
    block->nextPrivatizable = tls? (Block*)(tls + index) : NULL;
    TRACEF(( "[ScalableMalloc trace] Empty block %p is initialized, owner is %d, objectSize is %d, bumpPtr is %p\n",
             block, block->owner, block->objectSize, block->bumpPtr ));
}

/* Return an empty uninitialized block in a non-blocking fashion. */
static Block *getRawBlock()
{
    Block *result;
    Block *bigBlock;

    result = NULL;

    bigBlock = (Block *) freeBlockList.pop();

    while (!bigBlock) {
        /* We are out of blocks, so go to the OS and get another one */
        if (!mallocBigBlock()) {
            return NULL;
        }
        bigBlock = (Block *) freeBlockList.pop();
    }

    // check alignment
    MALLOC_ASSERT( isAligned( bigBlock, blockSize ), ASSERT_TEXT );
    MALLOC_ASSERT( isAligned( bigBlock->bumpPtr, blockSize ), ASSERT_TEXT );
    // the block should be at least as big as blockSize; otherwise the previous block can be damaged.
    MALLOC_ASSERT( (uintptr_t)bigBlock->bumpPtr >= (uintptr_t)bigBlock + blockSize, ASSERT_TEXT );
    bigBlock->bumpPtr = (FreeObject *)((uintptr_t)bigBlock->bumpPtr - blockSize);
    result = (Block *)bigBlock->bumpPtr;
    if ( result!=bigBlock ) {
        TRACEF(( "[ScalableMalloc trace] Pushing partial rest of block back on.\n" ));
        freeBlockList.push((void **)bigBlock);
    }
    return result;
}

/* Return an empty initialized block in a non-blocking fashion. */
static Block *getEmptyBlock(size_t size)
{
    Block *result = getRawBlock();

    if (result) {
        initEmptyBlock(result, size);
        STAT_increment(result->owner, getIndex(result->objectSize), allocBlockNew);
    }

    return result;
}

/* We have a block; give it back to the malloc block manager */
static void returnEmptyBlock (Block *block, bool keepTheBin = true)
{
    // it is the caller's responsibility to ensure no data is lost before calling this
    MALLOC_ASSERT( block->allocatedCount==0, ASSERT_TEXT );
    MALLOC_ASSERT( block->publicFreeList==NULL, ASSERT_TEXT );
    if (keepTheBin) {
        /* We should keep the TLS bin structure */
        MALLOC_ASSERT( block->next == NULL, ASSERT_TEXT );
        MALLOC_ASSERT( block->previous == NULL, ASSERT_TEXT );
    }
    STAT_increment(block->owner, getIndex(block->objectSize), freeBlockBack);

    cleanBlockHeader(block);

    block->nextPrivatizable = NULL;

    block->mallocUniqueID=0;
    block->objectSize = 0;
    block->owner = (unsigned)-1;
    // for an empty block, the bump pointer should point right after the end of the block
    block->bumpPtr = (FreeObject *)((uintptr_t)block + blockSize);
    freeBlockList.push((void **)block);
}

inline static Block* getActiveBlock( Bin* bin )
{
    MALLOC_ASSERT( bin, ASSERT_TEXT );
    return bin->activeBlk;
}

inline static void setActiveBlock (Bin* bin, Block *block)
{
    MALLOC_ASSERT( bin, ASSERT_TEXT );
    MALLOC_ASSERT( block->owner == getThreadId(), ASSERT_TEXT );
    // it is the caller's responsibility to keep the bin consistent (i.e. ensure this block is in the bin list)
    bin->activeBlk = block;
}

inline static Block* setPreviousBlockActive( Bin* bin )
{
    MALLOC_ASSERT( bin && bin->activeBlk, ASSERT_TEXT );
    Block* temp = bin->activeBlk->previous;
    if( temp ) {
        MALLOC_ASSERT( temp->isFull == 0, ASSERT_TEXT );
        bin->activeBlk = temp;
    }
    return temp;
}

#if MALLOC_CHECK_RECURSION

/*
 * This is a special kind of allocation that can be used when malloc is
 * not available (either during startup or when malloc was already called and
 * we are, say, inside pthread_setspecific's call).
 * A block can contain objects of different sizes;
 * allocations are performed by moving the bump pointer and increasing the object counter;
 * releasing is done via the counter of objects allocated in the block,
 * or by moving the bump pointer if the released object is on the boundary.
 */

struct StartupBlock : public Block {
    size_t availableSize() {
        return blockSize - ((uintptr_t)bumpPtr - (uintptr_t)this);
    }
};

static MallocMutex startupMallocLock;
static StartupBlock *firstStartupBlock;

static StartupBlock *getNewStartupBlock()
{
    StartupBlock *block = (StartupBlock *)getRawBlock();

    if (!block) return NULL;

    cleanBlockHeader(block);
    block->mallocUniqueID = theMallocUniqueID;
    // use startupAllocObjSizeMark to mark objects from the startup block
    block->objectSize = startupAllocObjSizeMark;
    block->bumpPtr = (FreeObject *)((uintptr_t)block + sizeof(StartupBlock));
    return block;
}

/* TODO: This function is called when a nested malloc call is detected, so simultaneous
   usage from different threads is improbable; block pre-allocation may therefore
   not be useful, and the code might be simplified. */
static FreeObject *startupAlloc(size_t size)
{
    FreeObject *result;
    StartupBlock *newBlock = NULL;
    bool newBlockUnused = false;

    /* Objects must be aligned on their natural bounds,
       and objects bigger than a word on the word's bound. */
    size = alignUp(size, sizeof(size_t));
    // We need the size of an object to implement msize.
    size_t reqSize = size + sizeof(size_t);
    // speculatively allocate newBlock to use it later, or return it as unused
    if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize)
        if (!(newBlock = getNewStartupBlock()))
            return NULL;

    {
        MallocMutex::scoped_lock scoped_cs(startupMallocLock);

        if (!firstStartupBlock || firstStartupBlock->availableSize() < reqSize) {
            if (!newBlock && !(newBlock = getNewStartupBlock()))
                return NULL;
            newBlock->next = (Block*)firstStartupBlock;
            if (firstStartupBlock)
                firstStartupBlock->previous = (Block*)newBlock;
            firstStartupBlock = newBlock;
        } else
            newBlockUnused = true;
        result = firstStartupBlock->bumpPtr;
        firstStartupBlock->allocatedCount++;
        firstStartupBlock->bumpPtr =
            (FreeObject *)((uintptr_t)firstStartupBlock->bumpPtr + reqSize);
    }
    if (newBlock && newBlockUnused)
        returnEmptyBlock(newBlock);

    // keep the object size at the negative offset
    *((size_t*)result) = size;
    return (FreeObject*)((size_t*)result+1);
}

static size_t startupMsize(void *ptr) { return *((size_t*)ptr - 1); }

static void startupFree(StartupBlock *block, void *ptr)
{
    Block* blockToRelease = NULL;
    {
        MallocMutex::scoped_lock scoped_cs(startupMallocLock);

        MALLOC_ASSERT(firstStartupBlock, ASSERT_TEXT);
        MALLOC_ASSERT(startupAllocObjSizeMark==block->objectSize
                      && block->allocatedCount>0, ASSERT_TEXT);
        MALLOC_ASSERT((uintptr_t)ptr>=(uintptr_t)block+sizeof(StartupBlock)
                      && (uintptr_t)ptr+startupMsize(ptr)<=(uintptr_t)block+blockSize,
                      ASSERT_TEXT);
        if (0 == --block->allocatedCount) {
            if (block == firstStartupBlock)
                firstStartupBlock = (StartupBlock*)firstStartupBlock->next;
            if (block->previous)
                block->previous->next = block->next;
            if (block->next)
                block->next->previous = block->previous;
            blockToRelease = block;
        } else if ((uintptr_t)ptr + startupMsize(ptr) == (uintptr_t)block->bumpPtr) {
            // the last object in the block was released
            FreeObject *newBump = (FreeObject*)((size_t*)ptr - 1);
            MALLOC_ASSERT((uintptr_t)newBump>(uintptr_t)block+sizeof(StartupBlock),
                          ASSERT_TEXT);
            block->bumpPtr = newBump;
        }
    }
    if (blockToRelease) {
        blockToRelease->previous = blockToRelease->next = NULL;
        returnEmptyBlock(blockToRelease);
    }
}
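
/* Illustrative layout of a startup allocation (derived from startupAlloc and
   startupMsize above): each object is preceded by its size stored in a size_t
   at a negative offset,

       [size_t size][payload of alignUp(n, sizeof(size_t)) bytes]
                    ^ pointer returned to the caller

   so the bump pointer advances by size+sizeof(size_t) per request and
   startupMsize(ptr) simply reads ((size_t*)ptr)[-1]. */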

#endif /* MALLOC_CHECK_RECURSION */

/********* End thread related code *************/

/********* Library initialization *************/

//! Value indicating the state of initialization.
/* 0 = initialization not started.
 * 1 = initialization started but not finished.
 * 2 = initialization finished.
 * In theory, we only need values 0 and 2. But value 1 is nonetheless
 * useful for detecting errors in the double-check pattern.
 */
static int mallocInitialized;   // implicitly initialized to 0
static MallocMutex initAndShutMutex;

inline bool isMallocInitialized() { return 2 == mallocInitialized; }

/*
 * Allocator initialization routine;
 * it is called lazily on the very first scalable_malloc call.
 */
static void initMemoryManager()
{
    TRACEF(( "[ScalableMalloc trace] sizeof(Block) is %d (expected 128); sizeof(uintptr_t) is %d\n",
             sizeof(Block), sizeof(uintptr_t) ));
    MALLOC_ASSERT( 2*blockHeaderAlignment == sizeof(Block), ASSERT_TEXT );

    // Create keys for thread-local storage and for thread id
    // TODO: add error handling, and on error do something better than exit(1)
#if USE_WINTHREAD
    TLS_pointer_key = TlsAlloc();
    Tid_key = TlsAlloc();
#else
    int status1 = pthread_key_create( &TLS_pointer_key, mallocThreadShutdownNotification );
    int status2 = pthread_key_create( &Tid_key, NULL );
    if ( status1 || status2 ) {
        fprintf (stderr, "The memory manager cannot create tls key during initialization; exiting \n");
        exit(1);
    }
#endif /* USE_WINTHREAD */
#if COLLECT_STATISTICS
    initStatisticsCollection();
#endif

    TRACEF(( "[ScalableMalloc trace] Asking for a mallocBigBlock\n" ));
    if (!mallocBigBlock()) {
        fprintf (stderr, "The memory manager cannot access sufficient memory to initialize; exiting \n");
        exit(1);
    }
}

//! Ensures that initMemoryManager() is called once and only once.
/** Does not return until initMemoryManager() has been completed by a thread.
    There is no need to call this routine if mallocInitialized==2 . */
static void checkInitialization()
{
    if (mallocInitialized==2) return;
    MallocMutex::scoped_lock lock( initAndShutMutex );
    if (mallocInitialized!=2) {
        MALLOC_ASSERT( mallocInitialized==0, ASSERT_TEXT );
        mallocInitialized = 1;
        RecursiveMallocCallProtector scoped;
        initMemoryManager();
#ifdef  MALLOC_EXTRA_INITIALIZATION
        MALLOC_EXTRA_INITIALIZATION;
#endif
#if MALLOC_CHECK_RECURSION
        RecursiveMallocCallProtector::detectNaiveOverload();
#endif
        MALLOC_ASSERT( mallocInitialized==1, ASSERT_TEXT );
        mallocInitialized = 2;
    }
    MALLOC_ASSERT( mallocInitialized==2, ASSERT_TEXT ); /* It can't be 0, or I would have initialized it */
}

/********* End library initialization *************/

/********* The malloc show begins *************/


/********* Allocation of large objects ************/

/*
 * The program wants a large object that we are not prepared to deal with,
 * so we pass the problem on to the OS. Large Objects are the only objects in
 * the system that begin on a 16K byte boundary, since the blocks used for smaller
 * objects have the Block structure at each 16K boundary.
 */

struct LargeObjectHeader {
    void        *unalignedResult;   /* The base of the memory returned from getRawMemory; this is what is used to return it to the OS */
    size_t       unalignedSize;     /* The size that was requested from getRawMemory */
    uint64_t     mallocUniqueID;    /* The field to check whether the memory was allocated by scalable_malloc */
    size_t       objectSize;        /* The size originally requested by a client */
    bool         fromMapMemory;     /* Memory allocated when MapMemory usage is forced */
};

void CachedObjectsList::push(void *buf, bool fromMapMemory, uintptr_t currAge)
{
    CachedObject *ptr = (CachedObject*)buf;
    ptr->prev = NULL;
    ptr->age  = currAge;
    ptr->fromMapMemory = fromMapMemory;

    MallocMutex::scoped_lock scoped_cs(lock);
    ptr->next = first;
    first = ptr;
    if (ptr->next) ptr->next->prev = ptr;
    if (!last) {
        MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
        oldest = currAge;
        last = ptr;
    }
}

CachedObject *CachedObjectsList::pop(uintptr_t currAge)
{
    CachedObject *result=NULL;
    {
        MallocMutex::scoped_lock scoped_cs(lock);
        if (first) {
            result = first;
            first = result->next;
            if (first)
                first->prev = NULL;
            else {
                last = NULL;
                oldest = 0;
            }
        } else {
            /* If a cache miss occurred, set ageThreshold to twice the difference
               between the current time and the last time the cache was cleaned. */
            ageThreshold = 2*(currAge - lastCleanedAge);
        }
    }
    return result;
}

void CachedObjectsList::releaseLastIfOld(uintptr_t currAge, size_t size)
{
    CachedObject *toRelease = NULL;

    /* oldest may be more recent than currAge; that is why a cast to a signed type
       is used. Age overflow is also processed correctly. */
    if (last && (intptr_t)(currAge - oldest) > ageThreshold) {
        MallocMutex::scoped_lock scoped_cs(lock);
        // double check
        if (last && (intptr_t)(currAge - last->age) > ageThreshold) {
            do {
                last = last->prev;
            } while (last && (intptr_t)(currAge - last->age) > ageThreshold);
            if (last) {
                toRelease = last->next;
                oldest = last->age;
                last->next = NULL;
            } else {
                toRelease = first;
                first = NULL;
                oldest = 0;
            }
            MALLOC_ASSERT( toRelease, ASSERT_TEXT );
            lastCleanedAge = toRelease->age;
        }
        else
            return;
    }
    while ( toRelease ) {
        CachedObject *helper = toRelease->next;
        freeRawMemory(toRelease, size, toRelease->fromMapMemory);
        toRelease = helper;
    }
}
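
/* Worked example of the aging policy for illustration: if a bin's cache was
   last cleaned at age 100 and a miss occurs at age 150, pop() sets
   ageThreshold = 2*(150-100) = 100; on a later cleanup pass, any cached object
   that has sat unused for more than 100 age ticks is released back to the OS,
   while more recently cached objects stay available for reuse. */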

/* A predicate checking whether an object starts on a blockSize boundary */
static inline unsigned int isLargeObject(void *object)
{
    return isAligned(object, blockSize);
}

static uintptr_t cleanupCacheIfNeed ()
{
    /* loCacheStat.age overflow is OK, as we only want the difference between
     * its current value and some recent one.
     *
     * Both malloc and free should increment loCacheStat.age, because
     * otherwise multiple cached objects would have the same age,
     * and the accuracy of the predictors would suffer.
     */
    uintptr_t currAge = (uintptr_t)AtomicIncrement((intptr_t&)loCacheStat.age);

    if ( 0 == currAge % cacheCleanupFreq ) {
        size_t objSize;
        int i;

        for (i = numLargeObjectBins-1,
             objSize = (numLargeObjectBins-1)*largeObjectCacheStep+blockSize;
             i >= 0;
             i--, objSize-=largeObjectCacheStep) {
            /* The cached object size on iteration i is
             * i*largeObjectCacheStep+blockSize; computing it iteratively
             * seems to improve performance.
             */
            // release from the cache objects that are older than ageThreshold
            globalCachedObjectBins[i].releaseLastIfOld(currAge, objSize);
        }
    }
    return currAge;
}

static CachedObject* allocateCachedLargeObject (size_t size)
{
    MALLOC_ASSERT( size%largeObjectCacheStep==0, ASSERT_TEXT );
    CachedObject *block = NULL;
    // blockSize is the minimal alignment and thus the minimal size of a large object.
    size_t idx = (size-blockSize)/largeObjectCacheStep;
    if (idx<numLargeObjectBins) {
        uintptr_t currAge = cleanupCacheIfNeed();
        block = globalCachedObjectBins[idx].pop(currAge);
        if (block) {
            STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
        }
    }
    return block;
}

static inline void* mallocLargeObject (size_t size, size_t alignment,
                                       bool startupAlloc = false)
{
    void * unalignedArea;
    size_t allocationSize = alignUp(size+sizeof(LargeObjectHeader)+alignment,
                                    largeObjectCacheStep);
    bool   blockFromMapMemory = false;

    if (startupAlloc) {
        if (! (unalignedArea = getRawMemory(allocationSize, /*alwaysUseMap=*/true)))
            return NULL;
    } else {
        CachedObject* cachedObj = allocateCachedLargeObject(allocationSize);
        if (cachedObj) {
            blockFromMapMemory = cachedObj->fromMapMemory;
            unalignedArea = cachedObj;
        } else {
            unalignedArea = getRawMemory(allocationSize);
            if (!unalignedArea)
                return NULL;
            STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
        }
    }
    void *alignedArea = (void*)alignUp((uintptr_t)unalignedArea+sizeof(LargeObjectHeader), alignment);
    LargeObjectHeader *header = (LargeObjectHeader*)((uintptr_t)alignedArea-sizeof(LargeObjectHeader));
    header->unalignedResult = unalignedArea;
    header->mallocUniqueID=theMallocUniqueID;
    header->unalignedSize = allocationSize;
    header->objectSize = size;
    header->fromMapMemory = startupAlloc || blockFromMapMemory;
    MALLOC_ASSERT( isLargeObject(alignedArea), ASSERT_TEXT );
    return alignedArea;
}
|
|
|
|
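/* Resulting memory layout (a sketch; actual offsets depend on the request):
 *
 *   unalignedArea                  alignedArea (returned to the caller)
 *   |                              |
 *   v                              v
 *   +-------...----+--------------+-----------------------...---+
 *   | padding      | LargeObject  | user data (size bytes)      |
 *   |              | Header       |                             |
 *   +-------...----+--------------+-----------------------------+
 *   <-------------------- allocationSize ----------------------->
 *
 * The header sits immediately below the aligned pointer, so freeLargeObject
 * can recover unalignedResult and unalignedSize from the pointer alone.
 */
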
static bool freeLargeObjectToCache (LargeObjectHeader* header)
{
    size_t size = header->unalignedSize;
    size_t idx = (size-blockSize)/largeObjectCacheStep;
    if (idx<numLargeObjectBins) {
        MALLOC_ASSERT( size%largeObjectCacheStep==0, ASSERT_TEXT );
        uintptr_t currAge = cleanupCacheIfNeed ();
        globalCachedObjectBins[idx].push(header->unalignedResult,
                                         header->fromMapMemory, currAge);

        STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
        return true;
    }
    return false;
}

static inline void freeLargeObject (void *object)
{
    LargeObjectHeader *header;
    header = (LargeObjectHeader *)((uintptr_t)object - sizeof(LargeObjectHeader));
    header->mallocUniqueID = 0;
    if (!freeLargeObjectToCache(header)) {
        freeRawMemory(header->unalignedResult, header->unalignedSize,
                      /*alwaysUseMap=*/ header->fromMapMemory);
        STAT_increment(getThreadId(), ThreadCommonCounters, freeLargeObj);
    }
}

/*********** End allocation of large objects **********/

static FreeObject *allocateFromFreeList(Block *mallocBlock)
{
    FreeObject *result;

    if (!mallocBlock->freeList) {
        return NULL;
    }

    result = mallocBlock->freeList;
    MALLOC_ASSERT( result, ASSERT_TEXT );

    mallocBlock->freeList = result->next;
    MALLOC_ASSERT( mallocBlock->allocatedCount < (blockSize-sizeof(Block))/mallocBlock->objectSize, ASSERT_TEXT );
    mallocBlock->allocatedCount++;
    STAT_increment(mallocBlock->owner, getIndex(mallocBlock->objectSize), allocFreeListUsed);

    return result;
}

static FreeObject *allocateFromBumpPtr(Block *mallocBlock)
{
    FreeObject *result = mallocBlock->bumpPtr;
    if (result) {
        mallocBlock->bumpPtr =
            (FreeObject *) ((uintptr_t) mallocBlock->bumpPtr - mallocBlock->objectSize);
        if ( (uintptr_t)mallocBlock->bumpPtr < (uintptr_t)mallocBlock+sizeof(Block) ) {
            mallocBlock->bumpPtr = NULL;
        }
        MALLOC_ASSERT( mallocBlock->allocatedCount < (blockSize-sizeof(Block))/mallocBlock->objectSize, ASSERT_TEXT );
        mallocBlock->allocatedCount++;
        STAT_increment(mallocBlock->owner, getIndex(mallocBlock->objectSize), allocBumpPtrUsed);
    }
    return result;
}

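/* The bump pointer walks downward through virgin space (illustrative layout;
 * object count and sizes are hypothetical):
 *
 *   +--------+---------------------+------+------+------+
 *   | Block  |   unused (virgin)   | obj2 | obj1 | obj0 |
 *   | header |                     |      |      |      |
 *   +--------+---------------------+------+------+------+
 *            ^                     ^
 *     block + sizeof(Block)      bumpPtr (next object returned from here)
 *
 * Once bumpPtr would cross into the header, it is set to NULL and further
 * allocations in this block must come from the free list.
 */
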
inline static FreeObject* allocateFromBlock( Block *mallocBlock )
{
    FreeObject *result;

    MALLOC_ASSERT( mallocBlock->owner == getThreadId(), ASSERT_TEXT );

    /* For better cache locality, look in the free list first. */
    if ( (result = allocateFromFreeList(mallocBlock)) ) {
        return result;
    }
    MALLOC_ASSERT( !mallocBlock->freeList, ASSERT_TEXT );

    /* If the free list is empty, try thread-local bump-pointer allocation. */
    if ( (result = allocateFromBumpPtr(mallocBlock)) ) {
        return result;
    }
    MALLOC_ASSERT( !mallocBlock->bumpPtr, ASSERT_TEXT );

    /* Both sources are exhausted: the block is considered full. */
    mallocBlock->isFull = 1;
    return NULL;
}

static void moveBlockToBinFront(Block *block)
{
    Bin* bin = getAllocationBin(block->objectSize);
    /* move the block to the front of the bin */
    outofTLSBin(bin, block);
    pushTLSBin(bin, block);
}

static void processLessUsedBlock(Block *block)
{
    Bin* bin = getAllocationBin(block->objectSize);
    if (block != getActiveBlock(bin) && block != getActiveBlock(bin)->previous ) {
        /* We are not actively using this block; return it to the general block pool */
        outofTLSBin(bin, block);
        returnEmptyBlock(block);
    } else {
        /* all objects are free - let's restore the bump pointer */
        restoreBumpPtr(block);
    }
}

/*
 * All aligned allocations fall into one of the following categories:
 * 1. if both the requested size and alignment are <= maxSegregatedObjectSize,
 *    we just align the size up and request that amount, because for every size
 *    aligned to some power of 2, the allocated object is at least that aligned;
 * 2. for bigger sizes, check whether the already guaranteed fittingAlignment suffices;
 * 3. if size+alignment < minLargeObjectSize, we take an object of fittingSizeN and
 *    align its address up; given such a pointer, scalable_free can find the real object;
 * 4. otherwise, an aligned large object is allocated.
 */
static void *allocateAligned(size_t size, size_t alignment)
{
    MALLOC_ASSERT( isPowerOfTwo(alignment), ASSERT_TEXT );

    void *result;
    if (size<=maxSegregatedObjectSize && alignment<=maxSegregatedObjectSize)
        result = scalable_malloc(alignUp(size? size: sizeof(size_t), alignment));
    else if (alignment<=fittingAlignment)
        result = scalable_malloc(size);
    else if (size+alignment < minLargeObjectSize) {
        void *unaligned = scalable_malloc(size+alignment);
        if (!unaligned) return NULL;
        result = alignUp(unaligned, alignment);
    } else {
        /* This can be the first allocation call. */
        checkInitialization();
        /* To correctly detect the kind of allocation in scalable_free, we need
           to distinguish memory allocated as a large object. This is done via
           an alignment greater than any that can occur inside a Block. */
        result = mallocLargeObject(size, blockSize>alignment? blockSize: alignment);
    }

    MALLOC_ASSERT( isAligned(result, alignment), ASSERT_TEXT );
    return result;
}

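/* Hypothetical requests and the category they fall into (the thresholds are
 * named, not given numerically, since they are configuration-dependent):
 *   allocateAligned(24, 32)      -> category 1, if both fit under maxSegregatedObjectSize
 *   allocateAligned(2000, 8)     -> category 2: fittingAlignment already suffices
 *   allocateAligned(2000, 512)   -> category 3, if 2000+512 < minLargeObjectSize:
 *                                   over-allocate, then align the address up
 *   allocateAligned(1<<20, 4096) -> category 4: aligned large object
 */
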
static void *reallocAligned(void *ptr, size_t size, size_t alignment = 0)
{
    void *result;
    size_t copySize;

    if (isLargeObject(ptr)) {
        LargeObjectHeader* loh = (LargeObjectHeader *)((uintptr_t)ptr - sizeof(LargeObjectHeader));
        MALLOC_ASSERT( loh->mallocUniqueID==theMallocUniqueID, ASSERT_TEXT );
        copySize = loh->unalignedSize-((uintptr_t)ptr-(uintptr_t)loh->unalignedResult);
        if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
            loh->objectSize = size;
            return ptr;
        } else {
            copySize = loh->objectSize;
            result = alignment ? allocateAligned(size, alignment) : scalable_malloc(size);
        }
    } else {
        Block* block = (Block *)alignDown(ptr, blockSize);
        MALLOC_ASSERT( block->mallocUniqueID==theMallocUniqueID, ASSERT_TEXT );
        copySize = block->objectSize;
        if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
            return ptr;
        } else {
            result = alignment ? allocateAligned(size, alignment) : scalable_malloc(size);
        }
    }
    if (result) {
        memcpy(result, ptr, copySize<size? copySize: size);
        scalable_free(ptr);
    }
    return result;
}

/* A predicate that checks whether an object is properly placed inside its block */
static inline bool isProperlyPlaced(const void *object, const Block *block)
{
    return 0 == ((uintptr_t)block + blockSize - (uintptr_t)object) % block->objectSize;
}

/* Finds the real object inside the block */
static inline FreeObject *findAllocatedObject(const void *address, const Block *block)
{
    // calculate the offset from the end of the block space
    uintptr_t offset = (uintptr_t)block + blockSize - (uintptr_t)address;
    MALLOC_ASSERT( offset<blockSize-sizeof(Block), ASSERT_TEXT );
    // find the offset's difference from a multiple of the allocation size
    offset %= block->objectSize;
    // and move the address down to where the real object starts
    return (FreeObject*)((uintptr_t)address - (offset? block->objectSize-offset: 0));
}

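/* A worked example of the offset arithmetic (all numbers hypothetical):
 * suppose block = 0x10000, blockSize = 0x4000, objectSize = 0x60, and an
 * aligned caller passes address = 0x13F20. Then
 *     offset = 0x10000 + 0x4000 - 0x13F20 = 0xE0;  0xE0 % 0x60 = 0x20;
 * so the real object starts at 0x13F20 - (0x60 - 0x20) = 0x13EE0, which is
 * an exact multiple of objectSize from the end of the block.
 */
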
} // namespace internal
} // namespace rml

using namespace rml::internal;

/*
 * When a thread is shutting down, this routine should be called to remove all
 * the thread ids from the malloc blocks and replace them with a NULL thread id.
 */
#if MALLOC_TRACE
static unsigned int threadGoingDownCount = 0;
#endif

/*
 * For pthreads, this function is set as the destructor callback in
 * pthread_key_create for the TLS bin; it is called automatically at thread
 * exit with the key value as the argument.
 *
 * For Windows, it should be called directly, e.g. from DllMain; the argument
 * can be NULL. One should include "TypeDefinitions.h" for the declaration
 * of this function.
 */
extern "C" void mallocThreadShutdownNotification(void* arg)
{
    Bin *tls;
    Block *threadBlock;
    Block *threadlessBlock;
    unsigned int index;

    {
        MallocMutex::scoped_lock lock( initAndShutMutex );
        if ( mallocInitialized == 0 ) return;
    }

    TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return start %d\n",
             getThreadId(), threadGoingDownCount++ ));
#ifdef USE_WINTHREAD
    tls = (Bin*)getThreadMallocTLS();
#else
    tls = (Bin*)arg;
#endif
    if (tls) {
        for (index = 0; index < numBlockBins; index++) {
            if (tls[index].activeBlk==NULL)
                continue;
            threadlessBlock = tls[index].activeBlk->previous;
            while (threadlessBlock) {
                threadBlock = threadlessBlock->previous;
                if (threadlessBlock->allocatedCount==0 && threadlessBlock->publicFreeList==NULL) {
                    /* the thread is being destroyed, so there is no need to keep its TLS bin -> the second param is false */
                    returnEmptyBlock(threadlessBlock, false);
                } else {
                    returnPartialBlock(tls+index, threadlessBlock);
                }
                threadlessBlock = threadBlock;
            }
            threadlessBlock = tls[index].activeBlk;
            while (threadlessBlock) {
                threadBlock = threadlessBlock->next;
                if (threadlessBlock->allocatedCount==0 && threadlessBlock->publicFreeList==NULL) {
                    /* the thread is being destroyed, so there is no need to keep its TLS bin -> the second param is false */
                    returnEmptyBlock(threadlessBlock, false);
                } else {
                    returnPartialBlock(tls+index, threadlessBlock);
                }
                threadlessBlock = threadBlock;
            }
            tls[index].activeBlk = 0;
        }
        bootStrapFree((void*)tls);
        setThreadMallocTLS(NULL);
    }

    TRACEF(( "[ScalableMalloc trace] Thread id %d blocks return end\n", getThreadId() ));
}

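/* A minimal sketch of how the pthreads path is expected to be wired up
 * (hypothetical initialization code, not part of this file):
 *
 *     static pthread_key_t mallocTlsKey;
 *     // during allocator initialization: the destructor runs at thread
 *     // exit with the stored TLS value as its argument
 *     pthread_key_create(&mallocTlsKey, mallocThreadShutdownNotification);
 *     // per thread, after the Bin array is bootstrapped:
 *     pthread_setspecific(mallocTlsKey, tlsBinArray);
 *
 * On Windows there is no such destructor, hence the direct call from
 * DllMain's thread-detach handling.
 */
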
extern "C" void mallocProcessShutdownNotification(void)
{
    // for now, this function is only needed for dumping statistics
#if COLLECT_STATISTICS
    ThreadId nThreads = ThreadIdCount;
    for( int i=1; i<=nThreads && i<MAX_THREADS; ++i )
        STAT_print(i);
#endif
}

/**** Check if an object was allocated by scalable_malloc ****/

/*
 * A bad dereference caused by a foreign pointer is possible only here, not
 * earlier in the call chain. A separate function isolates the SEH code,
 * because it adversely affects compiler optimization.
 */
static inline uint64_t safer_dereference (uint64_t *ptr)
{
    uint64_t id;
#if _MSC_VER
    __try {
#endif
        id = *ptr;
#if _MSC_VER
    } __except( GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION?
                EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH ) {
        id = 0;
    }
#endif
    return id;
}

static inline bool isRecognized (void* ptr)
{
    uint64_t id = safer_dereference(
        isLargeObject(ptr)
            ? &((LargeObjectHeader*)((uintptr_t)ptr-sizeof(LargeObjectHeader)))->mallocUniqueID
            : &((Block *)alignDown(ptr, blockSize))->mallocUniqueID
        );
    return id == theMallocUniqueID;
}

/********* The malloc code *************/

extern "C" void * scalable_malloc(size_t size)
{
    Bin* bin;
    Block * mallocBlock;
    FreeObject *result;

    if (!size) size = sizeof(size_t);

#if MALLOC_CHECK_RECURSION
    if (RecursiveMallocCallProtector::sameThreadActive()) {
        result = size<minLargeObjectSize? startupAlloc(size) :
                 (FreeObject*)mallocLargeObject(size, blockSize, /*startupAlloc=*/ true);
        if (!result) errno = ENOMEM;
        return result;
    }
#endif

    /* This returns only after malloc is initialized */
    checkInitialization();

    /*
     * Use large object allocation
     */
    if (size >= minLargeObjectSize) {
        result = (FreeObject*)mallocLargeObject(size, blockSize);
        if (!result) errno = ENOMEM;
        return result;
    }

    /*
     * Get the element of the thread-local array that corresponds to the given size;
     * it keeps a pointer to the active block for allocations of this size.
     */
    bin = getAllocationBin(size);
    if ( !bin ) {
        errno = ENOMEM;
        return NULL;
    }

    /* Get the block you want to try to allocate from. */
    mallocBlock = getActiveBlock(bin);

    if (mallocBlock) {
        do {
            if( (result = allocateFromBlock(mallocBlock)) ) {
                return result;
            }
            // the previous block, if any, should be empty enough
        } while( (mallocBlock = setPreviousBlockActive(bin)) );
    }
    MALLOC_ASSERT( !(bin->activeBlk) || bin->activeBlk->isFull==1, ASSERT_TEXT );

    /*
     * Else privatize publicly freed objects in some block and allocate from it
     */
    mallocBlock = getPublicFreeListBlock( bin );
    if (mallocBlock) {
        if (emptyEnoughToUse(mallocBlock)) {
            /* move the block to the front of the bin */
            outofTLSBin(bin, mallocBlock);
            pushTLSBin(bin, mallocBlock);
        }
        MALLOC_ASSERT( mallocBlock->freeList, ASSERT_TEXT );
        if ( (result = allocateFromFreeList(mallocBlock)) ) {
            return result;
        }
        /* Else something strange happened; retry from the beginning. */
        TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in public free list; reentering.\n" ));
        return scalable_malloc(size);
    }

    /*
     * No suitable blocks of our own; try to get a partial block that some other thread has discarded.
     */
    mallocBlock = getPartialBlock(bin, size);
    while (mallocBlock) {
        pushTLSBin(bin, mallocBlock);
        // guaranteed by pushTLSBin: MALLOC_ASSERT( *bin==mallocBlock || (*bin)->previous==mallocBlock, ASSERT_TEXT );
        setActiveBlock(bin, mallocBlock);
        if( (result = allocateFromBlock(mallocBlock)) ) {
            return result;
        }
        mallocBlock = getPartialBlock(bin, size);
    }

    /*
     * Else try to get a new empty block
     */
    mallocBlock = getEmptyBlock(size);
    if (mallocBlock) {
        pushTLSBin(bin, mallocBlock);
        // guaranteed by pushTLSBin: MALLOC_ASSERT( *bin==mallocBlock || (*bin)->previous==mallocBlock, ASSERT_TEXT );
        setActiveBlock(bin, mallocBlock);
        if( (result = allocateFromBlock(mallocBlock)) ) {
            return result;
        }
        /* Else something strange happened; retry from the beginning. */
        TRACEF(( "[ScalableMalloc trace] Something is wrong: no objects in empty block; reentering.\n" ));
        return scalable_malloc(size);
    }
    /*
     * Nothing works, so return NULL
     */
    TRACEF(( "[ScalableMalloc trace] No memory found, returning NULL.\n" ));
    errno = ENOMEM;
    return NULL;
}

/********* End the malloc code *************/

/********* The free code *************/

extern "C" void scalable_free (void *object) {
    Block *block;
    ThreadId myTid;
    FreeObject *objectToFree;

    if (!object) {
        return;
    }

    if (isLargeObject(object)) {
        freeLargeObject(object);
        return;
    }

    block = (Block *)alignDown(object, blockSize);/* mask low bits to get the block */
    MALLOC_ASSERT( block->mallocUniqueID == theMallocUniqueID, ASSERT_TEXT );
    MALLOC_ASSERT( block->allocatedCount, ASSERT_TEXT );

#if MALLOC_CHECK_RECURSION
    if (block->objectSize == startupAllocObjSizeMark) {
        startupFree((StartupBlock *)block, object);
        return;
    }
#endif

    myTid = getThreadId();

    // Due to aligned allocations, a pointer passed to scalable_free
    // might differ from the address of the internally allocated object.
    // Small objects, however, are always fine.
    if (block->objectSize <= maxSegregatedObjectSize)
        objectToFree = (FreeObject*)object;
    // "Fitting size" allocations are suspicious if aligned higher than naturally
    else {
        if ( ! isAligned(object,2*fittingAlignment) )
            // TODO: the above check is questionable - it gives false negatives in ~50% of cases,
            // so it might even be slower on average than unconditional use of findAllocatedObject.
            // here it must be a "real" object
            objectToFree = (FreeObject*)object;
        else
            // here object can be an aligned address, so apply the additional checks
            objectToFree = findAllocatedObject(object, block);
        MALLOC_ASSERT( isAligned(objectToFree,fittingAlignment), ASSERT_TEXT );
    }
    MALLOC_ASSERT( isProperlyPlaced(objectToFree, block), ASSERT_TEXT );

    if (myTid == block->owner) {
        objectToFree->next = block->freeList;
        block->freeList = objectToFree;
        block->allocatedCount--;
        MALLOC_ASSERT( block->allocatedCount < (blockSize-sizeof(Block))/block->objectSize, ASSERT_TEXT );
#if COLLECT_STATISTICS
        if (getActiveBlock(getAllocationBin(block->objectSize)) != block)
            STAT_increment(myTid, getIndex(block->objectSize), freeToInactiveBlock);
        else
            STAT_increment(myTid, getIndex(block->objectSize), freeToActiveBlock);
#endif
        if (block->isFull) {
            if (emptyEnoughToUse(block))
                moveBlockToBinFront(block);
        } else {
            if (block->allocatedCount==0 && block->publicFreeList==NULL)
                processLessUsedBlock(block);
        }
    } else { /* Slower path: add to the shared list; allocatedCount is updated by the owner thread in malloc. */
        freePublicObject (block, objectToFree);
    }
}

/*
 * A variant that provides additional memory safety by checking whether the given
 * address was obtained from this allocator, and if not, redirecting to the
 * provided alternative call.
 */
extern "C" void safer_scalable_free (void *object, void (*original_free)(void*))
{
    if (!object)
        return;

    if (isRecognized(object))
        scalable_free(object);
    else if (original_free)
        original_free(object);
}

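/* A hypothetical replacement scenario (illustrative only): when scalable_free
 * is interposed over an application's free, pointers that predate the
 * interposition must still go to the original deallocator:
 *
 *     void replaced_free(void *p) {
 *         safer_scalable_free(p, free);   // fall back to the CRT's free
 *     }
 *
 * isRecognized() makes the dispatch decision by probing the unique ID that
 * this allocator stamps into every Block and LargeObjectHeader.
 */
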
/********* End the free code *************/

/********* Code for scalable_realloc ***********/

/*
 * From K&R:
 * "realloc changes the size of the object pointed to by p to size. The contents will
 * be unchanged up to the minimum of the old and the new sizes. If the new size is larger,
 * the new space is uninitialized. realloc returns a pointer to the new space, or
 * NULL if the request cannot be satisfied, in which case *p is unchanged."
 */
extern "C" void* scalable_realloc(void* ptr, size_t size)
{
    /* corner cases are kept out of reallocAligned so errno need not be dealt with there */
    if (!ptr) {
        return scalable_malloc(size);
    }
    if (!size) {
        scalable_free(ptr);
        return NULL;
    }
    void* tmp = reallocAligned(ptr, size, 0);
    if (!tmp) errno = ENOMEM;
    return tmp;
}

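/* The corner cases above follow the classic realloc contract (illustrative
 * calls, not taken from a real program):
 *     p = scalable_realloc(NULL, 100);  // same as scalable_malloc(100)
 *     p = scalable_realloc(p, 0);       // frees p, returns NULL
 *     q = scalable_realloc(p, 1<<20);   // may move; on NULL, p stays valid
 */
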
/*
 * A variant that provides additional memory safety by checking whether the given
 * address was obtained from this allocator, and if not, redirecting to the
 * provided alternative call.
 */
extern "C" void* safer_scalable_realloc (void* ptr, size_t sz, void* original_realloc)
{
    if (!ptr) {
        return scalable_malloc(sz);
    }
    if (isRecognized(ptr)) {
        if (!sz) {
            scalable_free(ptr);
            return NULL;
        }
        void* tmp = reallocAligned(ptr, sz, 0);
        if (!tmp) errno = ENOMEM;
        return tmp;
    }
#if USE_WINTHREAD
    else if (original_realloc && sz) {
        orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(original_realloc);
        if ( original_ptrs->orig_msize ) {
            size_t oldSize = original_ptrs->orig_msize(ptr);
            void *newBuf = scalable_malloc(sz);
            if (newBuf) {
                memcpy(newBuf, ptr, sz<oldSize? sz : oldSize);
                if ( original_ptrs->orig_free ) {
                    original_ptrs->orig_free( ptr );
                }
            }
            return newBuf;
        }
    }
#else
    else if (original_realloc) {
        typedef void* (*realloc_ptr_t)(void*,size_t);
        realloc_ptr_t original_realloc_ptr;
        (void *&)original_realloc_ptr = original_realloc;
        return original_realloc_ptr(ptr,sz);
    }
#endif
    return NULL;
}

/********* End code for scalable_realloc ***********/

/********* Code for scalable_calloc ***********/

/*
 * From K&R:
 * "calloc returns a pointer to space for an array of nobj objects,
 * each of size size, or NULL if the request cannot be satisfied.
 * The space is initialized to zero bytes."
 */
extern "C" void * scalable_calloc(size_t nobj, size_t size)
{
    size_t arraySize = nobj * size;
    /* guard against overflow of nobj*size, which scalable_malloc cannot detect */
    if (nobj && arraySize / nobj != size) {
        errno = ENOMEM;
        return NULL;
    }
    void* result = scalable_malloc(arraySize);
    if (result)
        memset(result, 0, arraySize);
    return result;
}

/********* End code for scalable_calloc ***********/

/********* Code for aligned allocation API **********/

extern "C" int scalable_posix_memalign(void **memptr, size_t alignment, size_t size)
{
    if ( !isPowerOfTwoMultiple(alignment, sizeof(void*)) )
        return EINVAL;
    void *result = allocateAligned(size, alignment);
    if (!result)
        return ENOMEM;
    *memptr = result;
    return 0;
}

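/* As with POSIX posix_memalign, the return value carries the error and errno
 * is left alone. An illustrative call (hypothetical sizes):
 *
 *     void *buf;
 *     int rc = scalable_posix_memalign(&buf, 64, 1024); // 64-byte-aligned 1 KB
 *     if (rc == 0) { scalable_free(buf); }
 *
 * Note that on failure *memptr is not written.
 */
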
extern "C" void * scalable_aligned_malloc(size_t size, size_t alignment)
{
    if (!isPowerOfTwo(alignment) || 0==size) {
        errno = EINVAL;
        return NULL;
    }
    void* tmp = allocateAligned(size, alignment);
    if (!tmp)
        errno = ENOMEM;
    return tmp;
}

extern "C" void * scalable_aligned_realloc(void *ptr, size_t size, size_t alignment)
{
    /* corner cases are kept out of reallocAligned so errno need not be dealt with there */
    if (!isPowerOfTwo(alignment)) {
        errno = EINVAL;
        return NULL;
    }
    if (!ptr) {
        return allocateAligned(size, alignment);
    }
    if (!size) {
        scalable_free(ptr);
        return NULL;
    }

    void* tmp = reallocAligned(ptr, size, alignment);
    if (!tmp) errno = ENOMEM;
    return tmp;
}

extern "C" void * safer_scalable_aligned_realloc(void *ptr, size_t size, size_t alignment, void* orig_function)
{
    /* corner cases are kept out of reallocAligned so errno need not be dealt with there */
    if (!isPowerOfTwo(alignment)) {
        errno = EINVAL;
        return NULL;
    }
    if (!ptr) {
        return allocateAligned(size, alignment);
    }
    if (isRecognized(ptr)) {
        if (!size) {
            scalable_free(ptr);
            return NULL;
        }
        void* tmp = reallocAligned(ptr, size, alignment);
        if (!tmp) errno = ENOMEM;
        return tmp;
    }
#if USE_WINTHREAD
    else {
        orig_ptrs *original_ptrs = static_cast<orig_ptrs*>(orig_function);
        if (size) {
            if ( original_ptrs->orig_msize ) {
                size_t oldSize = original_ptrs->orig_msize(ptr);
                void *newBuf = allocateAligned(size, alignment);
                if (newBuf) {
                    memcpy(newBuf, ptr, size<oldSize? size : oldSize);
                    if ( original_ptrs->orig_free ) {
                        original_ptrs->orig_free( ptr );
                    }
                }
                return newBuf;
            } else {
                /* We cannot resize the foreign buffer; leave the old pointer intact and report failure. */
                return NULL;
            }
        } else {
            if ( original_ptrs->orig_free ) {
                original_ptrs->orig_free( ptr );
            }
            return NULL;
        }
    }
#endif
    return NULL;
}

extern "C" void scalable_aligned_free(void *ptr)
{
    scalable_free(ptr);
}

/********* End code for aligned allocation API **********/

/********* Code for scalable_msize ***********/

/*
 * Returns the size of a memory block allocated in the heap.
 */
extern "C" size_t scalable_msize(void* ptr)
{
    if (ptr) {
        if (isLargeObject(ptr)) {
            LargeObjectHeader* loh = (LargeObjectHeader*)((uintptr_t)ptr - sizeof(LargeObjectHeader));
            if (loh->mallocUniqueID==theMallocUniqueID)
                return loh->unalignedSize-((uintptr_t)ptr-(uintptr_t)loh->unalignedResult);
        } else {
            Block* block = (Block *)alignDown(ptr, blockSize);
            if (block->mallocUniqueID==theMallocUniqueID) {
#if MALLOC_CHECK_RECURSION
                size_t size = block->objectSize? block->objectSize : startupMsize(ptr);
#else
                size_t size = block->objectSize;
#endif
                MALLOC_ASSERT(size>0 && size<minLargeObjectSize, ASSERT_TEXT);
                return size;
            }
        }
    }
    errno = EINVAL;
    // Unlike _msize, return 0 in case of a parameter error;
    // returning size_t(-1) would be more likely to cause trouble.
    return 0;
}

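/* Note that the reported size is the usable size, which may exceed the size
 * originally requested (illustrative, with configuration-dependent rounding):
 *
 *     void *p = scalable_malloc(100);
 *     size_t n = scalable_msize(p);   // n >= 100: the object's bin size
 *
 * For large objects it is the distance from the returned pointer to the end
 * of the underlying allocation.
 */
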
/*
 * A variant that provides additional memory safety by checking whether the given
 * address was obtained from this allocator, and if not, redirecting to the
 * provided alternative call.
 */
extern "C" size_t safer_scalable_msize (void *object, size_t (*original_msize)(void*))
{
    if (object) {
        // Check whether the memory was allocated by scalable_malloc
        if (isRecognized(object))
            return scalable_msize(object);
        else if (original_msize)
            return original_msize(object);
    }
    // object is NULL or unknown
    errno = EINVAL;
    return 0;
}

/********* End code for scalable_msize ***********/