#pragma once

#include "../Lock.h"
#include "../mpmc_node_queue.h"
#include "../Algorithm.h"
#include <algorithm>
#include <type_traits>
#include <cstring>
#include <new> // for placement new in reset_segments()

namespace baselib
{
    BASELIB_CPP_INTERFACE
    {
        namespace detail
        {
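            // Allocator of fixed-size blocks, used as the building block of tlsf_allocator below.
            //
            // Blocks are handed out from a lock-free MPMC free list (m_FreeBlocks). When more
            // capacity is needed it is grown under m_CapacityLock: a new segment is obtained
            // from the underlying Allocator, carved into blocks and pushed onto the free list.
            // Individual blocks are never returned to the base allocator; segment memory is
            // only released as a whole via deallocate_segments().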
            template<class Allocator>
            class tlsf_block_allocator
            {
                baselib::Lock m_CapacityLock;
                ALIGNED_ATOMIC(size_t) m_Capacity;
                baselib::mpmc_node_queue<baselib::mpmc_node> m_FreeBlocks;

                struct Segment
                {
                    uintptr_t data;
                    size_t size;
                    Segment *next;
                } *m_Segments;

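                // Carves the segment's data area into `block_count` blocks of `block_size` bytes,
                // chains them into a singly linked list via mpmc_node::next and publishes the
                // whole chain to the free list with a single push_back.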
                void LinkSegment(Segment* segment, const size_t block_size, size_t block_count)
                {
                    uintptr_t nodeData = segment->data;
                    baselib::mpmc_node* firstNode = reinterpret_cast<baselib::mpmc_node*>(nodeData);
                    baselib::mpmc_node* node = firstNode;
                    for (size_t i = 0; i < block_count; ++i)
                    {
                        node = reinterpret_cast<baselib::mpmc_node*>(nodeData);
                        nodeData += block_size;
                        node->next.obj = reinterpret_cast<baselib::mpmc_node*>(nodeData);
                    }
                    m_FreeBlocks.push_back(firstNode, node);
                }

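                // Grows capacity by at least `size` bytes worth of `block_size` blocks. The block
                // data and the Segment header live in one allocation from the base allocator, with
                // the header placed right after the block data. Must be called with m_CapacityLock held.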
                bool ExpandCapacity(size_t size, size_t block_size, Allocator& allocator)
                {
                    if (size == 0)
                        return true;

                    // Align to the underlying allocator alignment. The requested size must also be at least block_size.
                    block_size = baselib::Algorithm::CeilAligned(block_size, alignment);
                    size = std::max(baselib::Algorithm::CeilAligned(size, alignment), block_size);

                    // Take the base allocator's optimal size into account, i.e. if it is higher than the requested size, expand using the optimal size.
                    const size_t minSize = size + sizeof(Segment);
                    const size_t optimalSize = allocator.optimal_size(minSize);
                    const size_t segment_size = std::max(optimalSize, minSize);
                    const size_t block_count = size / block_size;

                    // Allocate one memory block that contains both the block data and the Segment info.
                    uintptr_t segmentMemory = reinterpret_cast<uintptr_t>(allocator.allocate(segment_size));
                    if (segmentMemory == 0)
                        return false;

                    // Store the data pointer and size information in the segment header.
                    Segment* segment = reinterpret_cast<Segment*>(segmentMemory + size);
                    segment->data = segmentMemory;
                    segment->size = segment_size;

                    // Link the segment to the existing segments and add capacity.
                    // This function runs in the scope of a locked `m_CapacityLock`, which has implicit acquire (lock) and release (unlock) barriers.
                    // The order of the updates to m_Segments and m_Capacity is irrelevant. Calling `allocate` from other threads may already result in a
                    // successful allocation, but that is not a problem since this process repeats if we were called from `allocate` and the container was pre-empted.
                    // The side effect of not
                    segment->next = m_Segments;
                    m_Segments = segment;
                    LinkSegment(segment, block_size, block_count);
                    baselib::atomic_fetch_add_explicit(m_Capacity, block_size * block_count, baselib::memory_order_relaxed);
                    return true;
                }

            public:
                static constexpr uint32_t alignment = Allocator::alignment;

                // non-copyable
                tlsf_block_allocator(const tlsf_block_allocator& other) = delete;
                tlsf_block_allocator& operator=(const tlsf_block_allocator& other) = delete;

                // non-movable (strictly speaking not needed but listed to signal intent)
                tlsf_block_allocator(tlsf_block_allocator&& other) = delete;
                tlsf_block_allocator& operator=(tlsf_block_allocator&& other) = delete;

                tlsf_block_allocator() : m_CapacityLock(), m_Capacity(0), m_FreeBlocks(), m_Segments(nullptr) {}

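                // Pops a block from the free list; returns nullptr when the list is empty.
                // Capacity is only grown explicitly, via reserve() or increase_capacity().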
                void* allocate()
                {
                    return m_FreeBlocks.try_pop_front();
                }

                bool deallocate(void* ptr)
                {
                    m_FreeBlocks.push_back(reinterpret_cast<baselib::mpmc_node*>(ptr));
                    return true;
                }

                bool deallocate(void* ptr_first, void* ptr_last)
                {
                    m_FreeBlocks.push_back(reinterpret_cast<baselib::mpmc_node*>(ptr_first), reinterpret_cast<baselib::mpmc_node*>(ptr_last));
                    return true;
                }

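                // Returns all segment memory to the base allocator. This does not clear the free
                // list or the capacity counter; use reset_segments() for that.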
                void deallocate_segments(Allocator& allocator)
                {
                    Segment *segment = m_Segments;
                    while (segment)
                    {
                        Segment *nextSegment = segment->next;
                        allocator.deallocate(reinterpret_cast<void *>(segment->data), segment->size);
                        segment = nextSegment;
                    }
                }

                void reset_segments()
                {
                    if (m_Segments)
                    {
                        m_Segments = nullptr;
                        m_Capacity = 0;
                        m_FreeBlocks.~mpmc_node_queue<baselib::mpmc_node>();
                        new(&m_FreeBlocks) mpmc_node_queue<baselib::mpmc_node>();
                    }
                }

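                // reserve() grows total capacity to at least `capacity` bytes of `size`-byte blocks.
                // increase_capacity() expands only when the free list is empty, adding the current
                // capacity again (i.e. roughly doubling it), or `size` bytes worth of blocks on first use.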
                bool reserve(size_t size, size_t capacity, Allocator& allocator)
                {
                    bool result;
                    m_CapacityLock.AcquireScoped([&] {
                        result = capacity > m_Capacity ? ExpandCapacity(capacity - m_Capacity, size, allocator) : true;
                    });
                    return result;
                }

                bool increase_capacity(size_t size, Allocator& allocator)
                {
                    bool result = true;
                    m_CapacityLock.AcquireScoped([&] {
                        if (m_FreeBlocks.empty())
                            result = ExpandCapacity(m_Capacity == 0 ? size : m_Capacity, size, allocator);
                    });
                    return result;
                }

                size_t capacity() const
                {
                    return baselib::atomic_load_explicit(m_Capacity, baselib::memory_order_relaxed);
                }

                static constexpr size_t optimal_size(const size_t size)
                {
                    return baselib::Algorithm::CeilAligned(size, alignment);
                }
            };

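            // Two-Level Segregated Fit (TLSF) style allocator.
            //
            // Sizes in [min_size, max_size] are mapped onto a table of block allocators: the first
            // level splits sizes into power-of-two ranges, the second level splits each range into
            // `linear_subdivisions` equally sized classes. Allocations are served from the matching
            // size class and rounded up to its block size.
            //
            // Usage sketch (illustrative only; `MyPageAllocator` is a hypothetical BaseAllocator
            // providing `alignment`, `allocate`, `deallocate`, `optimal_size` and, optionally,
            // `deallocate_all`):
            //
            //   baselib::detail::tlsf_allocator<16, 4096, 4, MyPageAllocator> alloc;
            //   void* p = alloc.allocate(100);      // rounded up to the 112-byte size class (with 4 subdivisions)
            //   p = alloc.reallocate(p, 100, 200);  // moves the block only if the new size lands in a different class
            //   alloc.deallocate(p, 200);           // size must match what was last requested for p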
            template<size_t min_size, size_t max_size, size_t linear_subdivisions, class BaseAllocator>
            class tlsf_allocator : private BaseAllocator
            {
                using BlockAllocator = detail::tlsf_block_allocator<BaseAllocator>;

            public:
                static constexpr uint32_t alignment = BaseAllocator::alignment;

                // non-copyable
                tlsf_allocator(const tlsf_allocator& other) = delete;
                tlsf_allocator& operator=(const tlsf_allocator& other) = delete;

                // non-movable (strictly speaking not needed but listed to signal intent)
                tlsf_allocator(tlsf_allocator&& other) = delete;
                tlsf_allocator& operator=(tlsf_allocator&& other) = delete;

                tlsf_allocator() : m_Allocators() {}
                ~tlsf_allocator() { DeallocateSegmentsImpl(); }

                void* try_allocate(size_t size)
                {
                    return getAllocator(size).allocate();
                }

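                // Allocates a block for `size` bytes, growing the matching size class on demand.
                // Returns nullptr only if the base allocator fails to provide more memory.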
                void* allocate(size_t size)
                {
                    BlockAllocator& allocator = getAllocator(size);
                    do
                    {
                        void* p;
                        if (OPTIMIZER_LIKELY(p = allocator.allocate()))
                            return p;
                        if (!allocator.increase_capacity(AllocatorSize(size), static_cast<BaseAllocator&>(*this)))
                            return nullptr;
                    }
                    while (true);
                }

                void* try_reallocate(void* ptr, size_t old_size, size_t new_size)
                {
                    return ReallocateImpl<true>(ptr, old_size, new_size);
                }

                void* reallocate(void* ptr, size_t old_size, size_t new_size)
                {
                    return ReallocateImpl<false>(ptr, old_size, new_size);
                }

                bool deallocate(void* ptr, size_t size)
                {
                    return ptr == nullptr ? true : getAllocator(size).deallocate(ptr);
                }

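                // Frees all segments back to the base allocator and resets every size class.
                // The acquire/release fences order the reset against surrounding operations
                // on the allocator.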
                void deallocate_all()
                {
                    atomic_thread_fence(memory_order_acquire);
                    DeallocateSegmentsImpl();
                    for (auto& pow2Allocators : m_Allocators)
                        for (auto& blockAllocator : pow2Allocators)
                            blockAllocator.reset_segments();
                    atomic_thread_fence(memory_order_release);
                }

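                // Batch deallocation: blocks of the same size class are linked into a chain with
                // batch_deallocate_link() and then released with a single batch_deallocate() call,
                // where ptr_first..ptr_last delimit that chain.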
                bool batch_deallocate(void* ptr_first, void* ptr_last, size_t size)
                {
                    return ((ptr_first == nullptr) || (ptr_last == nullptr)) ? false : getAllocator(size).deallocate(ptr_first, ptr_last);
                }

                void batch_deallocate_link(void* ptr, void* ptr_next)
                {
                    reinterpret_cast<baselib::mpmc_node*>(ptr)->next = reinterpret_cast<baselib::mpmc_node*>(ptr_next);
                }

                bool reserve(size_t size, size_t capacity)
                {
                    return getAllocator(size).reserve(AllocatorSize(size), capacity, static_cast<BaseAllocator&>(*this));
                }

                size_t capacity(size_t size)
                {
                    return getAllocator(size).capacity();
                }

                static constexpr size_t optimal_size(const size_t size)
                {
                    return size == 0 ? 0 : BlockAllocator::optimal_size(AllocatorSize(size));
                }

            private:
                struct CompileTime
                {
                    static constexpr size_t Log2Base(size_t value, size_t offset) { return (value > 1) ? Log2Base(value >> (size_t)1, offset + 1) : offset; }
                    static constexpr size_t Log2Base(size_t value) { return Log2Base(value, 0); }
                    static constexpr size_t Max(size_t a, size_t b) { return a > b ? a : b; }
                };

                static constexpr size_t m_MinSize = CompileTime::Max(min_size, CompileTime::Max(CompileTime::Max(sizeof(void*), linear_subdivisions), alignment));
                static constexpr size_t m_MinSizePow2 = baselib::Algorithm::CeilPowerOfTwo(m_MinSize);
                static constexpr size_t m_MaxSizePow2 = baselib::Algorithm::CeilPowerOfTwo(CompileTime::Max(max_size, m_MinSize));
                static constexpr size_t m_MinSizeMask = static_cast<size_t>(1) << CompileTime::Log2Base(m_MinSizePow2 - 1);
                static constexpr size_t m_AllocatorCount = (CompileTime::Log2Base(m_MaxSizePow2) - CompileTime::Log2Base(m_MinSizePow2)) + 1;
                static constexpr size_t m_AllocatorBaseOffsetLog2 = CompileTime::Log2Base(m_MinSizePow2) - 1;
                static constexpr size_t m_LinearSubdivisionsLog2 = CompileTime::Log2Base(linear_subdivisions);

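                // Size-to-class mapping: AllocatorSizeLog2 yields the power-of-two level
                // (m_MinSizeMask keeps the level from dropping below the smallest size class) and
                // LinearAllocatorSizeLog2 selects one of `linear_subdivisions` classes within that
                // level. For example, with min_size = 16 and linear_subdivisions = 4, a request of
                // 100 bytes falls in the (64, 128] level, subdivision (96, 112], and AllocatorSize
                // rounds it up to a 112-byte block.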
                static constexpr size_t AllocatorSizeLog2(size_t size) { return baselib::Algorithm::HighestBitNonZero(size | m_MinSizeMask); }
                static constexpr size_t LinearAllocatorSizeLog2(size_t size, size_t sizeLog2) { return (size & (((size_t)1 << sizeLog2) - 1)) >> (sizeLog2 - m_LinearSubdivisionsLog2); }

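                // AllocatorSize and getAllocator are selected at compile time via enable_if: the
                // degenerate configurations (a single power-of-two level and/or a single
                // subdivision) use simpler mappings, while the general case combines both indices.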
                template<int value = ((m_AllocatorCount == 1 && linear_subdivisions == 1) ? 1 : 2), typename std::enable_if<(value == 1), int>::type = 0>
                static constexpr FORCE_INLINE size_t AllocatorSize(size_t size)
                {
                    return m_MinSizePow2;
                }

                template<int value = ((m_AllocatorCount != 1 && linear_subdivisions == 1) ? 3 : 4), typename std::enable_if<(value == 3), int>::type = 0>
                static constexpr FORCE_INLINE size_t AllocatorSize(size_t size)
                {
                    return (size_t)1 << (AllocatorSizeLog2(size - 1) + 1);
                }

                template<int value = (linear_subdivisions == 1) ? 0 : 1, typename std::enable_if<(value), int>::type = 0>
                static FORCE_INLINE size_t AllocatorSize(size_t size)
                {
                    const size_t subDivSize = ((size_t)1 << baselib::Algorithm::HighestBitNonZero(size)) >> m_LinearSubdivisionsLog2;
                    return ((size - 1) & ~(subDivSize - 1)) + subDivSize;
                }

                template<int value = ((m_AllocatorCount == 1 && linear_subdivisions == 1) ? 1 : 2), typename std::enable_if<(value == 1), int>::type = 0>
                BlockAllocator& getAllocator(size_t)
                {
                    return m_Allocators[0][0];
                }

                template<int value = ((m_AllocatorCount != 1 && linear_subdivisions == 1) ? 3 : 4), typename std::enable_if<(value == 3), int>::type = 0>
                BlockAllocator& getAllocator(const size_t size)
                {
                    return m_Allocators[AllocatorSizeLog2(size - 1) - m_AllocatorBaseOffsetLog2][0];
                }

                template<int value = ((m_AllocatorCount == 1 && linear_subdivisions != 1) ? 5 : 6), typename std::enable_if<(value == 5), int>::type = 0>
                BlockAllocator& getAllocator(size_t size)
                {
                    --size;
                    return m_Allocators[0][LinearAllocatorSizeLog2(size, AllocatorSizeLog2(size))];
                }

                template<int value = ((m_AllocatorCount != 1 && linear_subdivisions != 1) ? 7 : 8), typename std::enable_if<(value == 7), int>::type = 0>
                BlockAllocator& getAllocator(size_t size)
                {
                    --size;
                    const size_t sizeLog2 = AllocatorSizeLog2(size);
                    return m_Allocators[sizeLog2 - m_AllocatorBaseOffsetLog2][LinearAllocatorSizeLog2(size, sizeLog2)];
                }

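                // SFINAE detection of a `deallocate_all()` member on BaseAllocator. If present,
                // DeallocateSegmentsImpl defers to it; otherwise each block allocator returns its
                // segments individually.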
                template<typename T> struct has_deallocate_all
                {
                    template<typename U, void (U::*)()> struct Check;
                    template<typename U> static constexpr bool test(Check<U, &U::deallocate_all> *) { return true; }
                    template<typename U> static constexpr bool test(...) { return false; }
                    static constexpr bool value = test<T>(nullptr);
                };

                template<bool value = has_deallocate_all<BaseAllocator>::value, typename std::enable_if<(value), int>::type = 0>
                void DeallocateSegmentsImpl()
                {
                    BaseAllocator::deallocate_all();
                }

                template<bool value = has_deallocate_all<BaseAllocator>::value, typename std::enable_if<(!value), int>::type = 0>
                void DeallocateSegmentsImpl()
                {
                    for (auto& pow2Allocators : m_Allocators)
                        for (auto& blockAllocator : pow2Allocators)
                            blockAllocator.deallocate_segments(static_cast<BaseAllocator&>(*this));
                }

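                // If the old and new size map to the same size class the original block is reused
                // as-is. Otherwise a block is taken from the new class (growing it unless
                // use_try_allocate is set), the payload is copied over and the old block is released.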
                template<bool use_try_allocate>
                void* ReallocateImpl(void* ptr, size_t old_size, size_t new_size)
                {
                    if (ptr == nullptr)
                        return use_try_allocate ? try_allocate(new_size) : allocate(new_size);

                    BlockAllocator& oldAllocator = getAllocator(old_size);
                    BlockAllocator& newAllocator = getAllocator(new_size);
                    if (&oldAllocator == &newAllocator)
                        return ptr;

                    void* newPtr = newAllocator.allocate();
                    if ((!use_try_allocate) && (newPtr == nullptr))
                        newPtr = allocate(new_size);

                    if (newPtr)
                    {
                        std::memcpy(newPtr, ptr, std::min(new_size, old_size));
                        oldAllocator.deallocate(ptr);
                    }
                    return newPtr;
                }

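                // One block allocator per size class: the first index is the power-of-two level,
                // the second the linear subdivision within that level.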
                BlockAllocator m_Allocators[m_AllocatorCount][linear_subdivisions];
            };
        }
    }
}