diff --git a/Recast/Include/RecastAlloc.h b/Recast/Include/RecastAlloc.h
index 3cdd450..c71c035 100644
--- a/Recast/Include/RecastAlloc.h
+++ b/Recast/Include/RecastAlloc.h
@@ -20,6 +20,9 @@
 #define RECASTALLOC_H
 
 #include <stddef.h>
+#include <stdint.h>
+
+#include <RecastAssert.h>
 
 /// Provides hint values to the memory allocator on how long the
 /// memory is expected to be used.
@@ -58,64 +61,254 @@ void* rcAlloc(size_t size, rcAllocHint hint);
 /// @see rcAlloc
 void rcFree(void* ptr);
 
+/// An implementation of operator new usable for placement new. The default one is part of STL (which we don't use).
+/// rcNewTag is a dummy type used to differentiate our operator from the STL one, in case users import both Recast
+/// and STL.
+struct rcNewTag {};
+inline void* operator new(size_t, const rcNewTag&, void* p) { return p; }
 
-/// A simple dynamic array of integers.
+/// Signed to avoid warnings when comparing to int loop indexes, and common errors when comparing to zero.
+/// MSVC2010 has a bug where ssize_t is unsigned (!!!).
+typedef intptr_t rcSizeType;
+#define RC_SIZE_MAX INTPTR_MAX
+
+/// Macros to hint to the compiler about the likeliest branch. Please add a benchmark that demonstrates a performance
+/// improvement before introducing use cases. The argument is normalised with !! so any boolean-testable expression works.
+#if defined(__GNUC__) || defined(__clang__)
+#define rcLikely(x) __builtin_expect(!!(x), 1)
+#define rcUnlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define rcLikely(x) (x)
+#define rcUnlikely(x) (x)
+#endif
+
+/// Variable-sized storage type. Mimics the interface of std::vector<T> with some notable differences:
+/// * Uses rcAlloc()/rcFree() to handle storage.
+/// * No support for a custom allocator.
+/// * Uses signed size instead of size_t to avoid warnings in for loops: "for (int i = 0; i < foo.size(); i++)"
+/// * Omits methods of limited utility: insert/erase (bad performance), at (we don't use exceptions), operator=.
+/// * assign() and the pre-sizing constructor follow C++11 semantics -- they don't construct a temporary if no value is provided.
+/// * push_back() and resize() support adding values from the current vector. Range-based constructors and assign(begin, end) do not.
+/// * No specialization for bool.
+template <typename T, rcAllocHint H>
+class rcVectorBase {
+	rcSizeType m_size;
+	rcSizeType m_cap;
+	T* m_data;
+	// Constructs a T at the given address with either the copy constructor or the default.
+	static void construct(T* p, const T& v) { ::new(rcNewTag(), (void*)p) T(v); }
+	static void construct(T* p) { ::new(rcNewTag(), (void*)p) T; }
+	static void construct_range(T* begin, T* end);
+	static void construct_range(T* begin, T* end, const T& value);
+	static void copy_range(T* dst, const T* begin, const T* end);
+	void destroy_range(rcSizeType begin, rcSizeType end);
+	// Creates an array of the given size, copies all of this vector's data into it, and returns it.
+	T* allocate_and_copy(rcSizeType size);
+	void resize_impl(rcSizeType size, const T* value);
+ public:
+	typedef rcSizeType size_type;
+	typedef T value_type;
+
+	rcVectorBase() : m_size(0), m_cap(0), m_data(0) {}
+	rcVectorBase(const rcVectorBase& other) : m_size(0), m_cap(0), m_data(0) { assign(other.begin(), other.end()); }
+	explicit rcVectorBase(rcSizeType count) : m_size(0), m_cap(0), m_data(0) { resize(count); }
+	rcVectorBase(rcSizeType count, const T& value) : m_size(0), m_cap(0), m_data(0) { resize(count, value); }
+	rcVectorBase(const T* begin, const T* end) : m_size(0), m_cap(0), m_data(0) { assign(begin, end); }
+	~rcVectorBase() { destroy_range(0, m_size); rcFree(m_data); }
+
+	void reserve(rcSizeType size);
+
+	void assign(rcSizeType count, const T& value) { clear(); resize(count, value); }
+	void assign(const T* begin, const T* end);
+
+	void resize(rcSizeType size) { resize_impl(size, NULL); }
+	void resize(rcSizeType size, const T& value) { resize_impl(size, &value); }
+
+	void push_back(const T& value);
+	void pop_back() { rcAssert(m_size > 0); back().~T(); m_size--; }
+	void clear() { resize(0); }
+	rcSizeType size() const { return m_size; }
+	rcSizeType capacity() const { return m_cap; }
+	bool empty() const { return size() == 0; }
+
+	const T& operator[](rcSizeType i) const { rcAssert(i >= 0 && i < m_size); return m_data[i]; }
+	T& operator[](rcSizeType i) { rcAssert(i >= 0 && i < m_size); return m_data[i]; }
+
+	const T& front() const { rcAssert(m_size); return m_data[0]; }
+	T& front() { rcAssert(m_size); return m_data[0]; }
+	const T& back() const { rcAssert(m_size); return m_data[m_size - 1]; }
+	T& back() { rcAssert(m_size); return m_data[m_size - 1]; }
+	const T* data() const { return m_data; }
+	T* data() { return m_data; }
+
+	T* begin() { return m_data; }
+	T* end() { return m_data + m_size; }
+	const T* begin() const { return m_data; }
+	const T* end() const { return m_data + m_size; }
+
+	void swap(rcVectorBase& other);
+
+	// Copy assignment is unsupported: declared but intentionally never defined.
+	rcVectorBase& operator=(const rcVectorBase& other);
+};
+
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::reserve(rcSizeType count) {
+	if (count <= m_cap) {
+		return;
+	}
+	T* new_data = allocate_and_copy(count);
+	destroy_range(0, m_size);
+	rcFree(m_data);
+	m_data = new_data;
+	m_cap = count;
+}
+template <typename T, rcAllocHint H>
+T* rcVectorBase<T, H>::allocate_and_copy(rcSizeType size) {
+	rcAssert(size >= 0 && RC_SIZE_MAX / static_cast<rcSizeType>(sizeof(T)) >= size);
+	T* new_data = static_cast<T*>(rcAlloc(sizeof(T) * size, H));
+	rcAssert(new_data); // The vector interface has no way to report an allocation failure to the caller.
+	copy_range(new_data, m_data, m_data + m_size);
+	return new_data;
+}
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::assign(const T* begin, const T* end) {
+	clear();
+	reserve(end - begin);
+	m_size = end - begin;
+	copy_range(m_data, begin, end);
+}
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::push_back(const T& value) {
+	// rcLikely increases performance by ~50% on BM_rcVector_PushPreallocated,
+	// and by ~2-5% on BM_rcVector_Push.
+	if (rcLikely(m_size < m_cap)) {
+		construct(m_data + m_size++, value);
+		return;
+	}
+
+	rcAssert(RC_SIZE_MAX / 2 >= m_size);
+	rcSizeType new_cap = m_size ? 2*m_size : 1;
+	T* data = allocate_and_copy(new_cap);
+	// construct between allocate and destroy+free in case value is
+	// in this vector.
+	construct(data + m_size, value);
+	destroy_range(0, m_size);
+	m_size++;
+	m_cap = new_cap;
+	rcFree(m_data);
+	m_data = data;
+}
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::resize_impl(rcSizeType size, const T* value) {
+	if (size < m_size) {
+		destroy_range(size, m_size);
+		m_size = size;
+	} else if (size > m_size) {
+		// Spare capacity is reused in place; only growing past m_cap reallocates (to exactly "size").
+		const bool grow = size > m_cap;
+		T* new_data = grow ? allocate_and_copy(size) : m_data;
+		// We defer destroying/freeing old data until after constructing
+		// new elements in case "value" is there.
+		if (value) {
+			construct_range(new_data + m_size, new_data + size, *value);
+		} else {
+			construct_range(new_data + m_size, new_data + size);
+		}
+		if (grow) {
+			destroy_range(0, m_size);
+			rcFree(m_data);
+			m_data = new_data;
+			m_cap = size;
+		}
+		m_size = size;
+	}
+}
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::swap(rcVectorBase<T, H>& other) {
+	// TODO: Reorganize headers so we can use rcSwap here.
+	rcSizeType tmp_cap = other.m_cap;
+	rcSizeType tmp_size = other.m_size;
+	T* tmp_data = other.m_data;
+
+	other.m_cap = m_cap;
+	other.m_size = m_size;
+	other.m_data = m_data;
+
+	m_cap = tmp_cap;
+	m_size = tmp_size;
+	m_data = tmp_data;
+}
+// static
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::construct_range(T* begin, T* end) {
+	for (T* p = begin; p < end; p++) {
+		construct(p);
+	}
+}
+// static
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::construct_range(T* begin, T* end, const T& value) {
+	for (T* p = begin; p < end; p++) {
+		construct(p, value);
+	}
+}
+// static
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::copy_range(T* dst, const T* begin, const T* end) {
+	for (rcSizeType i = 0 ; i < end - begin; i++) {
+		construct(dst + i, begin[i]);
+	}
+}
+template <typename T, rcAllocHint H>
+void rcVectorBase<T, H>::destroy_range(rcSizeType begin, rcSizeType end) {
+	for (rcSizeType i = begin; i < end; i++) {
+		m_data[i].~T();
+	}
+}
+
+/// rcVectorBase whose storage is requested from rcAlloc() with the RC_ALLOC_TEMP hint.
+template <typename T>
+class rcTempVector : public rcVectorBase<T, RC_ALLOC_TEMP> {
+	typedef rcVectorBase<T, RC_ALLOC_TEMP> Base;
+public:
+	rcTempVector() : Base() {}
+	explicit rcTempVector(rcSizeType size) : Base(size) {}
+	rcTempVector(rcSizeType size, const T& value) : Base(size, value) {}
+	rcTempVector(const rcTempVector<T>& other) : Base(other) {}
+	rcTempVector(const T* begin, const T* end) : Base(begin, end) {}
+};
+/// rcVectorBase whose storage is requested from rcAlloc() with the RC_ALLOC_PERM hint.
+template <typename T>
+class rcPermVector : public rcVectorBase<T, RC_ALLOC_PERM> {
+	typedef rcVectorBase<T, RC_ALLOC_PERM> Base;
+public:
+	rcPermVector() : Base() {}
+	explicit rcPermVector(rcSizeType size) : Base(size) {}
+	rcPermVector(rcSizeType size, const T& value) : Base(size, value) {}
+	rcPermVector(const rcPermVector<T>& other) : Base(other) {}
+	rcPermVector(const T* begin, const T* end) : Base(begin, end) {}
+};
+
+
+/// Legacy class. Prefer rcTempVector<int> or rcPermVector<int>.
 class rcIntArray
 {
-	int* m_data;
-	int m_size, m_cap;
-
-	void doResize(int n);
-
-	// Explicitly disabled copy constructor and copy assignment operator.
-	rcIntArray(const rcIntArray&);
-	rcIntArray& operator=(const rcIntArray&);
-
+	rcTempVector<int> m_impl;
 public:
-	/// Constructs an instance with an initial array size of zero.
-	rcIntArray() : m_data(0), m_size(0), m_cap(0) {}
-
-	/// Constructs an instance initialized to the specified size.
-	/// @param[in] n The initial size of the integer array.
-	rcIntArray(int n) : m_data(0), m_size(0), m_cap(0) { resize(n); }
-	~rcIntArray() { rcFree(m_data); }
-
-	/// Specifies the new size of the integer array.
-	/// @param[in] n The new size of the integer array.
-	void resize(int n)
-	{
-		if (n > m_cap)
-			doResize(n);
-
-		m_size = n;
-	}
-
-	/// Push the specified integer onto the end of the array and increases the size by one.
-	/// @param[in] item The new value.
-	void push(int item) { resize(m_size+1); m_data[m_size-1] = item; }
-
-	/// Returns the value at the end of the array and reduces the size by one.
-	/// @return The value at the end of the array.
+	rcIntArray() {}
+	rcIntArray(int n) : m_impl(n, 0) {}
+	void push(int item) { m_impl.push_back(item); }
+	void resize(int size) { m_impl.resize(size); }
 	int pop()
 	{
-		if (m_size > 0)
-			m_size--;
-
-		return m_data[m_size];
+		int v = m_impl.back();
+		m_impl.pop_back();
+		return v;
 	}
-
-	/// The value at the specified array index.
-	/// @warning Does not provide overflow protection.
-	/// @param[in] i The index of the value.
-	const int& operator[](int i) const { return m_data[i]; }
-
-	/// The value at the specified array index.
-	/// @warning Does not provide overflow protection.
-	/// @param[in] i The index of the value.
-	int& operator[](int i) { return m_data[i]; }
-
-	/// The current size of the integer array.
-	int size() const { return m_size; }
+	int size() const { return static_cast<int>(m_impl.size()); }
+	int& operator[](int index) { return m_impl[index]; }
+	const int& operator[](int index) const { return m_impl[index]; }
 };
 
 /// A simple helper class used to delete an array when it goes out of scope.
diff --git a/Recast/Source/RecastAlloc.cpp b/Recast/Source/RecastAlloc.cpp
index 453b5fa..bdc3661 100644
--- a/Recast/Source/RecastAlloc.cpp
+++ b/Recast/Source/RecastAlloc.cpp
@@ -58,29 +58,3 @@ void rcFree(void* ptr)
 	if (ptr)
 		sRecastFreeFunc(ptr);
 }
-
-/// @class rcIntArray
-///
-/// While it is possible to pre-allocate a specific array size during
-/// construction or by using the #resize method, certain methods will
-/// automatically resize the array as needed.
-///
-/// @warning The array memory is not initialized to zero when the size is
-/// manually set during construction or when using #resize.
-
-/// @par
-///
-/// Using this method ensures the array is at least large enough to hold
-/// the specified number of elements. This can improve performance by
-/// avoiding auto-resizing during use.
-void rcIntArray::doResize(int n)
-{
-	if (!m_cap) m_cap = n;
-	while (m_cap < n) m_cap *= 2;
-	int* newData = (int*)rcAlloc(m_cap*sizeof(int), RC_ALLOC_TEMP);
-	rcAssert(newData);
-	if (m_size && newData) memcpy(newData, m_data, m_size*sizeof(int));
-	rcFree(m_data);
-	m_data = newData;
-}
-
diff --git a/Tests/Recast/Tests_Recast.cpp b/Tests/Recast/Tests_Recast.cpp
index b917622..11def67 100644
--- a/Tests/Recast/Tests_Recast.cpp
+++ b/Tests/Recast/Tests_Recast.cpp
@@ -1,6 +1,14 @@
+#include <stdio.h>
+#include <string.h>
+
 #include "catch.hpp"
 
 #include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+// For comparing to rcVector in benchmarks.
+#include <vector>
 
 TEST_CASE("rcSwap")
 {
@@ -828,3 +836,399 @@ TEST_CASE("rcRasterizeTriangles")
 		REQUIRE(!solid.spans[1 + 2 * width]->next);
 	}
 }
+
+// Used to verify that rcVector constructs/destroys objects correctly.
+struct Incrementor {
+	static int constructions;
+	static int destructions;
+	static int copies;
+	Incrementor() { constructions++; }
+	~Incrementor() { destructions++; }
+	Incrementor(const Incrementor&) { copies++; }
+	Incrementor& operator=(const Incrementor&); // Deleted assignment.
+
+	static void Reset() {
+		constructions = 0;
+		destructions = 0;
+		copies = 0;
+	}
+};
+int Incrementor::constructions = 0;
+int Incrementor::destructions = 0;
+int Incrementor::copies = 0;
+
+const int kMaxAllocSize = 1024;
+const unsigned char kClearValue = 0xff;
+// Simple alloc/free pair: every block is kMaxAllocSize bytes, zeroed on alloc and filled with kClearValue on free.
+void* AllocAndInit(size_t size, rcAllocHint) {
+	REQUIRE(size <= static_cast<size_t>(kMaxAllocSize));
+	void* mem = malloc(kMaxAllocSize);
+	REQUIRE(mem != NULL);
+	return memset(mem, 0, kMaxAllocSize);
+}
+void FreeAndClear(void* mem) {
+	if (mem) {
+		memset(mem, kClearValue, kMaxAllocSize);
+	}
+	free(mem);
+}
+// Verifies that memory has been initialized by AllocAndInit, and not cleared by FreeAndClear.
+struct Copier {
+	const static int kAlive;
+	const static int kDead;
+	Copier() : value(kAlive) {}
+
+	// Checks that the source of the copy is valid.
+	Copier(const Copier& other) : value(kAlive) {
+		other.Verify();
+	}
+	Copier& operator=(const Copier&);
+
+	// Marks the value as dead.
+	~Copier() { value = kDead; }
+	void Verify() const {
+		REQUIRE(value == kAlive);
+	}
+	volatile int value;
+};
+const int Copier::kAlive = 0x1f;
+const int Copier::kDead = 0xde;
+
+TEST_CASE("rcVector")
+{
+	SECTION("Vector basics.")
+	{
+		rcTempVector<int> vec;
+		REQUIRE(vec.size() == 0);
+		vec.push_back(10);
+		vec.push_back(12);
+		REQUIRE(vec.size() == 2);
+		REQUIRE(vec.capacity() >= 2);
+		REQUIRE(vec[0] == 10);
+		REQUIRE(vec[1] == 12);
+		vec.pop_back();
+		REQUIRE(vec.size() == 1);
+		REQUIRE(vec[0] == 10);
+		vec.pop_back();
+		REQUIRE(vec.size() == 0);
+		vec.resize(100, 5);
+		REQUIRE(vec.size() == 100);
+		for (int i = 0; i < 100; i++) {
+			REQUIRE(vec[i] == 5);
+			vec[i] = i;
+		}
+		for (int i = 0; i < 100; i++) {
+			REQUIRE(vec[i] == i);
+		}
+	}
+
+	SECTION("Constructors/Destructors")
+	{
+		Incrementor::Reset();
+		rcTempVector<Incrementor> vec;
+		REQUIRE(Incrementor::constructions == 0);
+		REQUIRE(Incrementor::destructions == 0);
+		REQUIRE(Incrementor::copies == 0);
+		vec.push_back(Incrementor());
+		// push_back() may create and copy objects internally.
+		REQUIRE(Incrementor::constructions == 1);
+		REQUIRE(Incrementor::destructions >= 1);
+		// REQUIRE(Incrementor::copies >= 2);
+
+		vec.clear();
+		Incrementor::Reset();
+		vec.resize(100);
+		// Each element is default-constructed in place; no temporaries are created or destroyed.
+		REQUIRE(Incrementor::constructions == 100);
+		REQUIRE(Incrementor::destructions == 0);
+		REQUIRE(Incrementor::copies == 0);
+
+		Incrementor::Reset();
+		for (int i = 0; i < 100; i++) {
+			REQUIRE(Incrementor::destructions == i);
+			vec.pop_back();
+		}
+		REQUIRE(Incrementor::constructions == 0);
+		REQUIRE(Incrementor::destructions == 100);
+		REQUIRE(Incrementor::copies == 0);
+
+		vec.resize(100);
+		Incrementor::Reset();
+		vec.clear();
+		// clear() only destroys the existing elements; it constructs nothing.
+		REQUIRE(Incrementor::constructions == 0);
+		REQUIRE(Incrementor::destructions == 100);
+		REQUIRE(Incrementor::copies == 0);
+
+		Incrementor::Reset();
+		vec.resize(100, Incrementor());
+		REQUIRE(Incrementor::constructions == 1);
+		REQUIRE(Incrementor::destructions == 1);
+		REQUIRE(Incrementor::copies == 100);
+	}
+
+	SECTION("Copying Contents")
+	{
+
+		// Verify event counts after doubling size -- should require a lot of copying and destroying.
+		rcTempVector<Incrementor> vec;
+		Incrementor::Reset();
+		vec.resize(100);
+		REQUIRE(Incrementor::constructions == 100);
+		REQUIRE(Incrementor::destructions == 0);
+		REQUIRE(Incrementor::copies == 0);
+		Incrementor::Reset();
+		vec.resize(200);
+		REQUIRE(vec.size() == vec.capacity());
+		REQUIRE(Incrementor::constructions == 100); // Construct new elements.
+		REQUIRE(Incrementor::destructions == 100); // Destroy old contents.
+		REQUIRE(Incrementor::copies == 100); // Copy old elements into new array.
+	}
+
+	SECTION("Swap")
+	{
+		rcTempVector<int> a(10, 0xa);
+		rcTempVector<int> b;
+
+		int* a_data = a.data();
+		int* b_data = b.data();
+
+		a.swap(b);
+		REQUIRE(a.size() == 0);
+		REQUIRE(b.size() == 10);
+		REQUIRE(b[0] == 0xa);
+		REQUIRE(b[9] == 0xa);
+		REQUIRE(a.data() == b_data);
+		REQUIRE(b.data() == a_data);
+	}
+
+	SECTION("Overlapping init")
+	{
+		rcAllocSetCustom(&AllocAndInit, &FreeAndClear);
+		{
+			// Scoped so the vector is destroyed while the custom allocator is still installed.
+			rcTempVector<Copier> vec;
+			// Force a realloc during push_back().
+			vec.resize(64);
+			REQUIRE(vec.capacity() == vec.size());
+			REQUIRE(vec.capacity() > 0);
+
+			// Don't crash.
+			vec.push_back(vec[0]);
+		}
+		rcAllocSetCustom(NULL, NULL);
+	}
+
+	SECTION("Vector Destructor")
+	{
+		{
+			rcTempVector<Incrementor> vec;
+			vec.resize(10);
+			Incrementor::Reset();
+		}
+		REQUIRE(Incrementor::destructions == 10);
+	}
+
+	SECTION("Assign")
+	{
+		rcTempVector<int> a(10, 0xa);
+		a.assign(5, 0xb);
+		REQUIRE(a.size() == 5);
+		REQUIRE(a[0] == 0xb);
+		REQUIRE(a[4] == 0xb);
+		a.assign(15, 0xc);
+		REQUIRE(a.size() == 15);
+		REQUIRE(a[0] == 0xc);
+		REQUIRE(a[14] == 0xc);
+
+		rcTempVector<int> b;
+		b.assign(a.data(), a.data() + a.size());
+		REQUIRE(b.size() == a.size());
+		REQUIRE(b[0] == a[0]);
+	}
+
+	SECTION("Copy")
+	{
+		rcTempVector<int> a(10, 0xa);
+		rcTempVector<int> b(a);
+		REQUIRE(a.size() == 10);
+		REQUIRE(a.size() == b.size());
+		REQUIRE(a[0] == b[0]);
+		REQUIRE(a.data() != b.data());
+		rcTempVector<int> c(a.data(), a.data() + a.size());
+		REQUIRE(c.size() == a.size());
+		REQUIRE(c[0] == a[0]);
+
+		rcTempVector<Incrementor> d(10);
+		Incrementor::Reset();
+		rcTempVector<Incrementor> e(d);
+		REQUIRE(Incrementor::constructions == 0);
+		REQUIRE(Incrementor::destructions == 0);
+		REQUIRE(Incrementor::copies == 10);
+
+		Incrementor::Reset();
+		rcTempVector<Incrementor> f(d.data(), d.data() + d.size());
+		REQUIRE(Incrementor::constructions == 0);
+		REQUIRE(Incrementor::destructions == 0);
+		REQUIRE(Incrementor::copies == 10);
+	}
+}
+
+// TODO: Implement benchmarking for platforms other than posix.
+#ifdef __unix__
+#include <unistd.h>
+#ifdef _POSIX_TIMERS
+#include <time.h>
+#include <stdint.h>
+
+int64_t NowNanos() { // Reads CLOCK_PROCESS_CPUTIME_ID and returns it as a single nanosecond count.
+	struct timespec tp;
+	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tp);
+	return tp.tv_nsec + 1000000000LL * tp.tv_sec;
+}
+
+#define BM(name, iterations) /* Declares BM_<name>, registers it as a Catch test case, and opens the body that is timed `iterations` times. */ \
+	struct BM_ ## name { \
+		static void Run() { \
+			int64_t begin_time = NowNanos(); \
+			for (int i = 0 ; i < iterations; i++) { \
+				Body(); \
+			} \
+			int64_t nanos = NowNanos() - begin_time; \
+			printf("BM_%-35s %lld iterations in %10lld nanos: %10.2f nanos/it\n", #name ":", (long long)iterations, (long long)nanos, double(nanos) / iterations); \
+		} \
+		static void Body(); \
+	}; \
+	TEST_CASE(#name) { \
+		BM_ ## name::Run(); \
+	} \
+	void BM_ ## name::Body()
+
+const int64_t kNumLoops = 100; // Number of times each benchmark body is run.
+const int64_t kNumInserts = 100000; // Number of elements written per run.
+
+// Prevent compiler from eliding a calculation.
+// TODO: Implement for MSVC.
+template <typename T>
+void DoNotOptimize(T* v) {
+	asm volatile ("" : "+r" (v));
+}
+
+BM(FlatArray_Push, kNumLoops)
+{
+	int cap = 64;
+	int* v = (int*)rcAlloc(cap * sizeof(int), RC_ALLOC_TEMP);
+	for (int j = 0; j < kNumInserts; j++) {
+		if (j == cap) {
+			cap *= 2;
+			int* tmp = (int*)rcAlloc(sizeof(int) * cap, RC_ALLOC_TEMP);
+			memcpy(tmp, v, j * sizeof(int));
+			rcFree(v);
+			v = tmp;
+		}
+		v[j] = 2;
+	}
+
+	DoNotOptimize(v);
+	rcFree(v);
+}
+BM(FlatArray_Fill, kNumLoops)
+{
+	int* v = (int*)rcAlloc(sizeof(int) * kNumInserts, RC_ALLOC_TEMP);
+	for (int j = 0; j < kNumInserts; j++) {
+		v[j] = 2;
+	}
+
+	DoNotOptimize(v);
+	rcFree(v);
+}
+BM(FlatArray_Memset, kNumLoops)
+{
+	int* v = (int*)rcAlloc(sizeof(int) * kNumInserts, RC_ALLOC_TEMP);
+	memset(v, 0, kNumInserts * sizeof(int));
+
+	DoNotOptimize(v);
+	rcFree(v);
+}
+
+BM(rcVector_Push, kNumLoops)
+{
+	rcTempVector<int> v;
+	for (int j = 0; j < kNumInserts; j++) {
+		v.push_back(2);
+	}
+	DoNotOptimize(v.data());
+}
+BM(rcVector_PushPreallocated, kNumLoops)
+{
+	rcTempVector<int> v;
+	v.reserve(kNumInserts);
+	for (int j = 0; j < kNumInserts; j++) {
+		v.push_back(2);
+	}
+	DoNotOptimize(v.data());
+}
+BM(rcVector_Assign, kNumLoops)
+{
+	rcTempVector<int> v;
+	v.assign(kNumInserts, 2);
+	DoNotOptimize(v.data());
+}
+BM(rcVector_AssignIndices, kNumLoops)
+{
+	rcTempVector<int> v;
+	v.resize(kNumInserts);
+	for (int j = 0; j < kNumInserts; j++) {
+		v[j] = 2;
+	}
+	DoNotOptimize(v.data());
+}
+BM(rcVector_Resize, kNumLoops)
+{
+	rcTempVector<int> v;
+	v.resize(kNumInserts, 2);
+	DoNotOptimize(v.data());
+}
+
+BM(stdvector_Push, kNumLoops)
+{
+	std::vector<int> v;
+	for (int j = 0; j < kNumInserts; j++) {
+		v.push_back(2);
+	}
+	DoNotOptimize(v.data());
+}
+BM(stdvector_PushPreallocated, kNumLoops)
+{
+	std::vector<int> v;
+	v.reserve(kNumInserts);
+	for (int j = 0; j < kNumInserts; j++) {
+		v.push_back(2);
+	}
+	DoNotOptimize(v.data());
+}
+BM(stdvector_Assign, kNumLoops)
+{
+	std::vector<int> v;
+	v.assign(kNumInserts, 2);
+	DoNotOptimize(v.data());
+}
+BM(stdvector_AssignIndices, kNumLoops)
+{
+	std::vector<int> v;
+	v.resize(kNumInserts);
+	for (int j = 0; j < kNumInserts; j++) {
+		v[j] = 2;
+	}
+	DoNotOptimize(v.data());
+}
+BM(stdvector_Resize, kNumLoops)
+{
+	std::vector<int> v;
+	v.resize(kNumInserts, 2);
+	DoNotOptimize(v.data());
+}
+
+#undef BM
+#endif // _POSIX_TIMERS
+#endif // __unix__