Import the source code of mimalloc v2.2.7

Update to newer mimalloc versions like this:

  update_mimalloc ()
  {
      test $# = 1 || {
          echo "Need a mimalloc version" 1>&2;
          return 1
      };
      for oneline in 'mimalloc: adjust for building inside Git' 'Import the source code of mimalloc';
      do
          git revert -n HEAD^{/^"$oneline"} && git checkout HEAD -- Makefile && git commit -sm "Temporarily revert \"$oneline\"" -m 'In preparation for upgrading to a newer mimalloc version.' || return 1;
      done;
      for file in $(git show --format='%n' --name-only --diff-filter=A HEAD^{/^"Import the source code of mimalloc "}) compat/mimalloc/arena-abandon.c compat/mimalloc/free.c compat/mimalloc/libc.c compat/mimalloc/prim/prim.c compat/mimalloc/mimalloc-stats.h;
      do
          file2=${file#compat/mimalloc/};
          case "$file2" in
              segment-cache.c)
                  : no longer needed;
                  continue
              ;;
              bitmap.h | *.c)
                  file2=src/$file2
              ;;
              *.h)
                  file2=include/$file2
              ;;
          esac;
          mkdir -p "${file%/*}" && git -C /usr/src/mimalloc/ show "$1":$file2 > "$file" && git add "$file" || {
              echo "Failed: $file2 -> $file" 1>&2;
              return 1
          };
      done;
      conv_sed='sed -n "/^ *eval/d;/      /p"' && git commit -sm "Import the source code of mimalloc $1" -m "Update to newer mimalloc versions like this:" -m "$(set | sed -n '/^update_mimalloc *() *$/,/^}/{s/^./  &/;p}')" -m '  update_mimalloc $MIMALLOC_VERSION' -m 'For convenience, you can set `MIMALLOC_VERSION` and then run:' -m '  eval "$(git show -s <this-commit> | '"$conv_sed"')"' || return 1;
      git cherry-pick HEAD^{/^'mimalloc: adjust for building inside Git'} || return 1
  }

  update_mimalloc $MIMALLOC_VERSION

For convenience, you can set `MIMALLOC_VERSION` and then run:

  eval "$(git show -s <this-commit> | sed -n "/^ *eval/d;/      /p")"

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Johannes Schindelin 2026-01-16 15:47:57 +01:00
parent cff64d03c8
commit 2add928bae
30 changed files with 16691 additions and 0 deletions

compat/mimalloc/LICENSE

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018-2025 Microsoft Corporation, Daan Leijen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

compat/mimalloc/alloc-aligned.c

@@ -0,0 +1,371 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h" // mi_prim_get_default_heap
#include <string.h> // memset
// ------------------------------------------------------
// Aligned Allocation
// ------------------------------------------------------
static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
// objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`).
mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
if (alignment > size) return false;
if (alignment <= MI_MAX_ALIGN_SIZE) return true;
const size_t bsize = mi_good_size(size);
return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
}
#if MI_GUARDED
static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, size_t size, size_t alignment, bool zero) mi_attr_noexcept {
// use over-allocation for guarded blocks
mi_assert_internal(alignment > 0 && alignment < MI_BLOCK_ALIGNMENT_MAX);
const size_t oversize = size + alignment - 1;
void* base = _mi_heap_malloc_guarded(heap, oversize, zero);
void* p = mi_align_up_ptr(base, alignment);
mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size);
mi_assert_internal(mi_usable_size(p) >= size);
mi_assert_internal(_mi_is_aligned(p, alignment));
return p;
}
static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero, size_t* usable) {
const size_t rate = heap->guarded_sample_rate;
// only write if `rate!=0` so we don't write to the constant `_mi_heap_empty`
if (rate != 0) { heap->guarded_sample_rate = 0; }
void* p = _mi_heap_malloc_zero_ex(heap, size, zero, 0, usable);
if (rate != 0) { heap->guarded_sample_rate = rate; }
return p;
}
#else
static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero, size_t* usable) {
return _mi_heap_malloc_zero_ex(heap, size, zero, 0, usable);
}
#endif
// Fallback aligned allocation that over-allocates -- split out for better codegen
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero, size_t* usable) mi_attr_noexcept
{
mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE));
mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
void* p;
size_t oversize;
if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
// use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
// This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
// first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
if mi_unlikely(offset != 0) {
// todo: cannot support offset alignment for very large alignments yet
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
#endif
return NULL;
}
oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
// note: no guarded as alignment > 0
p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment, usable); // the page block size should be large enough to align in the single huge page block
// zero afterwards as only the area from the aligned_p may be committed!
if (p == NULL) return NULL;
}
else {
// otherwise over-allocate
oversize = (size < MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : size) + alignment - 1; // adjust for size <= 16; with size 0 and alignment 64k, we would allocate a 64k block and point just beyond it.
p = mi_heap_malloc_zero_no_guarded(heap, oversize, zero, usable);
if (p == NULL) return NULL;
}
mi_page_t* page = _mi_ptr_page(p);
// .. and align within the allocation
const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset);
mi_assert_internal(adjust < alignment);
void* aligned_p = (void*)((uintptr_t)p + adjust);
if (aligned_p != p) {
mi_page_set_has_aligned(page, true);
#if MI_GUARDED
// set tag to aligned so mi_usable_size works with guard pages
if (adjust >= sizeof(mi_block_t)) {
mi_block_t* const block = (mi_block_t*)p;
block->next = MI_BLOCK_TAG_ALIGNED;
}
#endif
_mi_padding_shrink(page, (mi_block_t*)p, adjust + size);
}
// todo: expand padding if overallocated ?
mi_assert_internal(mi_page_usable_block_size(page) >= adjust + size);
mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
mi_assert_internal(mi_usable_size(aligned_p)>=size);
mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
#if MI_DEBUG > 1
mi_page_t* const apage = _mi_ptr_page(aligned_p);
void* unalign_p = _mi_page_ptr_unalign(apage, aligned_p);
mi_assert_internal(p == unalign_p);
#endif
// now zero the block if needed
if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
// for the tracker, on huge aligned allocations only the memory from the start of the large block is defined
mi_track_mem_undefined(aligned_p, size);
if (zero) {
_mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
}
}
if (p != aligned_p) {
mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
#if MI_GUARDED
mi_track_mem_defined(p, sizeof(mi_block_t));
#endif
}
return aligned_p;
}
// Generic primitive aligned allocation -- split out for better codegen
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero, size_t* usable) mi_attr_noexcept
{
mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
// we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
#endif
return NULL;
}
// use regular allocation if it is guaranteed to fit the alignment constraints.
// this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exists
// a page with the right block size, and if we always use the over-alloc fallback that would never happen.
if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) {
void* p = mi_heap_malloc_zero_no_guarded(heap, size, zero, usable);
mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0;
if mi_likely(is_aligned_or_null) {
return p;
}
else {
// this should never happen if the `mi_malloc_is_naturally_aligned` check is correct..
mi_assert(false);
mi_free(p);
}
}
// fall back to over-allocation
return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero,usable);
}
// Primitive aligned allocation
static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size,
const size_t alignment, const size_t offset, const bool zero,
size_t* usable) mi_attr_noexcept
{
// note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size.
if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment);
#endif
return NULL;
}
#if MI_GUARDED
if (offset==0 && alignment < MI_BLOCK_ALIGNMENT_MAX && mi_heap_malloc_use_guarded(heap,size)) {
return mi_heap_malloc_guarded_aligned(heap, size, alignment, zero);
}
#endif
// try first if there happens to be a small block available with just the right alignment
if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
if mi_likely(page->free != NULL) {
const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
if mi_likely(is_aligned)
{
if (usable!=NULL) { *usable = mi_page_usable_block_size(page); }
void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
mi_track_malloc(p,size,zero);
return p;
}
}
}
// fallback to generic aligned allocation
return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero, usable);
}
// ------------------------------------------------------
// Optimized mi_heap_malloc_aligned / mi_malloc_aligned
// ------------------------------------------------------
mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false, NULL);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
}
// ensure a definition is emitted
#if defined(__cplusplus)
void* _mi_extern_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned;
#endif
// ------------------------------------------------------
// Aligned Allocation
// ------------------------------------------------------
mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true, NULL);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_zalloc_aligned_at(heap, size, alignment, 0);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count, size, &total)) return NULL;
return mi_heap_zalloc_aligned_at(heap, total, alignment, offset);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_calloc_aligned_at(heap,count,size,alignment,0);
}
mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset);
}
mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment);
}
mi_decl_nodiscard mi_decl_restrict void* mi_umalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(mi_prim_get_default_heap(), size, alignment, 0, false, block_size);
}
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset);
}
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment);
}
mi_decl_nodiscard mi_decl_restrict void* mi_uzalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(mi_prim_get_default_heap(), size, alignment, 0, true, block_size);
}
mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset);
}
mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment);
}
// ------------------------------------------------------
// Aligned re-allocation
// ------------------------------------------------------
static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept {
mi_assert(alignment > 0);
if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero,NULL,NULL);
if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero,NULL);
size_t size = mi_usable_size(p);
if (newsize <= size && newsize >= (size - (size / 2))
&& (((uintptr_t)p + offset) % alignment) == 0) {
return p; // reallocation still fits, is aligned and not more than 50% waste
}
else {
// note: we don't zero allocate upfront so we only zero initialize the expanded part
void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset);
if (newp != NULL) {
if (zero && newsize > size) {
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
_mi_memzero((uint8_t*)newp + start, newsize - start);
}
_mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
mi_free(p); // only free if successful
}
return newp;
}
}
static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept {
mi_assert(alignment > 0);
if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero,NULL,NULL);
size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL)
return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero);
}
mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false);
}
mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false);
}
mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true);
}
mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true);
}
mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(newcount, size, &total)) return NULL;
return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset);
}
mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(newcount, size, &total)) return NULL;
return mi_heap_rezalloc_aligned(heap, p, total, alignment);
}
mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset);
}
mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment);
}
mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset);
}
mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment);
}
mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset);
}
mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment);
}
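
The entry points above (`mi_malloc_aligned`, `mi_zalloc_aligned_at`, and their heap/realloc variants) form the public aligned-allocation API. A minimal usage sketch, not part of the imported sources, assuming the header is reachable as "mimalloc.h" and the program is linked against the built allocator:

  #include <stdio.h>
  #include "mimalloc.h"

  int main(void)
  {
      /* 64-byte aligned block, e.g. for a cache-line sized structure */
      void *p = mi_malloc_aligned(200, 64);
      if (!p)
          return 1;
      printf("usable size: %zu\n", mi_usable_size(p));
      mi_free(p);

      /* zero-initialized block where (address + 16) is 4 KiB aligned */
      void *q = mi_zalloc_aligned_at(1024, 4096, 16);
      mi_free(q); /* mi_free(NULL) is a no-op, so no extra check needed */
      return 0;
  }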

compat/mimalloc/alloc.c

@@ -0,0 +1,734 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE // for realpath() on Linux
#endif
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // _mi_prim_thread_id()
#include <string.h> // memset, strlen (for mi_strdup)
#include <stdlib.h> // malloc, abort
#define MI_IN_ALLOC_C
#include "alloc-override.c"
#include "free.c"
#undef MI_IN_ALLOC_C
// ------------------------------------------------------
// Allocation
// ------------------------------------------------------
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero, size_t* usable) mi_attr_noexcept
{
mi_assert_internal(size >= MI_PADDING_SIZE);
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
// check the free list
mi_block_t* const block = page->free;
if mi_unlikely(block == NULL) {
return _mi_malloc_generic(heap, size, zero, 0, usable);
}
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
if (usable != NULL) { *usable = mi_page_usable_block_size(page); };
// pop from the free list
page->free = mi_block_next(page, block);
page->used++;
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
mi_assert_internal(page->block_size < MI_MAX_ALIGN_SIZE || _mi_is_aligned(block, MI_MAX_ALIGN_SIZE));
#if MI_DEBUG>3
if (page->free_is_zero && size > sizeof(*block)) {
mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block)));
}
#endif
// allow use of the block internally
// note: when tracking we need to avoid ever touching the MI_PADDING since
// that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`)
mi_track_mem_undefined(block, mi_page_usable_block_size(page));
// zero the block? note: we need to zero the full block size (issue #63)
if mi_unlikely(zero) {
mi_assert_internal(page->block_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
#if MI_PADDING
mi_assert_internal(page->block_size >= MI_PADDING_SIZE);
#endif
if (page->free_is_zero) {
block->next = 0;
mi_track_mem_defined(block, page->block_size - MI_PADDING_SIZE);
}
else {
_mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE);
}
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
if (!zero && !mi_page_is_huge(page)) {
memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
}
#elif (MI_SECURE!=0)
if (!zero) { block->next = 0; } // don't leak internal data
#endif
#if (MI_STAT>0)
const size_t bsize = mi_page_usable_block_size(page);
if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
mi_heap_stat_increase(heap, malloc_normal, bsize);
mi_heap_stat_counter_increase(heap, malloc_normal_count, 1);
#if (MI_STAT>1)
const size_t bin = _mi_bin(bsize);
mi_heap_stat_increase(heap, malloc_bins[bin], 1);
mi_heap_stat_increase(heap, malloc_requested, size - MI_PADDING_SIZE);
#endif
}
#endif
#if MI_PADDING // && !MI_TRACK_ENABLED
mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
#if (MI_DEBUG>=2)
mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
#endif
mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess
padding->canary = mi_ptr_encode_canary(page,block,page->keys);
padding->delta = (uint32_t)(delta);
#if MI_PADDING_CHECK
if (!mi_page_is_huge(page)) {
uint8_t* fill = (uint8_t*)padding - delta;
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
}
#endif
#endif
return block;
}
// extra entries for improved efficiency in `alloc-aligned.c`.
extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
return _mi_page_malloc_zero(heap,page,size,false,NULL);
}
extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
return _mi_page_malloc_zero(heap,page,size,true,NULL);
}
#if MI_GUARDED
mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
#endif
static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero, size_t* usable) mi_attr_noexcept {
mi_assert(heap != NULL);
mi_assert(size <= MI_SMALL_SIZE_MAX);
#if MI_DEBUG
const uintptr_t tid = _mi_thread_id();
mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local
#endif
#if (MI_PADDING || MI_GUARDED)
if (size == 0) { size = sizeof(void*); }
#endif
#if MI_GUARDED
if (mi_heap_malloc_use_guarded(heap,size)) {
return _mi_heap_malloc_guarded(heap, size, zero);
}
#endif
// get page in constant time, and allocate from it
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero, usable);
mi_track_malloc(p,size,zero);
#if MI_DEBUG>3
if (p != NULL && zero) {
mi_assert_expensive(mi_mem_is_zero(p, size));
}
#endif
return p;
}
// allocate a small block
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return mi_heap_malloc_small_zero(heap, size, false, NULL);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_prim_get_default_heap(), size);
}
// The main allocation function
extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment, size_t* usable) mi_attr_noexcept {
// fast path for small objects
if mi_likely(size <= MI_SMALL_SIZE_MAX) {
mi_assert_internal(huge_alignment == 0);
return mi_heap_malloc_small_zero(heap, size, zero, usable);
}
#if MI_GUARDED
else if (huge_alignment==0 && mi_heap_malloc_use_guarded(heap,size)) {
return _mi_heap_malloc_guarded(heap, size, zero);
}
#endif
else {
// regular allocation
mi_assert(heap!=NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment, usable); // note: size can overflow but it is detected in malloc_generic
mi_track_malloc(p,size,zero);
#if MI_DEBUG>3
if (p != NULL && zero) {
mi_assert_expensive(mi_mem_is_zero(p, size));
}
#endif
return p;
}
}
extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
return _mi_heap_malloc_zero_ex(heap, size, zero, 0, NULL);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return _mi_heap_malloc_zero(heap, size, false);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
return mi_heap_malloc(mi_prim_get_default_heap(), size);
}
// zero initialized small block
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, true, NULL);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return _mi_heap_malloc_zero(heap, size, true);
}
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept {
return mi_heap_zalloc(mi_prim_get_default_heap(),size);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count,size,&total)) return NULL;
return mi_heap_zalloc(heap,total);
}
mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_calloc(mi_prim_get_default_heap(),count,size);
}
// Return usable size
mi_decl_nodiscard mi_decl_restrict void* mi_umalloc_small(size_t size, size_t* usable) mi_attr_noexcept {
return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, false, usable);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_umalloc(mi_heap_t* heap, size_t size, size_t* usable) mi_attr_noexcept {
return _mi_heap_malloc_zero_ex(heap, size, false, 0, usable);
}
mi_decl_nodiscard mi_decl_restrict void* mi_umalloc(size_t size, size_t* usable) mi_attr_noexcept {
return mi_heap_umalloc(mi_prim_get_default_heap(), size, usable);
}
mi_decl_nodiscard mi_decl_restrict void* mi_uzalloc(size_t size, size_t* usable) mi_attr_noexcept {
return _mi_heap_malloc_zero_ex(mi_prim_get_default_heap(), size, true, 0, usable);
}
mi_decl_nodiscard mi_decl_restrict void* mi_ucalloc(size_t count, size_t size, size_t* usable) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count,size,&total)) return NULL;
return mi_uzalloc(total, usable);
}
// Uninitialized `calloc`
mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count, size, &total)) return NULL;
return mi_heap_malloc(heap, total);
}
mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_mallocn(mi_prim_get_default_heap(),count,size);
}
// Expand (or shrink) in place (or fail)
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
#if MI_PADDING
// we do not shrink/expand with padding enabled
MI_UNUSED(p); MI_UNUSED(newsize);
return NULL;
#else
if (p == NULL) return NULL;
const mi_page_t* const page = mi_validate_ptr_page(p,"mi_expand");
const size_t size = _mi_usable_size(p,page);
if (newsize > size) return NULL;
return p; // it fits
#endif
}
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero, size_t* usable_pre, size_t* usable_post) mi_attr_noexcept {
// if p == NULL then behave as malloc.
// else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)).
// (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.)
const mi_page_t* page;
size_t size;
if (p==NULL) {
page = NULL;
size = 0;
if (usable_pre!=NULL) { *usable_pre = 0; }
}
else {
page = mi_validate_ptr_page(p,"mi_realloc");
size = _mi_usable_size(p,page);
if (usable_pre!=NULL) { *usable_pre = mi_page_usable_block_size(page); }
}
if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0)
mi_assert_internal(p!=NULL);
// todo: do not track as the usable size is still the same in the free; adjust potential padding?
// mi_track_resize(p,size,newsize)
// if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); }
if (usable_post!=NULL) { *usable_post = mi_page_usable_block_size(page); }
return p; // reallocation still fits and not more than 50% waste
}
void* newp = mi_heap_umalloc(heap,newsize,usable_post);
if mi_likely(newp != NULL) {
if (zero && newsize > size) {
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
_mi_memzero((uint8_t*)newp + start, newsize - start);
}
else if (newsize == 0) {
((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725)
}
if mi_likely(p != NULL) {
const size_t copysize = (newsize > size ? size : newsize);
mi_track_mem_defined(p,copysize); // _mi_usable_size may be too large for byte-precise memory tracking.
_mi_memcpy(newp, p, copysize);
mi_free(p); // only free the original pointer if successful
}
}
return newp;
}
mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
return _mi_heap_realloc_zero(heap, p, newsize, false, NULL, NULL);
}
mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count, size, &total)) return NULL;
return mi_heap_realloc(heap, p, total);
}
// Reallocate but free `p` on errors
mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
void* newp = mi_heap_realloc(heap, p, newsize);
if (newp==NULL && p!=NULL) mi_free(p);
return newp;
}
mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
return _mi_heap_realloc_zero(heap, p, newsize, true, NULL, NULL);
}
mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count, size, &total)) return NULL;
return mi_heap_rezalloc(heap, p, total);
}
mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept {
return mi_heap_realloc(mi_prim_get_default_heap(),p,newsize);
}
mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept {
return mi_heap_reallocn(mi_prim_get_default_heap(),p,count,size);
}
mi_decl_nodiscard void* mi_urealloc(void* p, size_t newsize, size_t* usable_pre, size_t* usable_post) mi_attr_noexcept {
return _mi_heap_realloc_zero(mi_prim_get_default_heap(),p,newsize, false, usable_pre, usable_post);
}
// Reallocate but free `p` on errors
mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept {
return mi_heap_reallocf(mi_prim_get_default_heap(),p,newsize);
}
mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept {
return mi_heap_rezalloc(mi_prim_get_default_heap(), p, newsize);
}
mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept {
return mi_heap_recalloc(mi_prim_get_default_heap(), p, count, size);
}
// ------------------------------------------------------
// strdup, strndup, and realpath
// ------------------------------------------------------
// `strdup` using mi_malloc
mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept {
if (s == NULL) return NULL;
size_t len = _mi_strlen(s);
char* t = (char*)mi_heap_malloc(heap,len+1);
if (t == NULL) return NULL;
_mi_memcpy(t, s, len);
t[len] = 0;
return t;
}
mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept {
return mi_heap_strdup(mi_prim_get_default_heap(), s);
}
// `strndup` using mi_malloc
mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept {
if (s == NULL) return NULL;
const size_t len = _mi_strnlen(s,n); // len <= n
char* t = (char*)mi_heap_malloc(heap, len+1);
if (t == NULL) return NULL;
_mi_memcpy(t, s, len);
t[len] = 0;
return t;
}
mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept {
return mi_heap_strndup(mi_prim_get_default_heap(),s,n);
}
#ifndef __wasi__
// `realpath` using mi_malloc
#ifdef _WIN32
#ifndef PATH_MAX
#define PATH_MAX MAX_PATH
#endif
mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
// todo: use GetFullPathNameW to allow longer file names
char buf[PATH_MAX];
DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? buf : resolved_name), NULL);
if (res == 0) {
errno = GetLastError(); return NULL;
}
else if (res > PATH_MAX) {
errno = EINVAL; return NULL;
}
else if (resolved_name != NULL) {
return resolved_name;
}
else {
return mi_heap_strndup(heap, buf, PATH_MAX);
}
}
#else
/*
#include <unistd.h> // pathconf
static size_t mi_path_max(void) {
static size_t path_max = 0;
if (path_max <= 0) {
long m = pathconf("/",_PC_PATH_MAX);
if (m <= 0) path_max = 4096; // guess
else if (m < 256) path_max = 256; // at least 256
else path_max = m;
}
return path_max;
}
*/
char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
if (resolved_name != NULL) {
return realpath(fname,resolved_name);
}
else {
char* rname = realpath(fname, NULL);
if (rname == NULL) return NULL;
char* result = mi_heap_strdup(heap, rname);
mi_cfree(rname); // use checked free (which may be redirected to our free but that's ok)
// note: with ASAN realpath is intercepted and mi_cfree may leak the returned pointer :-(
return result;
}
/*
const size_t n = mi_path_max();
char* buf = (char*)mi_malloc(n+1);
if (buf == NULL) {
errno = ENOMEM;
return NULL;
}
char* rname = realpath(fname,buf);
char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL`
mi_free(buf);
return result;
}
*/
}
#endif
mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept {
return mi_heap_realpath(mi_prim_get_default_heap(),fname,resolved_name);
}
#endif
/*-------------------------------------------------------
C++ new and new_aligned
The standard requires calling into `get_new_handler` and
throwing the bad_alloc exception on failure. If we compile
with a C++ compiler we can implement this precisely. If we
use a C compiler we cannot throw a `bad_alloc` exception
but we call `exit` instead (i.e. not returning).
-------------------------------------------------------*/
#ifdef __cplusplus
#include <new>
static bool mi_try_new_handler(bool nothrow) {
#if defined(_MSC_VER) || (__cplusplus >= 201103L)
std::new_handler h = std::get_new_handler();
#else
std::new_handler h = std::set_new_handler(NULL); // C++98 has no get_new_handler; swap the handler out and restore it below
std::set_new_handler(h);
#endif
if (h==NULL) {
_mi_error_message(ENOMEM, "out of memory in 'new'");
#if defined(_CPPUNWIND) || defined(__cpp_exceptions) // exceptions are not always enabled
if (!nothrow) {
throw std::bad_alloc();
}
#else
MI_UNUSED(nothrow);
#endif
return false;
}
else {
h();
return true;
}
}
#else
typedef void (*std_new_handler_t)(void);
#if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631
std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) {
return NULL;
}
static std_new_handler_t mi_get_new_handler(void) {
return _ZSt15get_new_handlerv();
}
#else
// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`.
static std_new_handler_t mi_get_new_handler(void) {
return NULL;
}
#endif
static bool mi_try_new_handler(bool nothrow) {
std_new_handler_t h = mi_get_new_handler();
if (h==NULL) {
_mi_error_message(ENOMEM, "out of memory in 'new'");
if (!nothrow) {
abort(); // cannot throw in plain C, use abort
}
return false;
}
else {
h();
return true;
}
}
#endif
mi_decl_export mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) {
void* p = NULL;
while(p == NULL && mi_try_new_handler(nothrow)) {
p = mi_heap_malloc(heap,size);
}
return p;
}
static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) {
return mi_heap_try_new(mi_prim_get_default_heap(), size, nothrow);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) {
void* p = mi_heap_malloc(heap,size);
if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false);
return p;
}
mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) {
return mi_heap_alloc_new(mi_prim_get_default_heap(), size);
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) {
size_t total;
if mi_unlikely(mi_count_size_overflow(count, size, &total)) {
mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
return NULL;
}
else {
return mi_heap_alloc_new(heap,total);
}
}
mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) {
return mi_heap_alloc_new_n(mi_prim_get_default_heap(), count, size);
}
mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept {
void* p = mi_malloc(size);
if mi_unlikely(p == NULL) return mi_try_new(size, true);
return p;
}
mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) {
void* p;
do {
p = mi_malloc_aligned(size, alignment);
}
while(p == NULL && mi_try_new_handler(false));
return p;
}
mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept {
void* p;
do {
p = mi_malloc_aligned(size, alignment);
}
while(p == NULL && mi_try_new_handler(true));
return p;
}
mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) {
void* q;
do {
q = mi_realloc(p, newsize);
} while (q == NULL && mi_try_new_handler(false));
return q;
}
mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) {
size_t total;
if mi_unlikely(mi_count_size_overflow(newcount, size, &total)) {
mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
return NULL;
}
else {
return mi_new_realloc(p, total);
}
}
#if MI_GUARDED
// We always allocate a guarded allocation at an offset (`mi_page_has_aligned` will be true).
// We then set the first word of the block to `0` for regular offset aligned allocations (in `alloc-aligned.c`)
// and the first word to `~0` for guarded allocations to have a correct `mi_usable_size`
static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
// TODO: we can still make padding work by moving it out of the guard page area
mi_page_t* const page = _mi_ptr_page(block);
mi_page_set_has_aligned(page, true);
block->next = MI_BLOCK_TAG_GUARDED;
// set guard page at the end of the block
mi_segment_t* const segment = _mi_page_segment(page);
const size_t block_size = mi_page_block_size(page); // must use `block_size` to match `mi_free_local`
const size_t os_page_size = _mi_os_page_size();
mi_assert_internal(block_size >= obj_size + os_page_size + sizeof(mi_block_t));
if (block_size < obj_size + os_page_size + sizeof(mi_block_t)) {
// should never happen
mi_free(block);
return NULL;
}
uint8_t* guard_page = (uint8_t*)block + block_size - os_page_size;
mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
if mi_likely(segment->allow_decommit && _mi_is_aligned(guard_page, os_page_size)) {
const bool ok = _mi_os_protect(guard_page, os_page_size);
if mi_unlikely(!ok) {
_mi_warning_message("failed to set a guard page behind an object (object %p of size %zu)\n", block, block_size);
}
}
else {
_mi_warning_message("unable to set a guard page behind an object due to pinned memory (large OS pages?) (object %p of size %zu)\n", block, block_size);
}
// align pointer just in front of the guard page
size_t offset = block_size - os_page_size - obj_size;
mi_assert_internal(offset > sizeof(mi_block_t));
if (offset > MI_BLOCK_ALIGNMENT_MAX) {
// give up on placing it right in front of the guard page if the offset is too large to un-align
offset = MI_BLOCK_ALIGNMENT_MAX;
}
void* p = (uint8_t*)block + offset;
mi_track_align(block, p, offset, obj_size);
mi_track_mem_defined(block, sizeof(mi_block_t));
return p;
}
mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
{
#if defined(MI_PADDING_SIZE)
mi_assert(MI_PADDING_SIZE==0);
#endif
// allocate multiple of page size ending in a guard page
// ensure minimal alignment requirement?
const size_t os_page_size = _mi_os_page_size();
const size_t obj_size = (mi_option_is_enabled(mi_option_guarded_precise) ? size : _mi_align_up(size, MI_MAX_ALIGN_SIZE));
const size_t bsize = _mi_align_up(_mi_align_up(obj_size, MI_MAX_ALIGN_SIZE) + sizeof(mi_block_t), MI_MAX_ALIGN_SIZE);
const size_t req_size = _mi_align_up(bsize + os_page_size, os_page_size);
mi_block_t* const block = (mi_block_t*)_mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */, NULL);
if (block==NULL) return NULL;
void* const p = mi_block_ptr_set_guarded(block, obj_size);
// stats
mi_track_malloc(p, size, zero);
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
#if MI_STAT>1
mi_heap_stat_adjust_decrease(heap, malloc_requested, req_size);
mi_heap_stat_increase(heap, malloc_requested, size);
#endif
_mi_stat_counter_increase(&heap->tld->stats.malloc_guarded_count, 1);
}
#if MI_DEBUG>3
if (p != NULL && zero) {
mi_assert_expensive(mi_mem_is_zero(p, size));
}
#endif
return p;
}
#endif
// ------------------------------------------------------
// ensure explicit external inline definitions are emitted!
// ------------------------------------------------------
#ifdef __cplusplus
void* _mi_externs[] = {
(void*)&_mi_page_malloc,
(void*)&_mi_page_malloc_zero,
(void*)&_mi_heap_malloc_zero,
(void*)&_mi_heap_malloc_zero_ex,
(void*)&mi_malloc,
(void*)&mi_malloc_small,
(void*)&mi_zalloc_small,
(void*)&mi_heap_malloc,
(void*)&mi_heap_zalloc,
(void*)&mi_heap_malloc_small,
// (void*)&mi_heap_alloc_new,
// (void*)&mi_heap_alloc_new_n
};
#endif
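
alloc.c above provides the core `mi_malloc`/`mi_zalloc`/`mi_calloc`/`mi_realloc` family plus the `strdup`/`strndup`/`realpath` helpers. A short usage sketch, not part of the imported sources, again assuming "mimalloc.h" is on the include path and the library is linked in:

  #include <stdio.h>
  #include <string.h>
  #include "mimalloc.h"

  int main(void)
  {
      /* mi_realloc(NULL, n) behaves like mi_malloc(n) */
      char *buf = mi_realloc(NULL, 16);
      if (!buf)
          return 1;
      strcpy(buf, "hello");

      /* grow; contents up to the old size are preserved */
      char *bigger = mi_realloc(buf, 256);
      if (bigger)
          buf = bigger;

      char *copy = mi_strdup(buf); /* duplicated on the mimalloc heap */
      if (copy)
          printf("%s (usable %zu)\n", copy, mi_usable_size(copy));

      mi_free(copy);
      mi_free(buf);
      return 0;
  }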

compat/mimalloc/arena-abandon.c

@@ -0,0 +1,346 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if !defined(MI_IN_ARENA_C)
#error "this file should be included from 'arena.c' (so mi_arena_t is visible)"
// add includes to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
#endif
// Minimal exports for arena-abandoned.
size_t mi_arena_id_index(mi_arena_id_t id);
mi_arena_t* mi_arena_from_index(size_t idx);
size_t mi_arena_get_count(void);
void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex);
bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index);
/* -----------------------------------------------------------
Abandoned blocks/segments:
_mi_arena_segment_clear_abandoned
_mi_arena_segment_mark_abandoned
This is used to atomically abandon/reclaim segments
(and crosses the arena API but it is convenient to have here).
Abandoned segments still have live blocks; they get reclaimed
when a thread frees a block in it, or when a thread needs a fresh
segment.
Abandoned segments are atomically marked in the `block_abandoned`
bitmap of arenas. Any segments allocated outside arenas are put
in the sub-process `abandoned_os_list`. This list is accessed
using locks but this should be uncommon and generally uncontended.
Reclaiming and visiting either scan through the `block_abandoned`
bitmaps of the arenas, or visit the `abandoned_os_list`.
A potentially nicer design is to use arenas for everything
and perhaps have virtual arenas to map OS-allocated memory,
but this would lack the "density" of our current arenas. TBC.
----------------------------------------------------------- */
// reclaim a specific OS abandoned segment; `true` on success.
// sets the thread_id.
static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena, remove from list of abandoned os segments
mi_subproc_t* const subproc = segment->subproc;
if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) {
return false; // failed to acquire the lock, we just give up
}
// remove atomically from the abandoned os list (if possible!)
bool reclaimed = false;
mi_segment_t* const next = segment->abandoned_os_next;
mi_segment_t* const prev = segment->abandoned_os_prev;
if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) {
#if MI_DEBUG>3
// find ourselves in the abandoned list (and check the count)
bool found = false;
size_t count = 0;
for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) {
if (current == segment) { found = true; }
count++;
}
mi_assert_internal(found);
mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count));
#endif
// remove (atomically) from the list and reclaim
if (prev != NULL) { prev->abandoned_os_next = next; }
else { subproc->abandoned_os_list = next; }
if (next != NULL) { next->abandoned_os_prev = prev; }
else { subproc->abandoned_os_list_tail = prev; }
segment->abandoned_os_next = NULL;
segment->abandoned_os_prev = NULL;
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count);
if (take_lock) { // don't reset the thread_id when iterating
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
reclaimed = true;
}
if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); }
return reclaimed;
}
// reclaim a specific abandoned segment; `true` on success.
// sets the thread_id.
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) {
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */);
}
// arena segment: use the blocks_abandoned bitmap.
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
mi_assert_internal(arena != NULL);
// reclaim atomically
bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
if (was_marked) {
mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0);
mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
// mi_assert_internal(was_marked);
mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return was_marked;
}
// mark a specific OS segment as abandoned
static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena; we use a list of abandoned segments
mi_subproc_t* const subproc = segment->subproc;
mi_lock(&subproc->abandoned_os_lock) {
// push on the tail of the list (important for the visitor)
mi_segment_t* prev = subproc->abandoned_os_list_tail;
mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL);
mi_assert_internal(segment->abandoned_os_prev == NULL);
mi_assert_internal(segment->abandoned_os_next == NULL);
if (prev != NULL) { prev->abandoned_os_next = segment; }
else { subproc->abandoned_os_list = segment; }
subproc->abandoned_os_list_tail = segment;
segment->abandoned_os_prev = prev;
segment->abandoned_os_next = NULL;
mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count);
mi_atomic_increment_relaxed(&subproc->abandoned_count);
// and release the lock
}
return;
}
// mark a specific segment as abandoned
// clears the thread_id.
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
{
mi_assert_internal(segment->used == segment->abandoned);
mi_atomic_store_release(&segment->thread_id, (uintptr_t)0); // mark as abandoned for multi-thread free's
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
mi_arena_segment_os_mark_abandoned(segment);
return;
}
// segment is in an arena, mark it in the arena `blocks_abandoned` bitmap
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
mi_assert_internal(arena != NULL);
// set abandonment atomically
mi_subproc_t* const subproc = segment->subproc; // don't access the segment after setting it abandoned
const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
if (was_unmarked) { mi_atomic_increment_relaxed(&subproc->abandoned_count); }
mi_assert_internal(was_unmarked);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
}
/* -----------------------------------------------------------
Iterate through the abandoned blocks/segments using a cursor.
This is used for reclaiming and abandoned block visiting.
----------------------------------------------------------- */
// start a cursor at a randomized arena
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) {
mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc);
current->bitmap_idx = 0;
current->subproc = subproc;
current->visit_all = visit_all;
current->hold_visit_lock = false;
const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count);
const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count);
const size_t max_arena = mi_arena_get_count();
if (heap != NULL && heap->arena_id != _mi_arena_id_none()) {
// for a heap that is bound to one arena, only visit that arena
current->start = mi_arena_id_index(heap->arena_id);
current->end = current->start + 1;
current->os_list_count = 0;
}
else {
// otherwise visit all starting at a random location
if (abandoned_count > abandoned_list_count && max_arena > 0) {
current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena));
current->end = current->start + max_arena;
}
else {
current->start = 0;
current->end = 0;
}
current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list
}
mi_assert_internal(current->start <= max_arena);
}
void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) {
if (current->hold_visit_lock) {
mi_lock_release(&current->subproc->abandoned_os_visit_lock);
current->hold_visit_lock = false;
}
}
static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) {
// try to reclaim an abandoned segment in the arena atomically
if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
// check that the segment belongs to our sub-process
// note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled.
// without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process.
// for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock.
if (segment->subproc != subproc) {
// it is from another sub-process, re-mark it and continue searching
const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
mi_assert_internal(was_zero); MI_UNUSED(was_zero);
return NULL;
}
else {
// success, we unabandoned a segment in our sub-process
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
return segment;
}
}
static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) {
const size_t max_arena = mi_arena_get_count();
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx);
// visit arena's (from the previous cursor)
for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) {
// index wraps around
size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
if (arena != NULL) {
bool has_lock = false;
// visit the abandoned fields (starting at previous_idx)
for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// we only take the arena lock if there are actually abandoned segments present
if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
if (!has_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock");
}
// skip to next arena
break;
}
}
mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned));
// visit each set bit in the field (todo: maybe use `ctz` here?)
for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
size_t mask = ((size_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, bitmap_idx);
if (segment != NULL) {
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
previous->bitmap_idx = mi_bitmap_index_create_ex(field_idx, bit_idx + 1); // start at next one for the next iteration
return segment;
}
}
}
}
}
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
}
}
return NULL;
}
static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) {
// go through the abandoned_os_list
// we only allow one thread per sub-process to do the visiting, guarded by the `abandoned_os_visit_lock`.
// The lock is released when the cursor is released.
if (!previous->hold_visit_lock) {
previous->hold_visit_lock = (previous->visit_all ? (mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true)
: mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
if (!previous->hold_visit_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock");
}
return NULL; // we cannot get the lock, give up
}
}
// One list entry at a time
while (previous->os_list_count > 0) {
previous->os_list_count--;
mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free`
mi_segment_t* segment = previous->subproc->abandoned_os_list;
// pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
mi_lock_release(&previous->subproc->abandoned_os_lock);
return segment;
}
// already abandoned, try again
mi_lock_release(&previous->subproc->abandoned_os_lock);
}
// done
mi_assert_internal(previous->os_list_count == 0);
return NULL;
}
// reclaim abandoned segments
// this does not set the thread id (so it appears as still abandoned)
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) {
if (previous->start < previous->end) {
// walk the arena
mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous);
if (segment != NULL) { return segment; }
}
// no entries in the arenas anymore, walk the abandoned OS list
mi_assert_internal(previous->start == previous->end);
return mi_arena_segment_clear_abandoned_next_list(previous);
}
bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
// (unfortunately) the visit_abandoned option must be enabled from the start.
// This is to avoid taking locks if abandoned list visiting is not required (as for most programs)
if (!mi_option_is_enabled(mi_option_visit_abandoned)) {
_mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON");
return false;
}
mi_arena_field_cursor_t current;
_mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, &current);
mi_segment_t* segment;
bool ok = true;
while (ok && (segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg);
_mi_arena_segment_mark_abandoned(segment);
}
_mi_arena_field_cursor_done(&current);
return ok;
}
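For context, a minimal sketch of how this cursor machinery is reached from the public API. It assumes the `mi_block_visit_fun` callback signature and `mi_subproc_main()` declared in `mimalloc.h`, and requires `MIMALLOC_VISIT_ABANDONED=ON` at startup, as checked above:

  #include <stdio.h>
  #include <mimalloc.h>

  // called once per heap area, and (because visit_blocks is true) once per live block
  static bool print_block(const mi_heap_t* heap, const mi_heap_area_t* area,
                          void* block, size_t block_size, void* arg) {
    (void)heap; (void)area; (void)arg;
    if (block != NULL) printf("abandoned block %p of size %zu\n", block, block_size);
    return true;  // keep visiting; returning false stops the walk
  }

  int main(void) {
    // heap_tag -1: visit blocks of any heap tag in the main sub-process
    mi_abandoned_visit_blocks(mi_subproc_main(), -1, true, &print_block, NULL);
    return 0;
  }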

1045
compat/mimalloc/arena.c Normal file

File diff suppressed because it is too large

441
compat/mimalloc/bitmap.c Normal file

@ -0,0 +1,441 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
mi_assert_internal(count > 0);
if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
if (count == 0) return 0;
return ((((size_t)1 << count) - 1) << bitidx);
}
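A worked example of the mask construction, with illustrative values: `count == 3` and `bitidx == 5` select bits 5..7 of a field.

  #include <assert.h>
  #include <stddef.h>

  int main(void) {
    // same computation as mi_bitmap_mask_(3, 5): three consecutive bits starting at bit 5
    size_t mask = (((size_t)1 << 3) - 1) << 5;
    assert(mask == 0xE0);  // binary 1110 0000: bits 5, 6 and 7 set
    return 0;
  }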
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
mi_assert_internal(count > 0);
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
// search for 0-bit sequence of length count
const size_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#ifdef MI_HAVE_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
const size_t mapm = (map & m);
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const size_t newmap = (map | m);
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
// no success, another thread claimed concurrently.. keep going (with updated `map`)
continue;
}
else {
// success, we claimed the bits!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
return true;
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_FAST_BITSCAN
mi_assert_internal(mapm != 0);
const size_t shift = (count == 1 ? 1 : (MI_SIZE_BITS - mi_clz(mapm) - bitidx));
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no bits found
return false;
}
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
return false;
}
// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fulfilled
bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields,
const size_t start_field_idx, const size_t count,
mi_bitmap_pred_fun_t pred_fun, void* pred_arg,
mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) idx = 0; // wrap
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) {
return true;
}
// predicate returned false, unclaim and look further
_mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx);
}
}
return false;
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask);
const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
return ((prev & mask) == 0);
}
// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
return ((field & mask) == mask);
}
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
do {
if ((expected & mask) != 0) return false;
}
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
mi_assert_internal((expected & mask) == 0);
return true;
}
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}
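A minimal sketch of how the single-field API above fits together; it assumes it is compiled inside mimalloc, where these internal headers are available:

  #include "mimalloc.h"
  #include "mimalloc/internal.h"
  #include "bitmap.h"

  // two fields => 2 * MI_BITMAP_FIELD_BITS bits, all initially zero (unclaimed)
  static mi_bitmap_field_t demo_fields[2];

  static void bitmap_demo(void) {
    mi_bitmap_index_t idx;
    // claim any free run of 4 bits, searching from field 0 and wrapping around
    if (_mi_bitmap_try_find_from_claim(demo_fields, 2, 0, 4, &idx)) {
      bool claimed = _mi_bitmap_is_claimed(demo_fields, 2, 4, idx);   // now true
      bool all_were_set = _mi_bitmap_unclaim(demo_fields, 2, 4, idx); // true: all 4 bits were 1
      (void)claimed; (void)all_were_set;
    }
  }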
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
// check initial trailing zeros
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
// scan ahead
size_t found = initial;
size_t mask = 0; // mask bits for the final field
while(found < count) {
field++;
map = mi_atomic_load_relaxed(field);
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
mask = mi_bitmap_mask_(mask_bits, 0);
if ((map & mask) != 0) return false; // some part is already claimed
found += mask_bits;
}
mi_assert_internal(field < &bitmap[bitmap_fields]);
// we found a range of contiguous zeros up to the final field; mask contains mask in the final field
// now try to claim the range atomically
mi_bitmap_field_t* const final_field = field;
const size_t final_mask = mask;
mi_bitmap_field_t* const initial_field = &bitmap[idx];
const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
// initial field
size_t newmap;
field = initial_field;
map = mi_atomic_load_relaxed(field);
do {
newmap = (map | initial_mask);
if ((map & initial_mask) != 0) { goto rollback; };
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// intermediate fields
while (++field < final_field) {
newmap = MI_BITMAP_FIELD_FULL;
map = 0;
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
}
// final field
mi_assert_internal(field == final_field);
map = mi_atomic_load_relaxed(field);
do {
newmap = (map | final_mask);
if ((map & final_mask) != 0) { goto rollback; }
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
return true;
rollback:
// roll back intermediate fields
// (we just failed to claim `field` so decrement first)
while (--field > initial_field) {
newmap = 0;
map = MI_BITMAP_FIELD_FULL;
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
mi_atomic_store_release(field, newmap);
}
if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
map = mi_atomic_load_relaxed(field);
do {
mi_assert_internal((map & initial_mask) == initial_mask);
newmap = (map & ~initial_mask);
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
}
mi_stat_counter_increase(_mi_stats_main.arena_rollback_count,1);
// retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries <= 2) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
}
else {
return false;
}
}
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
mi_assert_internal(count > 0);
if (count <= 2) {
// we don't bother with crossover fields for small counts
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
}
// visit the fields
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
// first try to claim inside a field
/*
if (count <= MI_BITMAP_FIELD_BITS) {
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
*/
// if that fails, then try to claim across fields
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) {
return true;
}
}
return false;
}
// Helper for masks across fields; returns the mid count, post_mask may be 0
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
MI_UNUSED(bitmap_fields);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
*pre_mask = mi_bitmap_mask_(count, bitidx);
*mid_mask = 0;
*post_mask = 0;
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
return 0;
}
else {
const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
mi_assert_internal(pre_bits < count);
*pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
count -= pre_bits;
const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
*mid_mask = MI_BITMAP_FIELD_FULL;
count %= MI_BITMAP_FIELD_BITS;
*post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
return mid_count;
}
}
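A worked example of the split computed above, assuming 64-bit fields (`MI_BITMAP_FIELD_BITS == 64`): a run of 70 bits starting at bit 60 of field 0.

  // pre_mask  : bits 60..63 of field 0   (pre_bits == 4)
  // mid_mask  : MI_BITMAP_FIELD_FULL     (mid_count == 1, i.e. all of field 1)
  // post_mask : bits 0..1 of field 2     (the remaining 2 bits)
  // return value: mid_count == 1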
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
if ((prev & pre_mask) != pre_mask) all_one = false;
while(mid_count-- > 0) {
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
if ((prev & mid_mask) != mid_mask) all_one = false;
}
if (post_mask!=0) {
prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
if ((prev & post_mask) != post_mask) all_one = false;
}
return all_one;
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_zero = true;
bool any_zero = false;
size_t one_count = 0;
_Atomic(size_t)*field = &bitmap[idx];
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); }
if ((prev & pre_mask) != pre_mask) any_zero = true;
while (mid_count-- > 0) {
prev = mi_atomic_or_acq_rel(field++, mid_mask);
if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); }
if ((prev & mid_mask) != mid_mask) any_zero = true;
}
if (post_mask!=0) {
prev = mi_atomic_or_acq_rel(field, post_mask);
if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); }
if ((prev & post_mask) != post_mask) any_zero = true;
}
if (pany_zero != NULL) { *pany_zero = any_zero; }
if (already_set != NULL) { *already_set = one_count; };
mi_assert_internal(all_zero ? one_count == 0 : one_count <= count);
return all_zero;
}
// Returns `true` if all `count` bits were 1.
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_ones = true;
bool any_ones = false;
size_t one_count = 0;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_load_relaxed(field++);
if ((prev & pre_mask) != pre_mask) all_ones = false;
if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); }
while (mid_count-- > 0) {
prev = mi_atomic_load_relaxed(field++);
if ((prev & mid_mask) != mid_mask) all_ones = false;
if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); }
}
if (post_mask!=0) {
prev = mi_atomic_load_relaxed(field);
if ((prev & post_mask) != post_mask) all_ones = false;
if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); }
}
if (pany_ones != NULL) { *pany_ones = any_ones; }
if (already_set != NULL) { *already_set = one_count; }
mi_assert_internal(all_ones ? one_count == count : one_count < count);
return all_ones;
}
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) {
return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set);
}
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL);
return any_ones;
}

119
compat/mimalloc/bitmap.h Normal file

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return mi_bitmap_index_create_ex(idx,bitidx);
}
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) {
return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS);
}
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}
// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fulfilled
typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg);
bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set);
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#endif

588
compat/mimalloc/free.c Normal file

@ -0,0 +1,588 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if !defined(MI_IN_ALLOC_C)
#error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)"
// add includes to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h" // _mi_prim_thread_id()
#endif
// forward declarations
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block);
static bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block);
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block);
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// ------------------------------------------------------
// Free
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack
static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full)
{
// checks
if mi_unlikely(mi_check_is_double_free(page, block)) return;
mi_check_padding(page, block);
if (track_stats) { mi_stat_free(page, block); }
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_GUARDED
if (!mi_page_is_huge(page)) { // huge page content may be already decommitted
memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
}
#endif
if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster than mi_usable_size as we already know the page and that p is unaligned
// actual free: push on the local free list
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
if mi_unlikely(--page->used == 0) {
_mi_page_retire(page);
}
else if mi_unlikely(check_full && mi_page_is_in_full(page)) {
_mi_page_unfull(page);
}
}
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the
// `page_start` and `block_size` fields; however these are constant and the page won't be
// deallocated (as the block we are freeing keeps it alive) and thus safe to read concurrently.
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - page->page_start;
size_t adjust;
if mi_likely(page->block_size_shift != 0) {
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
}
else {
adjust = diff % mi_page_block_size(page);
}
return (mi_block_t*)((uintptr_t)p - adjust);
}
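A worked example of the adjustment above, with illustrative numbers: a page of 64-byte blocks has `block_size_shift == 6`, so for a pointer 150 bytes past `page_start`:

  // diff   = 150
  // adjust = 150 & ((1 << 6) - 1) = 150 & 63 = 22   (same result as 150 % 64)
  // block  = p - 22, i.e. the block that starts 128 bytes into the page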
// forward declaration for a MI_GUARDED build
#if MI_GUARDED
static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p); // forward declaration
static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) {
if (mi_block_ptr_is_guarded(block, p)) { mi_block_unguard(page, block, p); }
}
#else
static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) {
MI_UNUSED(page); MI_UNUSED(block); MI_UNUSED(p);
}
#endif
// free a local pointer (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
MI_UNUSED(segment);
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
mi_block_check_unguard(page, block, p);
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}
// free a pointer owned by another thread (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
mi_block_check_unguard(page, block, p);
mi_free_block_mt(page, segment, block);
}
// generic free (for runtime integration)
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,segment,p);
else mi_free_generic_mt(page,segment,p);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
MI_UNUSED(msg);
#if (MI_DEBUG>0)
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
if mi_unlikely(segment==NULL) return segment;
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
#if (MI_INTPTR_SIZE == 8 && defined(__linux__))
if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640)
#else
{
#endif
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
}
}
}
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
return NULL;
}
#endif
return segment;
}
// Free a block
// Fast path written carefully to prevent register spilling on the stack
static inline void mi_free_ex(void* p, size_t* usable) mi_attr_noexcept
{
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if mi_unlikely(segment==NULL) return;
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_page_t* const page = _mi_segment_page_of(segment, p);
if (usable!=NULL) { *usable = mi_page_usable_block_size(page); }
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
}
else {
// page is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, segment, p);
}
}
else {
// not thread-local; use generic path
mi_free_generic_mt(page, segment, p);
}
}
void mi_free(void* p) mi_attr_noexcept {
mi_free_ex(p,NULL);
}
void mi_ufree(void* p, size_t* usable) mi_attr_noexcept {
mi_free_ex(p,usable);
}
// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
const mi_segment_t* const segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* const page = _mi_segment_page_of(segment, block);
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
// some blocks may end up in the page `thread_free` list with no blocks in the
// heap `thread_delayed_free` list, which may cause the page to never be freed!
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
return false;
}
// collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since `used` is updated)
mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */);
return true;
}
// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------
// Push a block that is owned by another thread on its page-local thread free
// list or its heap delayed free list. Such blocks are later collected by
// the owning thread in `_mi_free_delayed_block`.
static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block )
{
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
// If this was the first non-local free, we need to push it on the heap delayed free list instead
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
}
}
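The push onto `xthread_free` above follows the usual compare-and-swap retry shape. A self-contained sketch of that shape with C11 atomics (mimalloc additionally encodes the delayed-free state in the low bits of the list head):

  #include <stdatomic.h>
  #include <stddef.h>

  typedef struct node_s { struct node_s* next; } node_t;

  // push `n` onto an atomic singly-linked list, retrying while other threads race us
  static void list_push(_Atomic(node_t*)* head, node_t* n) {
    node_t* old = atomic_load_explicit(head, memory_order_relaxed);
    do {
      n->next = old;  // link to the head we last observed
    } while (!atomic_compare_exchange_weak_explicit(
                 head, &old, n, memory_order_release, memory_order_relaxed));
  }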
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
{
// first see if the segment was abandoned and if we can reclaim it into our thread
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
#if MI_HUGE_PAGE_ABANDON
segment->page_kind != MI_PAGE_HUGE &&
#endif
mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
{
// the segment is abandoned, try to reclaim it into our heap
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}
}
// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
if (segment->kind == MI_SEGMENT_HUGE) {
#if MI_HUGE_PAGE_ABANDON
// huge page segments are always abandoned and can be freed immediately
_mi_segment_huge_page_free(segment, page, block);
return;
#else
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_segment_huge_page_reset(segment, page, block);
#endif
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
}
// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
}
// ------------------------------------------------------
// Usable size
// ------------------------------------------------------
// Bytes available in a block
static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* page, const void* p) mi_attr_noexcept {
const mi_block_t* block = _mi_page_ptr_unalign(page, p);
const size_t size = mi_page_usable_size_of(page, block);
const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
const size_t aligned_size = (size - adjust);
#if MI_GUARDED
if (mi_block_ptr_is_guarded(block, p)) {
return aligned_size - _mi_os_page_size();
}
#endif
return aligned_size;
}
static inline mi_page_t* mi_validate_ptr_page(const void* p, const char* msg) {
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
if mi_unlikely(segment==NULL) return NULL;
mi_page_t* const page = _mi_segment_page_of(segment, p);
return page;
}
static inline size_t _mi_usable_size(const void* p, const mi_page_t* page) mi_attr_noexcept {
if mi_unlikely(page==NULL) return 0;
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
}
else {
// split out to separate routine for improved code generation
return mi_page_usable_aligned_size_of(page, p);
}
}
mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
const mi_page_t* const page = mi_validate_ptr_page(p,"mi_usable_size");
return _mi_usable_size(p,page);
}
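A small usage example of the public entry point above (standard mimalloc API):

  #include <assert.h>
  #include <mimalloc.h>

  int main(void) {
    void* p = mi_malloc(10);
    // the usable size is at least the requested size; the extra bytes may be used safely
    assert(mi_usable_size(p) >= 10);
    mi_free(p);
    return 0;
  }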
// ------------------------------------------------------
// Free variants
// ------------------------------------------------------
void mi_free_size(void* p, size_t size) mi_attr_noexcept {
MI_UNUSED_RELEASE(size);
#if MI_DEBUG
const mi_page_t* const page = mi_validate_ptr_page(p,"mi_free_size");
const size_t available = _mi_usable_size(p,page);
mi_assert(p == NULL || size <= available || available == 0 /* invalid pointer */ );
#endif
mi_free(p);
}
void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free_size(p,size);
}
void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free(p);
}
// ------------------------------------------------------
// Check for double free in secure and debug mode
// This is somewhat expensive so only enabled for secure mode 4
// ------------------------------------------------------
#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0))
// linear check if the free list contains a specific element
static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) {
while (list != NULL) {
if (elem==list) return true;
list = mi_block_next(page, list);
}
return false;
}
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
// The decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, mi_page_thread_free(page), block))
{
_mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
return true;
}
return false;
}
#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); }
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
bool is_double_free = false;
mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
{
// Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
// (continue in separate function to improve code generation)
is_double_free = mi_check_is_double_freex(page, block);
}
return is_double_free;
}
#else
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page);
MI_UNUSED(block);
return false;
}
#endif
// ---------------------------------------------------------------------------
// Check for heap block overflow by setting up padding at the end of the block
// ---------------------------------------------------------------------------
#if MI_PADDING // && !MI_TRACK_ENABLED
static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
*bsize = mi_page_usable_block_size(page);
const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
mi_track_mem_defined(padding,sizeof(mi_padding_t));
*delta = padding->delta;
uint32_t canary = padding->canary;
uintptr_t keys[2];
keys[0] = page->keys[0];
keys[1] = page->keys[1];
bool ok = (mi_ptr_encode_canary(page,block,keys) == canary && *delta <= *bsize);
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
return ok;
}
// Return the exact usable size of a block.
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
return (ok ? bsize - delta : 0);
}
// When a non-thread-local block is freed, it becomes part of the thread delayed free
// list that is freed later by the owning heap. If the exact usable size is too small to
// contain the pointer for the delayed list, then shrink the padding (by decreasing delta)
// so it will later not trigger an overflow error in `mi_free_block`.
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok);
if (!ok || (bsize - delta) >= min_size) return; // usually already enough space
mi_assert_internal(bsize >= min_size);
if (bsize < min_size) return; // should never happen
size_t new_delta = (bsize - min_size);
mi_assert_internal(new_delta < bsize);
mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
mi_track_mem_defined(padding,sizeof(mi_padding_t));
padding->delta = (uint32_t)new_delta;
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
}
#else
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(block);
return mi_page_usable_block_size(page);
}
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
MI_UNUSED(page);
MI_UNUSED(block);
MI_UNUSED(min_size);
}
#endif
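A worked example of the shrink logic above, assuming a 64-bit build where `sizeof(mi_block_t)` is 8 bytes:

  // bsize = 8, delta = 4                  -> exact usable size is 8 - 4 = 4 bytes
  // min_size = sizeof(mi_block_t) = 8     -> 4 < 8, so the padding is shrunk
  // new_delta = 8 - 8 = 0                 -> the block can now hold the delayed-free
  //                                          pointer without tripping the padding check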
#if MI_PADDING && MI_PADDING_CHECK
static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
*size = *wrong = bsize;
if (!ok) return false;
mi_assert_internal(bsize >= delta);
*size = bsize - delta;
if (!mi_page_is_huge(page)) {
uint8_t* fill = (uint8_t*)block + bsize - delta;
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
mi_track_mem_defined(fill, maxpad);
for (size_t i = 0; i < maxpad; i++) {
if (fill[i] != MI_DEBUG_PADDING) {
*wrong = bsize - delta + i;
ok = false;
break;
}
}
mi_track_mem_noaccess(fill, maxpad);
}
return ok;
}
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
size_t size;
size_t wrong;
if (!mi_verify_padding(page,block,&size,&wrong)) {
_mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong );
}
}
#else
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page);
MI_UNUSED(block);
}
#endif
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(block);
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
// #if (MI_STAT>1)
// const size_t usize = mi_page_usable_size_of(page, block);
// mi_heap_stat_decrease(heap, malloc_requested, usize);
// #endif
if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, malloc_normal, bsize);
#if (MI_STAT > 1)
mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], 1);
#endif
}
//else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
// mi_heap_stat_decrease(heap, malloc_large, bsize);
//}
else {
mi_heap_stat_decrease(heap, malloc_huge, bsize);
}
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page); MI_UNUSED(block);
}
#endif
// Remove guard page when building with MI_GUARDED
#if MI_GUARDED
static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p) {
MI_UNUSED(p);
mi_assert_internal(mi_block_ptr_is_guarded(block, p));
mi_assert_internal(mi_page_has_aligned(page));
mi_assert_internal((uint8_t*)p - (uint8_t*)block >= (ptrdiff_t)sizeof(mi_block_t));
mi_assert_internal(block->next == MI_BLOCK_TAG_GUARDED);
const size_t bsize = mi_page_block_size(page);
const size_t psize = _mi_os_page_size();
mi_assert_internal(bsize > psize);
mi_assert_internal(_mi_page_segment(page)->allow_decommit);
void* gpage = (uint8_t*)block + bsize - psize;
mi_assert_internal(_mi_is_aligned(gpage, psize));
_mi_os_unprotect(gpage, psize);
}
#endif

737
compat/mimalloc/heap.c Normal file

@ -0,0 +1,737 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // mi_prim_get_default_heap
#include <string.h> // memset, memcpy
#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
/* -----------------------------------------------------------
Helpers
----------------------------------------------------------- */
// return `true` if ok, `false` to break
typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2);
// Visit all pages in a heap; returns `false` if break was called.
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2)
{
if (heap==NULL || heap->page_count==0) return 0;
// visit all pages
#if MI_DEBUG>1
size_t total = heap->page_count;
size_t count = 0;
#endif
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
mi_page_queue_t* pq = &heap->pages[i];
mi_page_t* page = pq->first;
while(page != NULL) {
mi_page_t* next = page->next; // save next in case the page gets removed from the queue
mi_assert_internal(mi_page_heap(page) == heap);
#if MI_DEBUG>1
count++;
#endif
if (!fn(heap, pq, page, arg1, arg2)) return false;
page = next; // and continue
}
}
mi_assert_internal(count == total);
return true;
}
#if MI_DEBUG>=2
static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(pq);
mi_assert_internal(mi_page_heap(page) == heap);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == heap->thread_id);
mi_assert_expensive(_mi_page_is_valid(page));
return true;
}
#endif
#if MI_DEBUG>=3
static bool mi_heap_is_valid(mi_heap_t* heap) {
mi_assert_internal(heap!=NULL);
mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);
return true;
}
#endif
/* -----------------------------------------------------------
"Collect" pages by migrating `local_free` and `thread_free`
lists and freeing empty pages. This is done when a thread
stops (and in that case abandons pages if there are still
blocks alive)
----------------------------------------------------------- */
typedef enum mi_collect_e {
MI_NORMAL,
MI_FORCE,
MI_ABANDON
} mi_collect_t;
static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
MI_UNUSED(arg2);
MI_UNUSED(heap);
mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
mi_collect_t collect = *((mi_collect_t*)arg_collect);
_mi_page_free_collect(page, collect >= MI_FORCE);
if (collect == MI_FORCE) {
// note: call before a potential `_mi_page_free` as the segment may be freed if this was the last used page in that segment.
mi_segment_t* segment = _mi_page_segment(page);
_mi_segment_collect(segment, true /* force? */);
}
if (mi_page_all_free(page)) {
// no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
}
else if (collect == MI_ABANDON) {
// still used blocks but the thread is done; abandon the page
_mi_page_abandon(page, pq);
}
return true; // don't break
}
static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break
}
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
const bool force = (collect >= MI_FORCE);
_mi_deferred_free(heap, force);
// python/cpython#112532: we may be called from a thread that is not the owner of the heap
const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id());
// note: never reclaim on collect but leave it to threads that need storage to reclaim
const bool force_main =
#ifdef NDEBUG
collect == MI_FORCE
#else
collect >= MI_FORCE
#endif
&& is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim;
if (force_main) {
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
// if all memory is freed by now, all segments should be freed.
// note: this only collects in the current subprocess
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
}
// free all current thread delayed blocks.
// (if abandoning, after this there are no more thread-delayed references into the pages.)
_mi_heap_delayed_free_all(heap);
// collect retired pages
_mi_heap_collect_retired(heap, force);
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list)
// note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment
_mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments);
// if forced, collect thread data cache on program-exit (or shared library unload)
if (force && is_main_thread && mi_heap_is_backing(heap)) {
_mi_thread_data_collect(); // collect thread data cache
}
// collect arenas (this is program wide so don't force purges on abandonment of threads)
_mi_arenas_collect(collect == MI_FORCE /* force purge? */);
// merge statistics
if (collect <= MI_FORCE) { _mi_stats_merge_thread(heap->tld); }
}
void _mi_heap_collect_abandon(mi_heap_t* heap) {
mi_heap_collect_ex(heap, MI_ABANDON);
}
void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept {
mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL));
}
void mi_collect(bool force) mi_attr_noexcept {
mi_heap_collect(mi_prim_get_default_heap(), force);
}
/* -----------------------------------------------------------
Heap new
----------------------------------------------------------- */
mi_heap_t* mi_heap_get_default(void) {
mi_thread_init();
return mi_prim_get_default_heap();
}
static bool mi_heap_is_default(const mi_heap_t* heap) {
return (heap == mi_prim_get_default_heap());
}
mi_heap_t* mi_heap_get_backing(void) {
mi_heap_t* heap = mi_heap_get_default();
mi_assert_internal(heap!=NULL);
mi_heap_t* bheap = heap->tld->heap_backing;
mi_assert_internal(bheap!=NULL);
mi_assert_internal(bheap->thread_id == _mi_thread_id());
return bheap;
}
void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) {
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = tld;
heap->thread_id = _mi_thread_id();
heap->arena_id = arena_id;
heap->no_reclaim = noreclaim;
heap->tag = tag;
if (heap == tld->heap_backing) {
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&heap->random); // prevent allocation failure during bcrypt dll initialization with static linking (issue #1185)
#else
_mi_random_init(&heap->random);
#endif
}
else {
_mi_random_split(&tld->heap_backing->random, &heap->random);
}
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->keys[0] = _mi_heap_random_next(heap);
heap->keys[1] = _mi_heap_random_next(heap);
_mi_heap_guarded_init(heap);
// push on the thread local heaps list
heap->next = heap->tld->heaps;
heap->tld->heaps = heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap == NULL) return NULL;
mi_assert(heap_tag >= 0 && heap_tag < 256);
_mi_heap_init(heap, bheap->tld, arena_id, allow_destroy /* no reclaim? */, (uint8_t)heap_tag /* heap tag */);
return heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
return mi_heap_new_ex(0 /* default heap tag */, false /* don't allow `mi_heap_destroy` */, arena_id);
}
mi_decl_nodiscard mi_heap_t* mi_heap_new(void) {
// don't reclaim abandoned memory, as otherwise `mi_heap_destroy` would be unsafe
return mi_heap_new_ex(0 /* default heap tag */, true /* no reclaim */, _mi_arena_id_none());
}
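// Illustrative sketch (not part of the upstream mimalloc sources), assuming the usual
// `mi_heap_malloc` entry point from mimalloc.h: a heap created with `mi_heap_new` above
// uses `no_reclaim`, so it may be torn down either with `mi_heap_delete` (which migrates
// or abandons live blocks) or with `mi_heap_destroy` (which drops all remaining blocks):
//
//   mi_heap_t* h = mi_heap_new();
//   void* p = mi_heap_malloc(h, 128);   // allocate from this specific heap
//   mi_free(p);                         // optional; destroy would release it anyway
//   mi_heap_destroy(h);                 // frees every remaining block of `h` at once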
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) {
return _mi_arena_memid_is_suitable(memid, heap->arena_id);
}
uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
return _mi_random_next(&heap->random);
}
// zero out the page queues
static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
// TODO: copy full empty heap instead?
memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
heap->page_count = 0;
}
// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.
static void mi_heap_free(mi_heap_t* heap) {
mi_assert(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
if (mi_heap_is_backing(heap)) return; // don't free the backing heap
// reset default
if (mi_heap_is_default(heap)) {
_mi_heap_set_default_direct(heap->tld->heap_backing);
}
// remove ourselves from the thread local heaps list
// linear search but we expect the number of heaps to be relatively small
mi_heap_t* prev = NULL;
mi_heap_t* curr = heap->tld->heaps;
while (curr != heap && curr != NULL) {
prev = curr;
curr = curr->next;
}
mi_assert_internal(curr == heap);
if (curr == heap) {
if (prev != NULL) { prev->next = heap->next; }
else { heap->tld->heaps = heap->next; }
}
mi_assert_internal(heap->tld->heaps != NULL);
// and free the used memory
mi_free(heap);
}
// return a heap on the same thread as `heap` specialized for the specified tag (if it exists)
mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag) {
if (heap->tag == tag) {
return heap;
}
for (mi_heap_t *curr = heap->tld->heaps; curr != NULL; curr = curr->next) {
if (curr->tag == tag) {
return curr;
}
}
return NULL;
}
/* -----------------------------------------------------------
Heap destroy
----------------------------------------------------------- */
static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats
const size_t bsize = mi_page_block_size(page);
if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) {
//if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
// mi_heap_stat_decrease(heap, malloc_large, bsize);
//}
//else
{
mi_heap_stat_decrease(heap, malloc_huge, bsize);
}
}
#if (MI_STAT>0)
_mi_page_free_collect(page, false); // update used count
const size_t inuse = page->used;
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse);
#if (MI_STAT>1)
mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse);
#endif
}
// mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks...
#endif
/// pretend it is all free now
mi_assert_internal(mi_page_thread_free(page) == NULL);
page->used = 0;
// and free the page
// mi_page_free(page,false);
page->next = NULL;
page->prev = NULL;
_mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
return true; // keep going
}
void _mi_heap_destroy_pages(mi_heap_t* heap) {
mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL);
mi_heap_reset_pages(heap);
}
#if MI_TRACK_HEAP_DESTROY
static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) {
MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size);
mi_track_free_size(block,mi_usable_size(block));
return true;
}
#endif
void mi_heap_destroy(mi_heap_t* heap) {
mi_assert(heap != NULL);
mi_assert(mi_heap_is_initialized(heap));
mi_assert(heap->no_reclaim);
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
#if MI_GUARDED
// _mi_warning_message("'mi_heap_destroy' called but MI_GUARDED is enabled -- using `mi_heap_delete` instead (heap at %p)\n", heap);
mi_heap_delete(heap);
return;
#else
if (!heap->no_reclaim) {
_mi_warning_message("'mi_heap_destroy' called but ignored as the heap was not created with 'allow_destroy' (heap at %p)\n", heap);
// don't free in case it may contain reclaimed pages
mi_heap_delete(heap);
}
else {
// track all blocks as freed
#if MI_TRACK_HEAP_DESTROY
mi_heap_visit_blocks(heap, true, mi_heap_track_block_free, NULL);
#endif
// free all pages
_mi_heap_destroy_pages(heap);
mi_heap_free(heap);
}
#endif
}
// forcefully destroy all heaps in the current thread
void _mi_heap_unsafe_destroy_all(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
if (heap == NULL) return;
mi_heap_t* curr = heap->tld->heaps;
while (curr != NULL) {
mi_heap_t* next = curr->next;
if (curr->no_reclaim) {
mi_heap_destroy(curr);
}
else {
_mi_heap_destroy_pages(curr);
}
curr = next;
}
}
/* -----------------------------------------------------------
Safe Heap delete
----------------------------------------------------------- */
// Transfer the pages from one heap to the other
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
mi_assert_internal(heap!=NULL);
if (from==NULL || from->page_count == 0) return;
// reduce the size of the delayed frees
_mi_heap_delayed_free_partial(from);
// transfer all pages by appending the queues; this will set a new heap field
// so threads may do delayed frees in either heap for a while.
// note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
// so after this only the new heap will get delayed frees
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
mi_page_queue_t* pq = &heap->pages[i];
mi_page_queue_t* append = &from->pages[i];
size_t pcount = _mi_page_queue_append(heap, pq, append);
heap->page_count += pcount;
from->page_count -= pcount;
}
mi_assert_internal(from->page_count == 0);
// and do outstanding delayed frees in the `from` heap
// note: be careful here as the `heap` field in all those pages no longer points to `from`;
// this turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls
// the regular `_mi_free_delayed_block` which is safe.
_mi_heap_delayed_free_all(from);
#if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
#endif
// and reset the `from` heap
mi_heap_reset_pages(from);
}
// are two heaps compatible with respect to heap-tag, exclusive arena etc.
static bool mi_heaps_are_compatible(mi_heap_t* heap1, mi_heap_t* heap2) {
return (heap1->tag == heap2->tag && // store same kind of objects
heap1->arena_id == heap2->arena_id); // same arena preference
}
// Safe delete a heap without freeing any still allocated blocks in that heap.
void mi_heap_delete(mi_heap_t* heap)
{
mi_assert(heap != NULL);
mi_assert(mi_heap_is_initialized(heap));
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
mi_heap_t* bheap = heap->tld->heap_backing;
if (bheap != heap && mi_heaps_are_compatible(bheap,heap)) {
// transfer still used pages to the backing heap
mi_heap_absorb(bheap, heap);
}
else {
// the backing heap abandons its pages
_mi_heap_collect_abandon(heap);
}
mi_assert_internal(heap->page_count==0);
mi_heap_free(heap);
}
mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
mi_assert(heap != NULL);
mi_assert(mi_heap_is_initialized(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL;
mi_assert_expensive(mi_heap_is_valid(heap));
mi_heap_t* old = mi_prim_get_default_heap();
_mi_heap_set_default_direct(heap);
return old;
}
/* -----------------------------------------------------------
Analysis
----------------------------------------------------------- */
// static since it is not thread safe to access heaps from other threads.
static mi_heap_t* mi_heap_of_block(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
bool valid = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(valid);
if mi_unlikely(!valid) return NULL;
return mi_page_heap(_mi_segment_page_of(segment,p));
}
bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
mi_assert(heap != NULL);
if (heap==NULL || !mi_heap_is_initialized(heap)) return false;
return (heap == mi_heap_of_block(p));
}
static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) {
MI_UNUSED(heap);
MI_UNUSED(pq);
bool* found = (bool*)vfound;
void* start = mi_page_start(page);
void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page));
*found = (p >= start && p < end);
return (!*found); // continue if not found
}
bool mi_heap_check_owned(mi_heap_t* heap, const void* p) {
mi_assert(heap != NULL);
if (heap==NULL || !mi_heap_is_initialized(heap)) return false;
if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers
bool found = false;
mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found);
return found;
}
bool mi_check_owned(const void* p) {
return mi_heap_check_owned(mi_prim_get_default_heap(), p);
}
/* -----------------------------------------------------------
Visit all heap blocks and areas
Todo: enable visiting abandoned pages, and
enable visiting all blocks of all heaps across threads
----------------------------------------------------------- */
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page);
area->reserved = page->reserved * bsize;
area->committed = page->capacity * bsize;
area->blocks = mi_page_start(page);
area->used = page->used; // number of blocks in use (#553)
area->block_size = ubsize;
area->full_block_size = bsize;
area->heap_tag = page->heap_tag;
}
static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
*shift = MI_SIZE_BITS - mi_clz(divisor - 1);
*magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
}
static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) {
mi_assert_internal(n <= UINT32_MAX);
const uint64_t hi = ((uint64_t)n * magic) >> 32;
return (size_t)((hi + n) >> shift);
}
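// Worked example (illustrative, not part of the upstream sources): for a block size of
// 48 with MI_SIZE_BITS == 64, `mi_get_fast_divisor(48, &magic, &shift)` yields shift = 6
// (2^6 = 64 is the next power of two above 48) and
// magic = (2^32 * (64 - 48)) / 48 + 1 = 1431655766. Then `mi_fast_divide(96, magic, shift)`
// computes hi = (96 * 1431655766) >> 32 = 32 and returns (32 + 96) >> 6 = 2 == 96 / 48,
// replacing a division per free block in the loop below with a multiply and two shifts.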
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
mi_assert(area != NULL);
if (area==NULL) return true;
mi_assert(page != NULL);
if (page == NULL) return true;
_mi_page_free_collect(page,true); // collect both thread_delayed and local_free
mi_assert_internal(page->local_free == NULL);
if (page->used == 0) return true;
size_t psize;
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
mi_heap_t* const heap = mi_page_heap(page);
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding
// optimize page with one block
if (page->capacity == 1) {
mi_assert_internal(page->used == 1 && page->free == NULL);
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
}
mi_assert(bsize <= UINT32_MAX);
// optimize full pages
if (page->used == page->capacity) {
uint8_t* block = pstart;
for (size_t i = 0; i < page->capacity; i++) {
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
return true;
}
// create a bitmap of free blocks.
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS);
memset(free_map, 0, bmapsize * sizeof(intptr_t));
if (page->capacity % MI_INTPTR_BITS != 0) {
// mark left-over bits at the end as free
size_t shift = (page->capacity % MI_INTPTR_BITS);
uintptr_t mask = (UINTPTR_MAX << shift);
free_map[bmapsize - 1] = mask;
}
// fast repeated division by the block size
uint64_t magic;
size_t shift;
mi_get_fast_divisor(bsize, &magic, &shift);
#if MI_DEBUG>1
size_t free_count = 0;
#endif
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
#if MI_DEBUG>1
free_count++;
#endif
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
size_t offset = (uint8_t*)block - pstart;
mi_assert_internal(offset % bsize == 0);
mi_assert_internal(offset <= UINT32_MAX);
size_t blockidx = mi_fast_divide(offset, magic, shift);
mi_assert_internal(blockidx == offset / bsize);
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / MI_INTPTR_BITS);
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
free_map[bitidx] |= ((uintptr_t)1 << bit);
}
mi_assert_internal(page->capacity == (free_count + page->used));
// walk through all blocks skipping the free ones
#if MI_DEBUG>1
size_t used_count = 0;
#endif
uint8_t* block = pstart;
for (size_t i = 0; i < bmapsize; i++) {
if (free_map[i] == 0) {
// every block is in use
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
#if MI_DEBUG>1
used_count++;
#endif
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
}
else {
// visit the used blocks in the mask
uintptr_t m = ~free_map[i];
while (m != 0) {
#if MI_DEBUG>1
used_count++;
#endif
size_t bitidx = mi_ctz(m);
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
m &= m - 1; // clear least significant bit
}
block += bsize * MI_INTPTR_BITS;
}
}
mi_assert_internal(page->used == used_count);
return true;
}
// Separate struct to keep `mi_page_t` out of the public interface
typedef struct mi_heap_area_ex_s {
mi_heap_area_t area;
mi_page_t* page;
} mi_heap_area_ex_t;
typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
MI_UNUSED(heap);
MI_UNUSED(pq);
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
mi_heap_area_ex_t xarea;
xarea.page = page;
_mi_heap_area_init(&xarea.area, page);
return fun(heap, &xarea, arg);
}
// Visit all heap pages as areas
static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) {
if (visitor == NULL) return false;
return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{
}
// Just to pass arguments
typedef struct mi_visit_blocks_args_s {
bool visit_blocks;
mi_block_visit_fun* visitor;
void* arg;
} mi_visit_blocks_args_t;
static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* xarea, void* arg) {
mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
if (args->visit_blocks) {
return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg);
}
else {
return true;
}
}
// Visit all blocks in a heap
bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
}
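// Illustrative sketch (not part of the upstream sources): a visitor passed to
// `mi_heap_visit_blocks` is called once per area with `block == NULL`, and then once per
// block in use when `visit_blocks` is true; e.g. counting the live blocks of a heap:
//
//   static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
//                            void* block, size_t block_size, void* arg) {
//     MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(block_size);
//     if (block != NULL) { (*(size_t*)arg)++; }  // skip the per-area callback
//     return true;                               // keep visiting
//   }
//
//   size_t used = 0;
//   mi_heap_visit_blocks(heap, true /* visit individual blocks */, &count_blocks, &used);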

715
compat/mimalloc/init.c Normal file
View File

@ -0,0 +1,715 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h"
#include <string.h> // memcpy, memset
#include <stdlib.h> // atexit
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0,
false, false, false, false,
0, // capacity
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // free
NULL, // local_free
0, // used
0, // block size shift
0, // heap tag
0, // block_size
NULL, // page_start
#if (MI_PADDING || MI_ENCODE_FREELIST)
{ 0, 0 },
#endif
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
, { 0 } // padding
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
#if (MI_SMALL_WSIZE_MAX==128)
#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8)
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#elif (MI_PADDING>0)
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#else
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
#endif
#else
#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX"
#endif
// Empty page queues for every bin
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \
QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0}
// Empty statistics
#define MI_STATS_NULL \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
{ 0 }, { 0 }, \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
{ 0 }, { 0 }, { 0 }, { 0 }, \
{ 0 }, { 0 }, { 0 }, { 0 }, \
\
{ 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, \
MI_INIT4(MI_STAT_COUNT_NULL), \
{ 0 }, { 0 }, { 0 }, { 0 }, \
\
{ MI_INIT4(MI_STAT_COUNT_NULL) }, \
{ { 0 }, { 0 }, { 0 }, { 0 } }, \
\
{ MI_INIT74(MI_STAT_COUNT_NULL) }, \
{ MI_INIT74(MI_STAT_COUNT_NULL) }
// Empty slice span queues for every bin
#define SQNULL(sz) { NULL, NULL, sz }
#define MI_SEGMENT_SPAN_QUEUES_EMPTY \
{ SQNULL(1), \
SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \
SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \
SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \
SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \
SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ }
// --------------------------------------------------------
// Statically allocate an empty heap as the initial
// thread local value for the default heap,
// and statically allocate the backing heap for the main
// thread so it can function without doing any allocation
// itself (as accessing a thread local for the first time
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
MI_ATOMIC_VAR_INIT(NULL),
0, // tid
0, // cookie
0, // arena id
{ 0, 0 }, // keys
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, 0, // generic count
NULL, // next
false, // can reclaim
0, // tag
#if MI_GUARDED
0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
};
static mi_decl_cache_align mi_subproc_t mi_subproc_default;
#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
mi_decl_cache_align static const mi_tld_t tld_empty = {
0,
false,
NULL, NULL,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, tld_empty_stats }, // segments
{ MI_STAT_VERSION, MI_STATS_NULL } // stats
};
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return _mi_prim_thread_id();
}
// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
extern mi_decl_hidden mi_heap_t _mi_heap_main;
static mi_decl_cache_align mi_tld_t tld_main = {
0, false,
&_mi_heap_main, & _mi_heap_main,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, &tld_main.stats }, // segments
{ MI_STAT_VERSION, MI_STATS_NULL } // stats
};
mi_decl_cache_align mi_heap_t _mi_heap_main = {
&tld_main,
MI_ATOMIC_VAR_INIT(NULL),
0, // thread id
0, // initial cookie
0, // arena id
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, 0, // generic count
NULL, // next heap
false, // can reclaim
0, // tag
#if MI_GUARDED
0, 0, 0, 0,
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
};
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL };
#if MI_GUARDED
mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
heap->guarded_sample_rate = sample_rate;
heap->guarded_sample_count = sample_rate; // count down samples
if (heap->guarded_sample_rate > 1) {
if (seed == 0) {
seed = _mi_heap_random_next(heap);
}
heap->guarded_sample_count = (seed % heap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate`
}
}
mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
heap->guarded_size_min = min;
heap->guarded_size_max = (min > max ? min : max);
}
void _mi_heap_guarded_init(mi_heap_t* heap) {
mi_heap_guarded_set_sample_rate(heap,
(size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX),
(size_t)mi_option_get(mi_option_guarded_sample_seed));
mi_heap_guarded_set_size_bound(heap,
(size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX),
(size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
}
#else
mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
MI_UNUSED(heap); MI_UNUSED(sample_rate); MI_UNUSED(seed);
}
mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
MI_UNUSED(heap); MI_UNUSED(min); MI_UNUSED(max);
}
void _mi_heap_guarded_init(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#endif
static void mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = 1;
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking
#else
_mi_random_init(&_mi_heap_main.random);
#endif
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
mi_lock_init(&mi_subproc_default.abandoned_os_lock);
mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock);
_mi_heap_guarded_init(&_mi_heap_main);
}
}
mi_heap_t* _mi_heap_main_get(void) {
mi_heap_main_init();
return &_mi_heap_main;
}
/* -----------------------------------------------------------
Sub process
----------------------------------------------------------- */
mi_subproc_id_t mi_subproc_main(void) {
return NULL;
}
mi_subproc_id_t mi_subproc_new(void) {
mi_memid_t memid = _mi_memid_none();
mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
if (subproc == NULL) return NULL;
subproc->memid = memid;
subproc->abandoned_os_list = NULL;
mi_lock_init(&subproc->abandoned_os_lock);
mi_lock_init(&subproc->abandoned_os_visit_lock);
return subproc;
}
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
}
void mi_subproc_delete(mi_subproc_id_t subproc_id) {
if (subproc_id == NULL) return;
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
// check if there are no abandoned segments still..
bool safe_to_delete = false;
mi_lock(&subproc->abandoned_os_lock) {
if (subproc->abandoned_os_list == NULL) {
safe_to_delete = true;
}
}
if (!safe_to_delete) return;
// safe to release
// todo: should we refcount subprocesses?
mi_lock_done(&subproc->abandoned_os_lock);
mi_lock_done(&subproc->abandoned_os_visit_lock);
_mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
}
void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
mi_heap_t* heap = mi_heap_get_default();
if (heap == NULL) return;
mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
if (heap->tld->segments.subproc != &mi_subproc_default) return;
heap->tld->segments.subproc = _mi_subproc_from_id(subproc_id);
}
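// Illustrative sketch (not part of the upstream sources): a component that wants the
// abandoned segments of its own threads handled separately from the rest of the process
// can create a sub-process id and register each of its threads with it early on:
//
//   mi_subproc_id_t sub = mi_subproc_new();
//   // ... on each owned thread, before it leaves the default sub-process:
//   mi_subproc_add_current_thread(sub);
//   // ... later, once no abandoned segments remain in it:
//   mi_subproc_delete(sub);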
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
----------------------------------------------------------- */
// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
mi_memid_t memid; // must come last due to zero'ing
} mi_thread_data_t;
// Thread meta-data is allocated directly from the OS. For
// some programs that do not use thread pools and allocate and
// destroy many OS threads, this may cause too much overhead
// per thread so we maintain a small cache of recently freed metadata.
#define TD_CACHE_SIZE (32)
static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
static mi_thread_data_t* mi_thread_data_zalloc(void) {
// try to find thread metadata in the cache
mi_thread_data_t* td = NULL;
for (int i = 0; i < TD_CACHE_SIZE; i++) {
td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
// found a cached allocation, try to use it
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
_mi_memzero(td, offsetof(mi_thread_data_t,memid));
return td;
}
}
}
// if that fails, allocate as meta data
mi_memid_t memid;
td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid);
if (td == NULL) {
// if this fails, try once more. (issue #257)
td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid);
if (td == NULL) {
// really out of memory
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
return NULL;
}
}
td->memid = memid;
return td;
}
static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
// try to add the thread metadata to the cache
for (int i = 0; i < TD_CACHE_SIZE; i++) {
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td == NULL) {
mi_thread_data_t* expected = NULL;
if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
return;
}
}
}
// if that fails, just free it directly
_mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid);
}
void _mi_thread_data_collect(void) {
// free all thread metadata from the cache
for (int i = 0; i < TD_CACHE_SIZE; i++) {
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
_mi_os_free(td, sizeof(mi_thread_data_t), td->memid);
}
}
}
}
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_thread_heap_init(void) {
if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true;
if (_mi_is_main_thread()) {
// mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
// the main heap is statically allocated
mi_heap_main_init();
_mi_heap_set_default_direct(&_mi_heap_main);
//mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = mi_thread_data_zalloc();
if (td == NULL) return false;
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
_mi_tld_init(tld, heap); // must be before `_mi_heap_init`
_mi_heap_init(heap, tld, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */);
_mi_heap_set_default_direct(heap);
}
return false;
}
// initialize thread local data
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
_mi_memcpy_aligned(tld, &tld_empty, sizeof(mi_tld_t));
tld->heap_backing = bheap;
tld->heaps = NULL;
tld->segments.subproc = &mi_subproc_default;
tld->segments.stats = &tld->stats;
}
// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_thread_heap_done(mi_heap_t* heap) {
if (!mi_heap_is_initialized(heap)) return true;
// reset default heap
_mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
// switch to backing heap
heap = heap->tld->heap_backing;
if (!mi_heap_is_initialized(heap)) return false;
// delete all non-backing heaps in this thread
mi_heap_t* curr = heap->tld->heaps;
while (curr != NULL) {
mi_heap_t* next = curr->next; // save `next` as `curr` will be freed
if (curr != heap) {
mi_assert_internal(!mi_heap_is_backing(curr));
mi_heap_delete(curr);
}
curr = next;
}
mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL);
mi_assert_internal(mi_heap_is_backing(heap));
// collect if not the main thread
if (heap != &_mi_heap_main) {
_mi_heap_collect_abandon(heap);
}
// merge stats
_mi_stats_done(&heap->tld->stats);
// free if not the main thread
if (heap != &_mi_heap_main) {
// the following assertion does not always hold for huge segments as those are always treated
// as abandoned: one may allocate it in one thread, but deallocate in another in which case
// the count can be too large or negative. todo: perhaps not count huge segments? see issue #363
// mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
mi_thread_data_free((mi_thread_data_t*)heap);
}
else {
#if 0
// never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
// there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_heap_destroy_pages(heap);
mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
#endif
}
return false;
}
// --------------------------------------------------------
// Try to run `mi_thread_done()` automatically so any memory
// owned by the thread but not yet released can be abandoned
// and re-owned by another thread.
//
// 1. windows dynamic library:
// call from DllMain on DLL_THREAD_DETACH
// 2. windows static library:
// use `FlsAlloc` to call a destructor when the thread is done
// 3. unix, pthreads:
// use a pthread key to call a destructor when a pthread is done
//
// In the last two cases we also need to call `mi_process_init`
// to set up the thread local keys.
// --------------------------------------------------------
// Set up handlers so `mi_thread_done` is called automatically
static void mi_process_setup_auto_thread_done(void) {
static bool tls_initialized = false; // fine if it races
if (tls_initialized) return;
tls_initialized = true;
_mi_prim_thread_init_auto_done();
_mi_heap_set_default_direct(&_mi_heap_main);
}
bool _mi_is_main_thread(void) {
return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
}
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
// This is called from the `mi_malloc_generic`
void mi_thread_init(void) mi_attr_noexcept
{
// ensure our process has started already
mi_process_init();
// initialize the thread local default heap
// (this will call `_mi_heap_set_default_direct` and thus set the
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
if (_mi_thread_heap_init()) return; // returns true if already initialized
_mi_stat_increase(&_mi_stats_main.threads, 1);
mi_atomic_increment_relaxed(&thread_count);
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
void mi_thread_done(void) mi_attr_noexcept {
_mi_thread_done(NULL);
}
void _mi_thread_done(mi_heap_t* heap)
{
// calling with NULL implies using the default heap
if (heap == NULL) {
heap = mi_prim_get_default_heap();
if (heap == NULL) return;
}
// prevent re-entrancy through heap_done/heap_set_default_direct (issue #699)
if (!mi_heap_is_initialized(heap)) {
return;
}
// adjust stats
mi_atomic_decrement_relaxed(&thread_count);
_mi_stat_decrease(&_mi_stats_main.threads, 1);
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
if (heap->thread_id != _mi_thread_id()) return;
// abandon the thread local heap
if (_mi_thread_heap_done(heap)) return; // returns true if already ran
}
void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
#if defined(MI_TLS_SLOT)
mi_prim_tls_slot_set(MI_TLS_SLOT,heap);
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
*mi_prim_tls_pthread_heap_slot() = heap;
#elif defined(MI_TLS_PTHREAD)
// we use _mi_heap_default_key
#else
_mi_heap_default = heap;
#endif
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
_mi_prim_thread_associate_default_heap(heap);
}
void mi_thread_set_in_threadpool(void) mi_attr_noexcept {
// nothing
}
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
// --------------------------------------------------------
static bool os_preloading = true; // true until this module is initialized
// Returns true if this module has not been initialized; don't use C runtime routines until it returns false.
bool mi_decl_noinline _mi_preloading(void) {
return os_preloading;
}
// Returns true if mimalloc was redirected
mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept {
return _mi_is_redirected();
}
// Called once by the process loader from `src/prim/prim.c`
void _mi_auto_process_init(void) {
mi_heap_main_init();
#if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD)
volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697)
#endif
os_preloading = false;
mi_assert_internal(_mi_is_main_thread());
_mi_options_init();
mi_process_setup_auto_thread_done();
mi_process_init();
if (_mi_is_redirected()) _mi_verbose_message("malloc is redirected.\n");
// show message from the redirector (if present)
const char* msg = NULL;
_mi_allocator_init(&msg);
if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) {
_mi_fputs(NULL,NULL,NULL,msg);
}
// reseed random
_mi_random_reinit_if_weak(&_mi_heap_main.random);
}
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
mi_decl_cache_align bool _mi_cpu_has_fsrm = false;
mi_decl_cache_align bool _mi_cpu_has_erms = false;
static void mi_detect_cpu_features(void) {
// FSRM for fast short rep movsb/stosb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
// ERMS for fast enhanced rep movsb/stosb support
int32_t cpu_info[4];
__cpuid(cpu_info, 7);
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
_mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
}
#else
static void mi_detect_cpu_features(void) {
// nothing
}
#endif
// Initialize the process; called by thread_init or the process loader
void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
static mi_atomic_once_t process_init;
#if _MSC_VER < 1920
mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main
#endif
if (!mi_atomic_once(&process_init)) return;
_mi_process_is_initialized = true;
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
mi_process_setup_auto_thread_done();
mi_detect_cpu_features();
_mi_os_init();
mi_heap_main_init();
mi_thread_init();
#if defined(_WIN32)
// On Windows, when building as a static lib the FLS cleanup happens too early for the main thread.
// To avoid this, set the FLS value for the main thread to NULL so the fls cleanup
// will not call _mi_thread_done on the (still executing) main thread. See issue #508.
_mi_prim_thread_associate_default_heap(NULL);
#endif
mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL)
mi_track_init();
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024);
int reserve_at = (int)mi_option_get_clamp(mi_option_reserve_huge_os_pages_at, -1, INT_MAX);
if (reserve_at != -1) {
mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500);
} else {
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
}
if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
long ksize = mi_option_get(mi_option_reserve_os_memory);
if (ksize > 0) {
mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */);
}
}
}
// Called when the process is done (cdecl as it is used with `at_exit` on some platforms)
void mi_cdecl mi_process_done(void) mi_attr_noexcept {
// only shutdown if we were initialized
if (!_mi_process_is_initialized) return;
// ensure we are called once
static bool process_done = false;
if (process_done) return;
process_done = true;
// get the default heap so we don't need to access thread locals anymore
mi_heap_t* heap = mi_prim_get_default_heap(); // use prim to not initialize any heap
mi_assert_internal(heap != NULL);
// release any thread specific resources and ensure _mi_thread_done is called on all but the main thread
_mi_prim_thread_done_auto_done();
#ifndef MI_SKIP_COLLECT_ON_EXIT
#if (MI_DEBUG || !defined(MI_SHARED_LIB))
// free all memory if possible on process exit. This is not needed for a stand-alone process
// but should be done if mimalloc is statically linked into another shared library which
// is repeatedly loaded/unloaded, see issue #281.
mi_heap_collect(heap, true /* force */ );
#endif
#endif
// Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free
// since after process_done there might still be other code running that calls `free` (like at_exit routines,
// or C-runtime termination code).
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
mi_heap_collect(heap, true /* force */);
_mi_heap_unsafe_destroy_all(heap); // forcefully release all memory held by all heaps (of this thread only!)
_mi_arena_unsafe_destroy_all();
_mi_segment_map_unsafe_destroy();
}
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
mi_stats_print(NULL);
}
_mi_allocator_done();
_mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id);
os_preloading = true; // don't call the C runtime anymore
}
void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept {
if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return;
mi_process_done();
}

334
compat/mimalloc/libc.c Normal file
View File

@ -0,0 +1,334 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// --------------------------------------------------------
// This module defines various std libc functions to reduce
// the dependency on libc, and also prevent errors caused
// by some libc implementations when called before `main`
// executes (due to malloc redirection)
// --------------------------------------------------------
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h" // mi_prim_getenv
char _mi_toupper(char c) {
if (c >= 'a' && c <= 'z') return (c - 'a' + 'A');
else return c;
}
int _mi_strnicmp(const char* s, const char* t, size_t n) {
if (n == 0) return 0;
for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
if (_mi_toupper(*s) != _mi_toupper(*t)) break;
}
return (n == 0 ? 0 : *s - *t);
}
void _mi_strlcpy(char* dest, const char* src, size_t dest_size) {
if (dest==NULL || src==NULL || dest_size == 0) return;
// copy until end of src, or when dest is (almost) full
while (*src != 0 && dest_size > 1) {
*dest++ = *src++;
dest_size--;
}
// always zero terminate
*dest = 0;
}
void _mi_strlcat(char* dest, const char* src, size_t dest_size) {
if (dest==NULL || src==NULL || dest_size == 0) return;
// find end of string in the dest buffer
while (*dest != 0 && dest_size > 1) {
dest++;
dest_size--;
}
// and catenate
_mi_strlcpy(dest, src, dest_size);
}
size_t _mi_strlen(const char* s) {
if (s==NULL) return 0;
size_t len = 0;
while(s[len] != 0) { len++; }
return len;
}
size_t _mi_strnlen(const char* s, size_t max_len) {
if (s==NULL) return 0;
size_t len = 0;
while(s[len] != 0 && len < max_len) { len++; }
return len;
}
#ifdef MI_NO_GETENV
bool _mi_getenv(const char* name, char* result, size_t result_size) {
MI_UNUSED(name);
MI_UNUSED(result);
MI_UNUSED(result_size);
return false;
}
#else
bool _mi_getenv(const char* name, char* result, size_t result_size) {
if (name==NULL || result == NULL || result_size < 64) return false;
return _mi_prim_getenv(name,result,result_size);
}
#endif
// --------------------------------------------------------
// Define our own limited `_mi_vsnprintf` and `_mi_snprintf`
// This is mostly to avoid calling these when libc is not yet
// initialized (and to reduce dependencies)
//
// format: d i, p x u, s
// prec: z l ll L
// width: 10
// align-left: -
// fill: 0
// plus: +
// --------------------------------------------------------
static void mi_outc(char c, char** out, char* end) {
char* p = *out;
if (p >= end) return;
*p = c;
*out = p + 1;
}
static void mi_outs(const char* s, char** out, char* end) {
if (s == NULL) return;
char* p = *out;
while (*s != 0 && p < end) {
*p++ = *s++;
}
*out = p;
}
static void mi_out_fill(char fill, size_t len, char** out, char* end) {
char* p = *out;
for (size_t i = 0; i < len && p < end; i++) {
*p++ = fill;
}
*out = p;
}
static void mi_out_alignright(char fill, char* start, size_t len, size_t extra, char* end) {
if (len == 0 || extra == 0) return;
if (start + len + extra >= end) return;
// move `len` characters to the right (in reverse since it can overlap)
for (size_t i = 1; i <= len; i++) {
start[len + extra - i] = start[len - i];
}
// and fill the start
for (size_t i = 0; i < extra; i++) {
start[i] = fill;
}
}
static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end)
{
if (x == 0 || base == 0 || base > 16) {
if (prefix != 0) { mi_outc(prefix, out, end); }
mi_outc('0',out,end);
}
else {
// output digits in reverse
char* start = *out;
while (x > 0) {
char digit = (char)(x % base);
mi_outc((digit <= 9 ? '0' + digit : 'A' + digit - 10),out,end);
x = x / base;
}
if (prefix != 0) {
mi_outc(prefix, out, end);
}
size_t len = *out - start;
// and reverse in-place
for (size_t i = 0; i < (len / 2); i++) {
char c = start[len - i - 1];
start[len - i - 1] = start[i];
start[i] = c;
}
}
}
#define MI_NEXTC() c = *in; if (c==0) break; in++;
int _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
if (buf == NULL || bufsize == 0 || fmt == NULL) return 0;
buf[bufsize - 1] = 0;
char* const end = buf + (bufsize - 1);
const char* in = fmt;
char* out = buf;
while (true) {
if (out >= end) break;
char c;
MI_NEXTC();
if (c != '%') {
if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t') { // output visible ascii or standard control only
mi_outc(c, &out, end);
}
}
else {
MI_NEXTC();
char fill = ' ';
size_t width = 0;
char numtype = 'd';
char numplus = 0;
bool alignright = true;
if (c == '+' || c == ' ') { numplus = c; MI_NEXTC(); }
if (c == '-') { alignright = false; MI_NEXTC(); }
if (c == '0') { fill = '0'; MI_NEXTC(); }
if (c >= '1' && c <= '9') {
width = (c - '0'); MI_NEXTC();
while (c >= '0' && c <= '9') {
width = (10 * width) + (c - '0'); MI_NEXTC();
}
if (c == 0) break; // extra check due to while
}
if (c == 'z' || c == 't' || c == 'L') { numtype = c; MI_NEXTC(); }
else if (c == 'l') {
numtype = c; MI_NEXTC();
if (c == 'l') { numtype = 'L'; MI_NEXTC(); }
}
char* start = out;
if (c == 's') {
// string
const char* s = va_arg(args, const char*);
mi_outs(s, &out, end);
}
else if (c == 'p' || c == 'x' || c == 'u') {
// unsigned
uintmax_t x = 0;
if (c == 'x' || c == 'u') {
if (numtype == 'z') x = va_arg(args, size_t);
else if (numtype == 't') x = va_arg(args, uintptr_t); // unsigned ptrdiff_t
else if (numtype == 'L') x = va_arg(args, unsigned long long);
else if (numtype == 'l') x = va_arg(args, unsigned long);
else x = va_arg(args, unsigned int);
}
else if (c == 'p') {
x = va_arg(args, uintptr_t);
mi_outs("0x", &out, end);
start = out;
width = (width >= 2 ? width - 2 : 0);
}
if (width == 0 && (c == 'x' || c == 'p')) {
if (c == 'p') { width = 2 * (x <= UINT32_MAX ? 4 : ((x >> 16) <= UINT32_MAX ? 6 : sizeof(void*))); }
if (width == 0) { width = 2; }
fill = '0';
}
mi_out_num(x, (c == 'x' || c == 'p' ? 16 : 10), numplus, &out, end);
}
else if (c == 'i' || c == 'd') {
// signed
intmax_t x = 0;
if (numtype == 'z') x = va_arg(args, intptr_t );
else if (numtype == 't') x = va_arg(args, ptrdiff_t);
else if (numtype == 'L') x = va_arg(args, long long);
else if (numtype == 'l') x = va_arg(args, long);
else x = va_arg(args, int);
char pre = 0;
if (x < 0) {
pre = '-';
if (x > INTMAX_MIN) { x = -x; }
}
else if (numplus != 0) {
pre = numplus;
}
mi_out_num((uintmax_t)x, 10, pre, &out, end);
}
else if (c >= ' ' && c <= '~') {
// unknown format
mi_outc('%', &out, end);
mi_outc(c, &out, end);
}
// fill & align
mi_assert_internal(out <= end);
mi_assert_internal(out >= start);
const size_t len = out - start;
if (len < width) {
mi_out_fill(fill, width - len, &out, end);
if (alignright && out <= end) {
mi_out_alignright(fill, start, len, width - len, end);
}
}
}
}
mi_assert_internal(out <= end);
*out = 0;
return (int)(out - buf);
}
int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
va_list args;
va_start(args, fmt);
const int written = _mi_vsnprintf(buf, buflen, fmt, args);
va_end(args);
return written;
}
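// Illustrative sketch (not part of the upstream sources): only the conversions listed in
// the comment above are recognized (d, i, u, x, p, s with the z/t/l/ll/L length prefixes,
// a width, and the '0', '-' and '+' flags), so a typical internal call looks like:
//
//   char msg[64];
//   _mi_snprintf(msg, sizeof(msg), "reserved %zu KiB at %p", (size_t)1024, (void*)msg);
//
// Unsupported conversions such as %f are echoed literally rather than formatted.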
#if MI_SIZE_SIZE == 4
#define mi_mask_even_bits32 (0x55555555)
#define mi_mask_even_pairs32 (0x33333333)
#define mi_mask_even_nibbles32 (0x0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum32(uint32_t x) {
// perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
x += (x << 8);
x += (x << 16);
return (size_t)(x >> 24);
}
static size_t mi_popcount_generic32(uint32_t x) {
// first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
// in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
// into the lower bit-pair:
x = x - ((x >> 1) & mi_mask_even_bits32);
// add the 2-bit pair results
x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
// add the 4-bit nibble results
x = (x + (x >> 4)) & mi_mask_even_nibbles32;
// each byte now has a count of its bits, we can sum them now:
return mi_byte_sum32(x);
}
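// Worked example (illustrative, not part of the upstream sources): for x = 0xB (binary
// 1011, three bits set), the first step gives 1011 - 0101 = 0110 (each 2-bit pair now
// holds its own bit count), the pair step gives 0010 + 0001 = 0011 (each nibble holds
// its count), the nibble step leaves 0x00000003, and mi_byte_sum32 returns 3.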
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic32(x);
}
#else
#define mi_mask_even_bits64 (0x5555555555555555)
#define mi_mask_even_pairs64 (0x3333333333333333)
#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum64(uint64_t x) {
x += (x << 8);
x += (x << 16);
x += (x << 32);
return (size_t)(x >> 56);
}
static size_t mi_popcount_generic64(uint64_t x) {
x = x - ((x >> 1) & mi_mask_even_bits64);
x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
x = (x + (x >> 4)) & mi_mask_even_nibbles64;
return mi_byte_sum64(x);
}
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic64(x);
}
#endif

104
compat/mimalloc/mimalloc-stats.h Normal file
View File

@ -0,0 +1,104 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_STATS_H
#define MIMALLOC_STATS_H
#include <mimalloc.h>
#include <stdint.h>
#define MI_STAT_VERSION 3 // increased on every backward incompatible change
// count allocation over time
typedef struct mi_stat_count_s {
int64_t total; // total allocated
int64_t peak; // peak allocation
int64_t current; // current allocation
} mi_stat_count_t;
// counters only increase
typedef struct mi_stat_counter_s {
int64_t total; // total count
} mi_stat_counter_t;
#define MI_STAT_FIELDS() \
MI_STAT_COUNT(pages) /* count of mimalloc pages */ \
MI_STAT_COUNT(reserved) /* reserved memory bytes */ \
MI_STAT_COUNT(committed) /* committed bytes */ \
MI_STAT_COUNTER(reset) /* reset bytes */ \
MI_STAT_COUNTER(purged) /* purged bytes */ \
MI_STAT_COUNT(page_committed) /* committed memory inside pages */ \
MI_STAT_COUNT(pages_abandoned) /* abandoned pages count */ \
MI_STAT_COUNT(threads) /* number of threads */ \
MI_STAT_COUNT(malloc_normal) /* allocated bytes <= MI_LARGE_OBJ_SIZE_MAX */ \
MI_STAT_COUNT(malloc_huge) /* allocated bytes in huge pages */ \
MI_STAT_COUNT(malloc_requested) /* malloc requested bytes */ \
\
MI_STAT_COUNTER(mmap_calls) \
MI_STAT_COUNTER(commit_calls) \
MI_STAT_COUNTER(reset_calls) \
MI_STAT_COUNTER(purge_calls) \
MI_STAT_COUNTER(arena_count) /* number of memory arenas */ \
MI_STAT_COUNTER(malloc_normal_count) /* number of blocks <= MI_LARGE_OBJ_SIZE_MAX */ \
MI_STAT_COUNTER(malloc_huge_count) /* number of huge blocks */ \
MI_STAT_COUNTER(malloc_guarded_count) /* number of allocations with guard pages */ \
\
/* internal statistics */ \
MI_STAT_COUNTER(arena_rollback_count) \
MI_STAT_COUNTER(arena_purges) \
MI_STAT_COUNTER(pages_extended) /* number of page extensions */ \
MI_STAT_COUNTER(pages_retire) /* number of pages that are retired */ \
MI_STAT_COUNTER(page_searches) /* total pages searched for a fresh page */ \
MI_STAT_COUNTER(page_searches_count) /* searched count for a fresh page */ \
/* only on v1 and v2 */ \
MI_STAT_COUNT(segments) \
MI_STAT_COUNT(segments_abandoned) \
MI_STAT_COUNT(segments_cache) \
MI_STAT_COUNT(_segments_reserved) \
/* only on v3 */ \
MI_STAT_COUNTER(pages_reclaim_on_alloc) \
MI_STAT_COUNTER(pages_reclaim_on_free) \
MI_STAT_COUNTER(pages_reabandon_full) \
MI_STAT_COUNTER(pages_unabandon_busy_wait) \
// Define the statistics structure
#define MI_BIN_HUGE (73U) // see types.h
#define MI_STAT_COUNT(stat) mi_stat_count_t stat;
#define MI_STAT_COUNTER(stat) mi_stat_counter_t stat;
typedef struct mi_stats_s
{
int version;
MI_STAT_FIELDS()
// future extension
mi_stat_count_t _stat_reserved[4];
mi_stat_counter_t _stat_counter_reserved[4];
// size segregated statistics
mi_stat_count_t malloc_bins[MI_BIN_HUGE+1]; // allocation per size bin
mi_stat_count_t page_bins[MI_BIN_HUGE+1]; // pages allocated per size bin
} mi_stats_t;
#undef MI_STAT_COUNT
#undef MI_STAT_COUNTER
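// For illustration: inside `mi_stats_t` above, `MI_STAT_COUNT(pages)` expanded to
//   mi_stat_count_t pages;
// and `MI_STAT_COUNTER(mmap_calls)` to
//   mi_stat_counter_t mmap_calls;
// so the struct's field list always matches MI_STAT_FIELDS().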
// Exported definitions
#ifdef __cplusplus
extern "C" {
#endif
mi_decl_export void mi_stats_get( size_t stats_size, mi_stats_t* stats ) mi_attr_noexcept;
mi_decl_export char* mi_stats_get_json( size_t buf_size, char* buf ) mi_attr_noexcept; // use mi_free to free the result if the input buf == NULL
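// Usage sketch (caller side):
//   mi_stats_t st;
//   mi_stats_get(sizeof(st), &st);
//   char* json = mi_stats_get_json(0, NULL);  // buf==NULL: result is allocated,
//   mi_free(json);                            // so free it with mi_free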
#ifdef __cplusplus
}
#endif
#endif // MIMALLOC_STATS_H

629
compat/mimalloc/mimalloc.h Normal file
View File

@ -0,0 +1,629 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2026, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 227 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
// ------------------------------------------------------
#ifdef __cplusplus
#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
#define mi_attr_noexcept noexcept
#else
#define mi_attr_noexcept throw()
#endif
#else
#define mi_attr_noexcept
#endif
#if defined(__cplusplus) && (__cplusplus >= 201703)
#define mi_decl_nodiscard [[nodiscard]]
#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
#define mi_decl_nodiscard __attribute__((warn_unused_result))
#elif defined(_HAS_NODISCARD)
#define mi_decl_nodiscard _NODISCARD
#elif (_MSC_VER >= 1700)
#define mi_decl_nodiscard _Check_return_
#else
#define mi_decl_nodiscard
#endif
#if defined(_MSC_VER) || defined(__MINGW32__)
#if !defined(MI_SHARED_LIB)
#define mi_decl_export
#elif defined(MI_SHARED_LIB_EXPORT)
#define mi_decl_export __declspec(dllexport)
#else
#define mi_decl_export __declspec(dllimport)
#endif
#if defined(__MINGW32__)
#define mi_decl_restrict
#define mi_attr_malloc __attribute__((malloc))
#else
#if (_MSC_VER >= 1900) && !defined(__EDG__)
#define mi_decl_restrict __declspec(allocator) __declspec(restrict)
#else
#define mi_decl_restrict __declspec(restrict)
#endif
#define mi_attr_malloc
#endif
#define mi_cdecl __cdecl
#define mi_attr_alloc_size(s)
#define mi_attr_alloc_size2(s1,s2)
#define mi_attr_alloc_align(p)
#elif defined(__GNUC__) // includes clang and icc
#if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT)
#define mi_decl_export __attribute__((visibility("default")))
#else
#define mi_decl_export
#endif
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
#define mi_decl_restrict
#define mi_attr_malloc __attribute__((malloc))
#if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5)
#define mi_attr_alloc_size(s)
#define mi_attr_alloc_size2(s1,s2)
#define mi_attr_alloc_align(p)
#elif defined(__INTEL_COMPILER)
#define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
#define mi_attr_alloc_align(p)
#else
#define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
#define mi_attr_alloc_align(p) __attribute__((alloc_align(p)))
#endif
#else
#define mi_cdecl
#define mi_decl_export
#define mi_decl_restrict
#define mi_attr_malloc
#define mi_attr_alloc_size(s)
#define mi_attr_alloc_size2(s1,s2)
#define mi_attr_alloc_align(p)
#endif
// ------------------------------------------------------
// Includes
// ------------------------------------------------------
#include <stddef.h> // size_t
#include <stdbool.h> // bool
#include <stdint.h> // INTPTR_MAX
#ifdef __cplusplus
extern "C" {
#endif
// ------------------------------------------------------
// Standard malloc interface
// ------------------------------------------------------
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
mi_decl_export void mi_free(void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc;
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
// ------------------------------------------------------
// Extended functionality
// ------------------------------------------------------
#define MI_SMALL_WSIZE_MAX (128)
#define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*))
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept;
// ------------------------------------------------------
// Internals
// ------------------------------------------------------
typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept;
typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg);
mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept;
typedef void (mi_cdecl mi_error_fun)(int err, void* arg);
mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg);
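// For example, an application could forward mimalloc messages to its own log
// (`my_output` is a hypothetical callback name used only for illustration):
//   static void my_output(const char* msg, void* arg) { (void)arg; fputs(msg, stderr); }
//   ...
//   mi_register_output(&my_output, NULL);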
mi_decl_export void mi_collect(bool force) mi_attr_noexcept;
mi_decl_export int mi_version(void) mi_attr_noexcept;
mi_decl_export void mi_stats_reset(void) mi_attr_noexcept;
mi_decl_export void mi_stats_merge(void) mi_attr_noexcept;
mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL
mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
mi_decl_export void mi_options_print(void) mi_attr_noexcept;
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
size_t* current_rss, size_t* peak_rss,
size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
// Generally do not use the following as these are usually called automatically
mi_decl_export void mi_process_init(void) mi_attr_noexcept;
mi_decl_export void mi_cdecl mi_process_done(void) mi_attr_noexcept;
mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
// -------------------------------------------------------------------------------------
// Aligned allocation
// Note that `alignment` always follows `size` for consistency with unaligned
// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
// -------------------------------------------------------------------------------------
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
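// For example, a 100-byte block aligned to 16 bytes is requested as
//   void* p = mi_malloc_aligned(100, 16);   // size first, then alignment
// whereas C11 `aligned_alloc(16, 100)` takes the alignment first.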
// -----------------------------------------------------------------
// Return allocated block size (if the return value is not NULL)
// -----------------------------------------------------------------
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_umalloc(size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_ucalloc(size_t count, size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
mi_decl_nodiscard mi_decl_export void* mi_urealloc(void* p, size_t newsize, size_t* block_size_pre, size_t* block_size_post) mi_attr_noexcept mi_attr_alloc_size(2);
mi_decl_export void mi_ufree(void* p, size_t* block_size) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_umalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_uzalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_umalloc_small(size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_uzalloc_small(size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
// -------------------------------------------------------------------------------------
// Heaps: first-class, but can only allocate from the same thread that created them.
// -------------------------------------------------------------------------------------
struct mi_heap_s;
typedef struct mi_heap_s mi_heap_t;
mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void);
mi_decl_export void mi_heap_delete(mi_heap_t* heap);
mi_decl_export void mi_heap_destroy(mi_heap_t* heap);
mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap);
mi_decl_export mi_heap_t* mi_heap_get_default(void);
mi_decl_export mi_heap_t* mi_heap_get_backing(void);
mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc;
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
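// Usage sketch for a thread-local private heap:
//   mi_heap_t* h = mi_heap_new();
//   char* s = mi_heap_strdup(h, "hello");
//   mi_free(s);
//   mi_heap_delete(h);   // or mi_heap_destroy(h) to release all of its memory at once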
// --------------------------------------------------------------------------------
// Zero initialized re-allocation.
// Only valid on memory that was originally allocated with zero initialization too.
// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc.
// see <https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992>
// --------------------------------------------------------------------------------
mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4);
mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3);
mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5);
mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4);
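// For example, growing a buffer while preserving the zero-initialization guarantee:
//   int* a = (int*)mi_zalloc(100 * sizeof(int));
//   a = (int*)mi_rezalloc(a, 200 * sizeof(int));   // the added bytes are zeroed
//   mi_free(a);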
// ------------------------------------------------------
// Analysis
// ------------------------------------------------------
mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
mi_decl_export bool mi_check_owned(const void* p);
// An area of heap space contains blocks of a single size.
typedef struct mi_heap_area_s {
void* blocks; // start of the area containing heap blocks
size_t reserved; // bytes reserved for this area (virtual)
size_t committed; // current available bytes for this area
size_t used; // number of allocated blocks
size_t block_size; // size in bytes of each block
size_t full_block_size; // size in bytes of a full block including padding and metadata.
int heap_tag; // heap tag associated with this area
} mi_heap_area_t;
typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// Experimental
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
mi_decl_export void mi_arenas_print(void) mi_attr_noexcept;
// Experimental: heaps associated with specific memory arenas
typedef int mi_arena_id_t;
mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
#if MI_MALLOC_VERSION >= 182
// Create a heap that only allocates in the specified arena
mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
#endif
// Experimental: allow sub-processes whose memory areas stay separated (and no reclamation between them)
// Used for example for separate interpreters in one process.
typedef void* mi_subproc_id_t;
mi_decl_export mi_subproc_id_t mi_subproc_main(void);
mi_decl_export mi_subproc_id_t mi_subproc_new(void);
mi_decl_export void mi_subproc_delete(mi_subproc_id_t subproc);
mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
// Experimental: visit abandoned heap areas (that are not owned by a specific heap)
mi_decl_export bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// Experimental: objects followed by a guard page.
// A sample rate of 0 disables guarded objects, while 1 uses a guard page for every object.
// A seed of 0 uses a random start point. Only objects within the size bound are eligible for guard pages.
mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed);
mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max);
// Experimental: communicate that the thread is part of a threadpool
mi_decl_export void mi_thread_set_in_threadpool(void) mi_attr_noexcept;
// Experimental: create a new heap with a specified heap tag. Set `allow_destroy` to false to allow the thread
// to reclaim abandoned memory (with a compatible heap_tag and arena_id) but in that case `mi_heap_destroy` will
// fall back to `mi_heap_delete`.
mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id);
// deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexcept;
// ------------------------------------------------------
// Convenience
// ------------------------------------------------------
#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp)))
#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp)))
#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp)))
#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp)))
#define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
#define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
#define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp)))
#define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp)))
#define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp)))
#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp)))
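// For example:
//   int* x  = mi_zalloc_tp(int);        // one zero-initialized int
//   int* xs = mi_mallocn_tp(int, 16);   // array of 16 ints
//   mi_free(x); mi_free(xs);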
// ------------------------------------------------------
// Options
// ------------------------------------------------------
typedef enum mi_option_e {
// stable options
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// advanced options
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit)
mi_option_allow_large_os_pages, // allow use of large (2 or 4 MiB) OS pages, implies eager commit.
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup (internally, this value is in KiB; use `mi_option_get_size`)
mi_option_deprecated_segment_cache,
mi_option_deprecated_page_reset,
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
mi_option_purge_delay, // memory purging is delayed by N milliseconds; use 0 for immediate purging or -1 for no purging at all. (=10)
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim, // max. percentage of abandoned segments that can be reclaimed per try (=10%)
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe
mi_option_arena_reserve, // initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`)
mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10)
mi_option_purge_extend_delay,
mi_option_abandoned_reclaim_on_free, // allow reclaiming an abandoned segment on a free (=1)
mi_option_disallow_arena_alloc, // 1 = do not use arenas for allocation (except when using specific arena ids)
mi_option_retry_on_oom, // retry on out-of-memory for N milliseconds (=400), set to 0 to disable retries. (only on Windows)
mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0)
mi_option_guarded_min, // only used when building with MI_GUARDED: minimal rounded object size for guarded objects (=0)
mi_option_guarded_max, // only used when building with MI_GUARDED: maximal rounded object size for guarded objects (=0)
mi_option_guarded_precise, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
mi_option_guarded_sample_rate, // 1 out of N allocations in the min/max range will be guarded (=1000)
mi_option_guarded_sample_seed, // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
mi_option_target_segments_per_thread, // experimental (=0)
mi_option_generic_collect, // collect heaps every N (=10000) generic allocation calls
mi_option_allow_thp, // allow transparent huge pages? (=1) (on Android =0 by default). Set to 0 to disable THP for the process.
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
mi_option_eager_region_commit = mi_option_arena_eager_commit,
mi_option_reset_decommits = mi_option_purge_decommits,
mi_option_reset_delay = mi_option_purge_delay,
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge,
mi_option_limit_os_alloc = mi_option_disallow_os_alloc
} mi_option_t;
mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option);
mi_decl_export void mi_option_enable(mi_option_t option);
mi_decl_export void mi_option_disable(mi_option_t option);
mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable);
mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable);
mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option);
mi_decl_export void mi_option_set(mi_option_t option, long value);
mi_decl_export void mi_option_set_default(mi_option_t option, long value);
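// For example, options can be tuned programmatically (typically before the
// first allocation):
//   mi_option_set(mi_option_purge_delay, 0);   // purge immediately
//   mi_option_enable(mi_option_show_stats);    // print statistics on termination
//   if (mi_option_is_enabled(mi_option_verbose)) { /* ... */ }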
// -------------------------------------------------------------------------------------------------------
// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions.
// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.)
// note: we use `mi_cfree` as "checked free": it checks whether the pointer is in our heap before freeing it.
// -------------------------------------------------------------------------------------------------------
mi_decl_export void mi_cfree(void* p) mi_attr_noexcept;
mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept;
mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc;
mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc;
mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept;
mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept;
mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept;
mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept;
mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept;
// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`.
// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception).
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2);
mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3);
#ifdef __cplusplus
}
#endif
// ---------------------------------------------------------------------------------------------
// Implement the C++ std::allocator interface for use in STL containers.
// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally)
// ---------------------------------------------------------------------------------------------
#ifdef __cplusplus
#include <cstddef> // std::size_t
#include <cstdint> // PTRDIFF_MAX
#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
#include <type_traits> // std::true_type
#include <utility> // std::forward
#endif
template<class T> struct _mi_stl_allocator_common {
typedef T value_type;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef value_type& reference;
typedef value_type const& const_reference;
typedef value_type* pointer;
typedef value_type const* const_pointer;
#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
using propagate_on_container_copy_assignment = std::true_type;
using propagate_on_container_move_assignment = std::true_type;
using propagate_on_container_swap = std::true_type;
template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
#else
void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
void destroy(pointer p) { p->~value_type(); }
#endif
size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); }
pointer address(reference x) const { return &x; }
const_pointer address(const_reference x) const { return &x; }
};
template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> {
using typename _mi_stl_allocator_common<T>::size_type;
using typename _mi_stl_allocator_common<T>::value_type;
using typename _mi_stl_allocator_common<T>::pointer;
template <class U> struct rebind { typedef mi_stl_allocator<U> other; };
mi_stl_allocator() mi_attr_noexcept = default;
mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default;
template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { }
mi_stl_allocator select_on_container_copy_construction() const { return *this; }
void deallocate(T* p, size_type) { mi_free(p); }
#if (__cplusplus >= 201703L) // C++17
mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); }
mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
#else
mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); }
#endif
#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
using is_always_equal = std::true_type;
#endif
};
template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; }
template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }
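// For example, a standard container can allocate through mimalloc like this:
//   #include <vector>
//   std::vector<int, mi_stl_allocator<int>> v;
//   v.push_back(42);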
#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11
#define MI_HAS_HEAP_STL_ALLOCATOR 1
#include <memory> // std::shared_ptr
// Common base class for STL allocators in a specific heap
template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> {
using typename _mi_stl_allocator_common<T>::size_type;
using typename _mi_stl_allocator_common<T>::value_type;
using typename _mi_stl_allocator_common<T>::pointer;
_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) {}) {} /* will not delete nor destroy the passed in heap */
#if (__cplusplus >= 201703L) // C++17
mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
#else
mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
#endif
#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
using is_always_equal = std::false_type;
#endif
void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); }
protected:
std::shared_ptr<mi_heap_t> heap;
template<class U, bool D> friend struct _mi_heap_stl_allocator_common;
_mi_heap_stl_allocator_common() {
mi_heap_t* hp = mi_heap_new();
this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */
}
_mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { }
template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { }
private:
static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } }
static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } }
};
// STL allocator allocation in a specific heap
template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> {
using typename _mi_heap_stl_allocator_common<T, false>::size_type;
mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called
mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap
template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { }
mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }
void deallocate(T* p, size_type) { mi_free(p); }
template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; };
};
template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
// STL allocator allocation in a specific heap, where `free` does nothing and
// the heap is destroyed in one go on destruction -- use with care!
template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> {
using typename _mi_heap_stl_allocator_common<T, true>::size_type;
mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called
mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap
template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { }
mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ }
template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; };
};
template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
#endif // C++11
#endif // __cplusplus
#endif

557
compat/mimalloc/mimalloc/atomic.h Normal file
View File

@ -0,0 +1,557 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
// include windows.h or pthreads.h
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#elif !defined(__wasi__) && (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
// To gain better insight into the range of used atomics, we use explicitly named memory order operations
// instead of passing the memory order as a parameter.
// -----------------------------------------------------------------------------------------------
#if defined(__cplusplus)
// Use C++ atomics
#include <atomic>
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#if (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
#define _Atomic(tp) tp
#define MI_ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
#else
// Use C11 atomics
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#if (__STDC_VERSION__ >= 201710L) // c17, see issue #735
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
#endif
// Various defines for all used memory orders in mimalloc
#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)
#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_relaxed(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1)
#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1)
#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1)
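// A typical compare-and-swap retry loop built from these macros (sketch;
// `limit` is a placeholder value):
//   static _Atomic(uintptr_t) count;
//   uintptr_t expected = mi_atomic_load_relaxed(&count);
//   while (expected < limit &&
//          !mi_atomic_cas_weak_acq_rel(&count, &expected, expected + 1)) {
//     /* `expected` was refreshed with the current value; retry */
//   }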
static inline void mi_atomic_yield(void);
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
#if defined(__cplusplus) || !defined(_MSC_VER)
// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value)
// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)
// In C++ we need to add casts to help resolve templates if NULL is passed
#if defined(__cplusplus)
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
#else
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
#endif
// These are used by the statistics
static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}
static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) {
const int64_t add = mi_atomic_load_relaxed((_Atomic(int64_t)*)padd);
if (add != 0) {
mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}
}
static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
}
// Used by timers
#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d)
#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i)
#elif defined(_MSC_VER)
// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics.
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
#define MI_64(f) f##64
#else
typedef LONG msc_intptr_t;
#define MI_64(f) f
#endif
typedef enum mi_memory_order_e {
mi_memory_order_relaxed,
mi_memory_order_consume,
mi_memory_order_acquire,
mi_memory_order_release,
mi_memory_order_acq_rel,
mi_memory_order_seq_cst
} mi_memory_order;
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
(void)(mo);
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
(void)(mo);
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
}
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
(void)(mo);
return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
(void)(mo);
return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
(void)(mo1); (void)(mo2);
uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
if (read == *expected) {
return true;
}
else {
*expected = read;
return false;
}
}
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
}
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
(void)(mo);
return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
static inline void mi_atomic_thread_fence(mi_memory_order mo) {
(void)(mo);
_Atomic(uintptr_t) x = 0;
mi_atomic_exchange_explicit(&x, 1, mo);
}
static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
(void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
return *p;
#else
uintptr_t x = *p;
if (mo > mi_memory_order_relaxed) {
while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
}
return x;
#endif
}
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
(void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
*p = x;
#else
mi_atomic_exchange_explicit(p, x, mo);
#endif
}
static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
(void)(mo);
#if defined(_M_X64)
return *p;
#else
int64_t old = *p;
int64_t x = old;
while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
x = old;
}
return x;
#endif
}
static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
(void)(mo);
#if defined(x_M_IX86) || defined(_M_X64)
*p = x;
#else
InterlockedExchange64(p, x);
#endif
}
// These are used by the statistics
static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
#ifdef _WIN64
return (int64_t)mi_atomic_addi((int64_t*)p, add);
#else
int64_t current;
int64_t sum;
do {
current = *p;
sum = current + add;
} while (_InterlockedCompareExchange64(p, sum, current) != current);
return current;
#endif
}
static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) {
const int64_t add = *padd;
if (add != 0) {
mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add);
}
}
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
int64_t current;
do {
current = *p;
} while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}
static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) {
mi_atomic_addi64_relaxed(p, i);
}
static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) {
int64_t read = _InterlockedCompareExchange64(p, des, *exp);
if (read == *exp) {
return true;
}
else {
*exp = read;
return false;
}
}
// The pointer macros cast to `uintptr_t`.
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed))
#endif
// Atomically add a signed value; returns the previous value.
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
}
// Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p, -sub);
}
// ----------------------------------------------------------------------
// Once and Guard
// ----------------------------------------------------------------------
typedef _Atomic(uintptr_t) mi_atomic_once_t;
// Returns true only on the first invocation
static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1
}
typedef _Atomic(uintptr_t) mi_atomic_guard_t;
// Allows only one thread to execute at a time
#define mi_atomic_guard(guard) \
uintptr_t _mi_guard_expected = 0; \
for(bool _mi_guard_once = true; \
_mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \
(mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) )
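// Usage sketch:
//   static mi_atomic_once_t initialized;
//   if (mi_atomic_once(&initialized)) { /* runs exactly once */ }
//
//   static mi_atomic_guard_t guard;
//   mi_atomic_guard(&guard) { /* only one thread at a time runs this; others skip it */ }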
// ----------------------------------------------------------------------
// Yield
// ----------------------------------------------------------------------
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
static inline void mi_atomic_yield(void) {
YieldProcessor();
}
#elif defined(__SSE2__)
#include <emmintrin.h>
static inline void mi_atomic_yield(void) {
_mm_pause();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
(defined(__x86_64__) || defined(__i386__) || \
defined(__aarch64__) || defined(__arm__) || \
defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__))
#if defined(__x86_64__) || defined(__i386__)
static inline void mi_atomic_yield(void) {
__asm__ volatile ("pause" ::: "memory");
}
#elif defined(__aarch64__)
static inline void mi_atomic_yield(void) {
__asm__ volatile("wfe");
}
#elif defined(__arm__)
#if __ARM_ARCH >= 7
static inline void mi_atomic_yield(void) {
__asm__ volatile("yield" ::: "memory");
}
#else
static inline void mi_atomic_yield(void) {
__asm__ volatile ("nop" ::: "memory");
}
#endif
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)
#ifdef __APPLE__
static inline void mi_atomic_yield(void) {
__asm__ volatile ("or r27,r27,r27" ::: "memory");
}
#else
static inline void mi_atomic_yield(void) {
__asm__ __volatile__ ("or 27,27,27" ::: "memory");
}
#endif
#endif
#elif defined(__sun)
// Fallback for other archs
#include <synch.h>
static inline void mi_atomic_yield(void) {
smt_pause();
}
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield(void) {
sched_yield();
}
#else
#include <unistd.h>
static inline void mi_atomic_yield(void) {
sleep(0);
}
#endif
// ----------------------------------------------------------------------
// Locks
// These do not have to be recursive and should be light-weight
// in-process only locks. Only used for reserving arenas and to
// maintain the abandoned list.
// ----------------------------------------------------------------------
#if _MSC_VER
#pragma warning(disable:26110) // unlock with holding lock
#endif
#define mi_lock(lock) for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) )
#if defined(_WIN32)
#if 1
#define mi_lock_t SRWLOCK // slim reader-writer lock
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return TryAcquireSRWLockExclusive(lock);
}
static inline void mi_lock_acquire(mi_lock_t* lock) {
AcquireSRWLockExclusive(lock);
}
static inline void mi_lock_release(mi_lock_t* lock) {
ReleaseSRWLockExclusive(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
InitializeSRWLock(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
#else
#define mi_lock_t CRITICAL_SECTION
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return TryEnterCriticalSection(lock);
}
static inline void mi_lock_acquire(mi_lock_t* lock) {
EnterCriticalSection(lock);
}
static inline void mi_lock_release(mi_lock_t* lock) {
LeaveCriticalSection(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
InitializeCriticalSection(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
DeleteCriticalSection(lock);
}
#endif
#elif defined(MI_USE_PTHREADS)
void _mi_error_message(int err, const char* fmt, ...);
#define mi_lock_t pthread_mutex_t
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
static inline void mi_lock_acquire(mi_lock_t* lock) {
const int err = pthread_mutex_lock(lock);
if (err != 0) {
_mi_error_message(err, "internal error: lock cannot be acquired\n");
}
}
static inline void mi_lock_release(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
pthread_mutex_init(lock, NULL);
}
static inline void mi_lock_done(mi_lock_t* lock) {
pthread_mutex_destroy(lock);
}
#elif defined(__cplusplus)
#include <mutex>
#define mi_lock_t std::mutex
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return lock->try_lock();
}
static inline void mi_lock_acquire(mi_lock_t* lock) {
lock->lock();
}
static inline void mi_lock_release(mi_lock_t* lock) {
lock->unlock();
}
static inline void mi_lock_init(mi_lock_t* lock) {
(void)(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
#else
// fall back to poor man's locks.
// this should only be the case in a single-threaded environment (like __wasi__)
#define mi_lock_t _Atomic(uintptr_t)
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
}
static inline void mi_lock_acquire(mi_lock_t* lock) {
for (int i = 0; i < 1000; i++) { // for at most 1000 tries?
if (mi_lock_try_acquire(lock)) return;
mi_atomic_yield();
}
}
static inline void mi_lock_release(mi_lock_t* lock) {
mi_atomic_store_release(lock, (uintptr_t)0);
}
static inline void mi_lock_init(mi_lock_t* lock) {
mi_lock_release(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
#endif
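// Usage sketch (illustrative only; `example_lock` is a hypothetical name). The `mi_lock`
// macro defined above expands to a for-loop, so it wraps acquire/release around the
// statement that follows it:
//
//   static mi_lock_t example_lock;   // set up once with mi_lock_init(&example_lock)
//
//   static void example_critical(void) {
//     mi_lock(&example_lock) {
//       // critical section; the lock is released when this block finishes normally
//     }
//   }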
#endif // __MIMALLOC_ATOMIC_H

File diff suppressed because it is too large

View File

@@ -0,0 +1,421 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_PRIM_H
#define MIMALLOC_PRIM_H
#include "internal.h" // mi_decl_hidden
// --------------------------------------------------------------------------
// This file specifies the primitive portability API.
// Each OS/host needs to implement these primitives, see `src/prim`
// for implementations on Windows, macOS, WASI, and Linux/Unix.
//
// note: on all primitive functions, we always have result parameters != NULL, and:
// addr != NULL and page aligned
// size > 0 and page aligned
// the return value is an error code as an `int` where 0 is success
// --------------------------------------------------------------------------
// OS memory configuration
typedef struct mi_os_mem_config_s {
size_t page_size; // default to 4KiB
size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows)
size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB)
size_t physical_memory_in_kib; // physical memory size in KiB
size_t virtual_address_bits; // usually 48 or 56 bits on 64-bit systems. (used to determine secure randomization)
bool has_overcommit; // can we reserve more memory than can be actually committed?
bool has_partial_free; // can allocated blocks be freed partially? (true for mmap, false for VirtualAlloc)
bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory)
} mi_os_mem_config_t;
// Initialize
void _mi_prim_mem_init( mi_os_mem_config_t* config );
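// Sketch of a minimal port (illustrative only, not one of the real implementations in
// `src/prim`; the values are assumptions for a plain 4KiB-page, mmap-style target):
//
//   void _mi_prim_mem_init(mi_os_mem_config_t* config) {
//     config->page_size           = 4096;   // 4 KiB pages
//     config->large_page_size     = 0;      // no large page support
//     config->alloc_granularity   = 4096;
//     config->has_overcommit      = true;   // reserving does not consume physical memory
//     config->has_partial_free    = true;   // munmap can free sub-ranges
//     config->has_virtual_reserve = true;
//   }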
// Free OS memory
int _mi_prim_free(void* addr, size_t size );
// Allocate OS memory. Return NULL on error.
// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// which will later be committed explicitly using `_mi_prim_commit`.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// The `hint_addr` address is either `NULL` or a preferred allocation address but can be ignored.
// pre: !commit => !allow_large
// try_alignment >= _mi_os_page_size() and a power of 2
int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);
// Commit memory. Returns error code or 0 on success.
// For example, on Linux this would make the memory PROT_READ|PROT_WRITE.
// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows)
int _mi_prim_commit(void* addr, size_t size, bool* is_zero);
// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true
// if the memory would need to be re-committed. For example, on Windows this is always true,
// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit.
// pre: needs_recommit != NULL
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit);
// Reset memory. The range keeps being accessible but the content might be reset to zero at any moment.
// Returns error code or 0 on success.
int _mi_prim_reset(void* addr, size_t size);
// Reuse memory. This is called for memory that is already committed but
// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false.
// Returns error code or 0 on success. On most platforms this is a no-op.
int _mi_prim_reuse(void* addr, size_t size);
// Protect memory. Returns error code or 0 on success.
int _mi_prim_protect(void* addr, size_t size, bool protect);
// Allocate huge (1GiB) pages possibly associated with a NUMA node.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// pre: size > 0 and a multiple of 1GiB.
// numa_node is either negative (don't care), or a numa node number.
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr);
// Return the current NUMA node
size_t _mi_prim_numa_node(void);
// Return the number of logical NUMA nodes
size_t _mi_prim_numa_node_count(void);
// Clock ticks
mi_msecs_t _mi_prim_clock_now(void);
// Return process information (only for statistics)
typedef struct mi_process_info_s {
mi_msecs_t elapsed;
mi_msecs_t utime;
mi_msecs_t stime;
size_t current_rss;
size_t peak_rss;
size_t current_commit;
size_t peak_commit;
size_t page_faults;
} mi_process_info_t;
void _mi_prim_process_info(mi_process_info_t* pinfo);
// Default stderr output. (only for warnings etc. with verbose enabled)
// msg != NULL && _mi_strlen(msg) > 0
void _mi_prim_out_stderr( const char* msg );
// Get an environment variable. (only for options)
// name != NULL, result != NULL, result_size >= 64
bool _mi_prim_getenv(const char* name, char* result, size_t result_size);
// Fill a buffer with strong randomness; return `false` on error or if
// there is no strong randomization available.
bool _mi_prim_random_buf(void* buf, size_t buf_len);
// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination.
void _mi_prim_thread_init_auto_done(void);
// Called on process exit and may take action to clean up resources associated with the thread auto done.
void _mi_prim_thread_done_auto_done(void);
// Called when the default heap for a thread changes
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
//-------------------------------------------------------------------
// Access to TLS (thread local storage) slots.
// We need fast access to both a unique thread id (in `free.c:mi_free`) and
// to a thread-local heap pointer (in `alloc.c:mi_malloc`).
// To achieve this we use specialized code for various platforms.
//-------------------------------------------------------------------
// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot.
// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
//
// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly,
// but unfortunately we can not detect support reliably (see issue #883)
// We also use it on Apple OS as we use a TLS slot for the default heap there.
#if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \
|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
)
#define MI_HAS_TLS_SLOT 1
static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
void* res;
const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS
#elif defined(__APPLE__) && defined(__x86_64__)
__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
__asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
#elif defined(__x86_64__)
__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
void** tcb; MI_UNUSED(ofs);
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
res = tcb[slot];
#elif defined(__aarch64__)
void** tcb; MI_UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
#else
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
res = tcb[slot];
#elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781
MI_UNUSED(ofs);
res = pthread_getspecific(slot);
#endif
return res;
}
// setting a tls slot is only used on macOS for now
static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
__asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
#elif defined(__APPLE__) && defined(__x86_64__)
__asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
__asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
#elif defined(__x86_64__)
__asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
void** tcb; MI_UNUSED(ofs);
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
tcb[slot] = value;
#elif defined(__aarch64__)
void** tcb; MI_UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
#else
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
tcb[slot] = value;
#elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781
MI_UNUSED(ofs);
pthread_setspecific(slot, value);
#endif
}
#elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS)
// On Windows we can store the thread-local heap at a fixed TLS slot to avoid
// thread-local initialization checks in the fast path.
// We allocate a user TLS slot at process initialization (see `windows/prim.c`)
// and store the offset `_mi_win_tls_offset`.
#define MI_HAS_TLS_SLOT 1 // 2 = we can reliably initialize the slot (saving a test on each malloc)
extern mi_decl_hidden size_t _mi_win_tls_offset;
#if MI_WIN_USE_FIXED_TLS > 1
#define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS)
#elif MI_SIZE_SIZE == 4
#define MI_TLS_SLOT (0x0E10 + _mi_win_tls_offset) // User TLS slots <https://en.wikipedia.org/wiki/Win32_Thread_Information_Block>
#else
#define MI_TLS_SLOT (0x1480 + _mi_win_tls_offset) // User TLS slots <https://en.wikipedia.org/wiki/Win32_Thread_Information_Block>
#endif
static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
#if (_M_X64 || _M_AMD64) && !defined(_M_ARM64EC)
return (void*)__readgsqword((unsigned long)slot); // direct load at offset from gs
#elif _M_IX86 && !defined(_M_ARM64EC)
return (void*)__readfsdword((unsigned long)slot); // direct load at offset from fs
#else
return ((void**)NtCurrentTeb())[slot / sizeof(void*)];
#endif
}
static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
((void**)NtCurrentTeb())[slot / sizeof(void*)] = value;
}
#endif
//-------------------------------------------------------------------
// Get a fast unique thread id.
//
// Getting the thread id should be performant as it is called in the
// fast path of `_mi_free` and we specialize for various platforms as
// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
// We only require _mi_prim_thread_id() to return a unique id
// for each thread (unequal to zero).
//-------------------------------------------------------------------
// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id
// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883)
// Nevertheless, it seems needed on older graviton platforms (see issue #851).
// For now, we only enable this for specific platforms.
#if !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
&& !defined(__CYGWIN__) \
&& !defined(MI_LIBC_MUSL) \
&& (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>) */
#if (defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__)) /* aarch64 for older gcc versions (issue #851) */ \
|| (defined(__GNUC__) && (__GNUC__ >= 11) && defined(__x86_64__)) \
|| (defined(__clang_major__) && (__clang_major__ >= 14) && (defined(__aarch64__) || defined(__x86_64__)))
#define MI_USE_BUILTIN_THREAD_POINTER 1
#endif
#endif
// defined in `init.c`; do not use these directly
extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
// Get a unique id for the current thread.
#if defined(MI_PRIM_THREAD_ID)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
return MI_PRIM_THREAD_ID(); // used for example by CPython for a free threaded build (see python/cpython#115488)
}
#elif defined(_WIN32)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif MI_USE_BUILTIN_THREAD_POINTER
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms with recent compilers
return (uintptr_t)__builtin_thread_pointer();
}
#elif MI_HAS_TLS_SLOT
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
// see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
return (uintptr_t)mi_prim_tls_slot(1);
#else
// in all our other targets, slot 0 is the thread id
// glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
// apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
return (uintptr_t)mi_prim_tls_slot(0);
#endif
}
#else
// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
return (uintptr_t)&_mi_heap_default;
}
#endif
/* ----------------------------------------------------------------------------------------
Get the thread local default heap: `_mi_prim_get_default_heap()`
This is inlined here as it is on the fast path for allocation functions.
On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
that the storage will always be available (allocated on the thread stacks).
On some platforms though we cannot use that when overriding `malloc` since the underlying
TLS implementation (or the loader) will itself call `malloc` on a first access and recurse.
We try to circumvent this in an efficient way:
- macOS : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On macOS, the
loader itself calls `malloc` even before the modules are initialized.
- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
- DragonFly: defaults are working but seem slow compared to FreeBSD (see PR #323)
------------------------------------------------------------------------------------------- */
static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
// #elif defined(__DragonFly__)
// #warning "mimalloc is not working correctly on DragonFly yet."
// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
#elif defined(__ANDROID__)
// See issue #381
#define MI_TLS_PTHREAD
#endif
#endif
#if MI_TLS_SLOT
# if !defined(MI_HAS_TLS_SLOT)
# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined"
# endif
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);
#if MI_HAS_TLS_SLOT == 1 // check if the TLS slot is initialized
if mi_unlikely(heap == NULL) {
#ifdef __GNUC__
__asm(""); // prevent conditional load of the address of _mi_heap_empty
#endif
heap = (mi_heap_t*)&_mi_heap_empty;
}
#endif
return heap;
}
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) {
pthread_t self = pthread_self();
#if defined(__DragonFly__)
if (self==NULL) return NULL;
#endif
return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
}
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot();
if mi_unlikely(pheap == NULL) return _mi_heap_main_get();
mi_heap_t* heap = *pheap;
if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty;
return heap;
}
#elif defined(MI_TLS_PTHREAD)
extern mi_decl_hidden pthread_key_t _mi_heap_default_key;
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
}
#else // default using a thread local variable; used on most platforms.
static inline mi_heap_t* mi_prim_get_default_heap(void) {
#if defined(MI_TLS_RECURSE_GUARD)
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
#endif
return _mi_heap_default;
}
#endif // mi_prim_get_default_heap()
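// Illustrative use (simplified): the public allocation entry points first fetch the
// thread-local heap through this function, roughly
//
//   void* p = mi_heap_malloc(mi_prim_get_default_heap(), size);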
#endif // MIMALLOC_PRIM_H

View File

@@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_TRACK_H
#define MIMALLOC_TRACK_H
/* ------------------------------------------------------------------------------------------------------
Track memory ranges with macros for tools like Valgrind, address sanitizer, or other memory checkers.
These can be defined for tracking allocation:
#define mi_track_malloc_size(p,reqsize,size,zero)
#define mi_track_free_size(p,_size)
The macros are set up such that the size passed to `mi_track_free_size`
always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`).
The `reqsize` is what the user requested, and `size >= reqsize`.
The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled,
or otherwise it is the usable block size which may be larger than the original request.
Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc).
The `zero` parameter is `true` if the allocated block is zero initialized.
Optional:
#define mi_track_align(p,alignedp,offset,size)
#define mi_track_resize(p,oldsize,newsize)
#define mi_track_init()
The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block.
The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`).
The `mi_track_resize` is currently unused but could be called on reallocations within a block.
`mi_track_init` is called at program start.
The following macros are for tools like asan and valgrind to track whether memory is
defined, undefined, or not accessible at all:
#define mi_track_mem_defined(p,size)
#define mi_track_mem_undefined(p,size)
#define mi_track_mem_noaccess(p,size)
-------------------------------------------------------------------------------------------------------*/
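// Illustrative call pattern (simplified; the exact internal call sites differ):
//
//   void* p = mi_malloc(10);   // the allocator reports mi_track_malloc(p, 10, false)
//   mi_free(p);                // the allocator reports mi_track_free_size(p, mi_usable_size(p))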
#if MI_TRACK_VALGRIND
// valgrind tool
#define MI_TRACK_ENABLED 1
#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy
#define MI_TRACK_TOOL "valgrind"
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size)
#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size)
#elif MI_TRACK_ASAN
// address sanitizer
#define MI_TRACK_ENABLED 1
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_TOOL "asan"
#include <sanitizer/asan_interface.h>
#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size)
#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size)
#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size)
#elif MI_TRACK_ETW
// windows event tracing
#define MI_TRACK_ENABLED 1
#define MI_TRACK_HEAP_DESTROY 1
#define MI_TRACK_TOOL "ETW"
#include "../src/prim/windows/etw.h"
#define mi_track_init() EventRegistermicrosoft_windows_mimalloc();
#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size)
#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size)
#else
// no tracking
#define MI_TRACK_ENABLED 0
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_TOOL "none"
#define mi_track_malloc_size(p,reqsize,size,zero)
#define mi_track_free_size(p,_size)
#endif
// -------------------
// Utility definitions
#ifndef mi_track_resize
#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false)
#endif
#ifndef mi_track_align
#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset)
#endif
#ifndef mi_track_init
#define mi_track_init()
#endif
#ifndef mi_track_mem_defined
#define mi_track_mem_defined(p,size)
#endif
#ifndef mi_track_mem_undefined
#define mi_track_mem_undefined(p,size)
#endif
#ifndef mi_track_mem_noaccess
#define mi_track_mem_noaccess(p,size)
#endif
#if MI_PADDING
#define mi_track_malloc(p,reqsize,zero) \
if ((p)!=NULL) { \
mi_assert_internal(mi_usable_size(p)==(reqsize)); \
mi_track_malloc_size(p,reqsize,reqsize,zero); \
}
#else
#define mi_track_malloc(p,reqsize,zero) \
if ((p)!=NULL) { \
mi_assert_internal(mi_usable_size(p)>=(reqsize)); \
mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \
}
#endif
#endif

View File

@@ -0,0 +1,687 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_TYPES_H
#define MIMALLOC_TYPES_H
// --------------------------------------------------------------------------
// This file contains the main type definitions for mimalloc:
// mi_heap_t : all data for a thread-local heap, contains
// lists of all managed heap pages.
// mi_segment_t : a larger chunk of memory (32MiB) from where pages
// are allocated. A segment is divided in slices (64KiB) from
// which pages are allocated.
// mi_page_t : a "mimalloc" page (usually 64KiB or 512KiB) from
// where objects are allocated.
// Note: we write "OS page" for OS memory pages while
// using plain "page" for mimalloc pages (`mi_page_t`).
// --------------------------------------------------------------------------
#include <mimalloc-stats.h>
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include <stdbool.h> // bool
#include "atomic.h" // _Atomic
#ifdef _MSC_VER
#pragma warning(disable:4214) // bitfield is not int
#endif
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `sizeof(void*)`
#ifndef MI_MAX_ALIGN_SIZE
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
#endif
// ------------------------------------------------------
// Variants
// ------------------------------------------------------
// Define NDEBUG in the release version to disable assertions.
// #define NDEBUG
// Define MI_TRACK_<tool> to enable tracking support
// #define MI_TRACK_VALGRIND 1
// #define MI_TRACK_ASAN 1
// #define MI_TRACK_ETW 1
// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
// #define MI_STAT 1
// Define MI_SECURE to enable security mitigations
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
#if !defined(MI_SECURE)
#define MI_SECURE 0
#endif
// Define MI_DEBUG for debug mode
// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
// #define MI_DEBUG 2 // + internal assertion checks
// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
#if !defined(MI_DEBUG)
#if defined(MI_BUILD_RELEASE) || defined(NDEBUG)
#define MI_DEBUG 0
#else
#define MI_DEBUG 2
#endif
#endif
// Use guard pages behind objects of a certain size (set by the MIMALLOC_DEBUG_GUARDED_MIN/MAX options)
// Padding should be disabled when using guard pages
// #define MI_GUARDED 1
#if defined(MI_GUARDED)
#define MI_PADDING 0
#endif
// Reserve extra padding at the end of each block to be more resilient against heap block overflows.
// The padding can detect buffer overflow on free.
#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW))
#define MI_PADDING 1
#endif
// Check padding bytes; allows byte-precise buffer overflow detection
#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1)
#define MI_PADDING_CHECK 1
#endif
// Encoded free lists allow detection of corrupted free lists
// and can detect buffer overflows, modify after free, and double `free`s.
#if (MI_SECURE>=3 || MI_DEBUG>=1)
#define MI_ENCODE_FREELIST 1
#endif
// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread.
// Unfortunately, that makes it impossible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
// the owning thread).
// #define MI_HUGE_PAGE_ABANDON 1
// ------------------------------------------------------
// Platform specific values
// ------------------------------------------------------
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p))
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform pointers must be 32, 64, or 128 bits
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits
#endif
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
# define MI_ZI(x) x##LL
#else
# define MI_ZU(x) x##UL
# define MI_ZI(x) x##L
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
// ------------------------------------------------------
// Main internal data-structures
// ------------------------------------------------------
// Main tuning parameters for segment and page sizes
// Sizes for 64-bit (usually divide by two for 32-bit)
#ifndef MI_SEGMENT_SLICE_SHIFT
#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit)
#endif
#ifndef MI_SEGMENT_SHIFT
#if MI_INTPTR_SIZE > 4
#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB
#else
#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
#endif
#endif
#ifndef MI_SMALL_PAGE_SHIFT
#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB
#endif
#ifndef MI_MEDIUM_PAGE_SHIFT
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
#endif
// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024
#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/8) // 8 KiB on 64-bit
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64 KiB on 64-bit
#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 16 MiB on 64-bit
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#if MI_BIN_HUGE != 73U
#error "mimalloc internal: expecting 73 bins"
#endif
#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
#error "mimalloc internal: define more bins"
#endif
// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// Maximum slice count (255) for which we can find the page for interior pointers
#define MI_MAX_SLICE_OFFSET_COUNT ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
// we never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
// on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877)
#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLICE_SIZE * UINT32_MAX))
#define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1))
#else
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
#endif
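// Worked example for the 64-bit defaults: MI_SEGMENT_SLICE_SIZE is 64KiB, so MI_MAX_ALLOC_SIZE
// is 64KiB * (UINT32_MAX-1), i.e. just under 256TiB, which is well below PTRDIFF_MAX.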
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
// ------------------------------------------------------
// The free lists use encoded next fields
// (Only actually encodes when MI_ENCODE_FREELIST is defined.)
typedef uintptr_t mi_encoded_t;
// thread id's
typedef size_t mi_threadid_t;
// free lists contain blocks
typedef struct mi_block_s {
mi_encoded_t next;
} mi_block_t;
#if MI_GUARDED
// we always align guarded pointers in a block at an offset
// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
#endif
// The delayed flags are used for efficient multi-threaded free-ing
typedef enum mi_delayed_e {
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
} mi_delayed_t;
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
#if !MI_TSAN
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
#else
// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
typedef union mi_page_flags_s {
uint32_t full_aligned;
struct {
uint8_t in_full;
uint8_t has_aligned;
} x;
} mi_page_flags_t;
#endif
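// Illustrative fast-path test (simplified): because both flags share one word, the free
// path can check them together, roughly
//
//   if (page->flags.full_aligned == 0) { /* neither `in_full` nor `has_aligned` is set */ }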
// Thread free list.
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
typedef uintptr_t mi_thread_free_t;
// A page contains blocks of one specific size (`block_size`).
// Each page has three lists of free blocks:
// `free` for blocks that can be allocated,
// `local_free` for freed blocks that are not yet available to `mi_malloc`
// `thread_free` for freed blocks by other threads
// The `local_free` and `thread_free` lists are migrated to the `free` list
// when it is exhausted. The separate `local_free` list is necessary to
// implement a monotonic heartbeat. The `thread_free` list is needed for
// avoiding atomic operations in the common case.
//
// `used - |thread_free|` == actual blocks that are in use (alive)
// `used - |thread_free| + |free| + |local_free| == capacity`
//
// We don't count `freed` (as |free|) but use `used` to reduce
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - The size is 12 words on 64-bit which helps the page index calculations
// (and 14 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or has already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
// "owned" by the segment
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_committed:1; // `true` if the page virtual memory is committed
uint8_t is_zero_init:1; // `true` if the page was initially zero initialized
uint8_t is_huge:1; // `true` if the page is in a huge segment (`segment->kind == MI_SEGMENT_HUGE`)
// padding
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
// padding
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the page area containing the blocks
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
#endif
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// 64-bit 11 words, 32-bit 13 words, (+2 for secure)
void* padding[1];
} mi_page_t;
// ------------------------------------------------------
// Mimalloc segments contain mimalloc pages
// ------------------------------------------------------
typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
MI_PAGE_HUGE // a huge page is a single page in a segment of variable size
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`.
} mi_page_kind_t;
typedef enum mi_segment_kind_e {
MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside.
MI_SEGMENT_HUGE, // segment with just one huge page inside.
} mi_segment_kind_t;
// ------------------------------------------------------
// A segment holds a commit mask where a bit is set if
// the corresponding MI_COMMIT_SIZE area is committed.
// The MI_COMMIT_SIZE must be a multiple of the slice
// size. If it is equal we have the most fine grained
// decommit (but setting it higher can be more efficient).
// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will
// be committed in one go which can be set higher than
// MI_COMMIT_SIZE for efficiency (while the decommit mask
// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
// ------------------------------------------------------
#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE)
#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS)
#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS))
#error "the segment size must be exactly divisible by the (commit size * size_t bits)"
#endif
typedef struct mi_commit_mask_s {
size_t mask[MI_COMMIT_MASK_FIELD_COUNT];
} mi_commit_mask_t;
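// Worked example for the 64-bit defaults above: MI_SEGMENT_SIZE is 32MiB and MI_COMMIT_SIZE
// is 64KiB, so MI_COMMIT_MASK_BITS == 512 and the mask consists of 512/64 == 8 `size_t` fields.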
typedef mi_page_t mi_slice_t;
typedef int64_t mi_msecs_t;
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arenas, be directly OS allocated, or be statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remappable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t size; // full allocation size
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
size_t block_index; // index in the arena
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;
typedef struct mi_memid_s {
union {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
} mem;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;
// -----------------------------------------------------------------------------------------
// Segments are large allocated memory blocks (32MiB on 64-bit) from arenas or the OS.
//
// Inside segments we allocate fixed-size mimalloc pages (`mi_page_t`) that contain blocks.
// The start of a segment is this structure with a fixed number of slice entries (`slices`)
// usually followed by a guard OS page and the actual allocation area with pages.
// While a page is not allocated, we view its data as a `mi_slice_t` (instead of a `mi_page_t`).
// Of any free area, the first slice has the info and `slice_offset == 0`; for any subsequent
// slices part of the area, the `slice_offset` is the byte offset back to the first slice
// (so we can quickly find the page info on a free, `internal.h:_mi_segment_page_of`).
// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`).
// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while
// large and huge pages span a variable amount of slices.
typedef struct mi_subproc_s mi_subproc_t;
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // memory id for arena/OS allocation
bool allow_decommit; // can we decommit the memory
bool allow_purge; // can we purge the memory (reset or decommit)
size_t segment_size;
mi_subproc_t* subproc; // segment belongs to sub process
// segment fields
mi_msecs_t purge_expire; // purge slices in the `purge_mask` after this time
mi_commit_mask_t purge_mask; // slices that can be purged
mi_commit_mask_t commit_mask; // slices that are currently committed
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation)
bool dont_free; // can be temporarily true to ensure the segment is not freed
bool free_is_zero; // if free spans are zero
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited during abandoned reclamation (to force reclaim if it takes too long)
size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arenas, and only if `mi_option_visit_abandoned` is enabled
struct mi_segment_s* abandoned_os_prev;
size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages.
// layout like this to optimize access in `mi_free`
mi_segment_kind_t kind;
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment
} mi_segment_t;
// ------------------------------------------------------
// Heaps
// Provide first-class heaps to allocate from.
// A heap just owns a set of pages for allocation and
// can only be used for allocation/reallocation by the thread that created it.
// Freeing blocks can be done from any thread though.
// Per thread, the segments are shared among its heaps.
// Per thread, there is always a default heap that is
// used for allocation; it is initialized to statically
// point to an empty heap to avoid initialization checks
// in the fast path.
// ------------------------------------------------------
// Thread local data
typedef struct mi_tld_s mi_tld_t;
// Pages of a certain block size are held in a queue.
typedef struct mi_page_queue_s {
mi_page_t* first;
mi_page_t* last;
size_t block_size;
} mi_page_queue_t;
#define MI_BIN_FULL (MI_BIN_HUGE+1)
// Random context
typedef struct mi_random_cxt_s {
uint32_t input[16];
uint32_t output[16];
int output_available;
bool weak;
} mi_random_ctx_t;
// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
#if (MI_PADDING)
typedef struct mi_padding_s {
uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
} mi_padding_t;
#define MI_PADDING_SIZE (sizeof(mi_padding_t))
#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE)
#else
#define MI_PADDING_SIZE 0
#define MI_PADDING_WSIZE 0
#endif
#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1)
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
_Atomic(mi_block_t*) thread_delayed_free;
mi_threadid_t thread_id; // thread this heap belongs to
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
mi_random_ctx_t random; // random number context used for secure allocation
size_t page_count; // total number of pages in the `pages` queues.
size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues)
size_t page_retired_max; // largest retired index into the `pages` array.
long generic_count; // how often is `_mi_malloc_generic` called?
long generic_collect_count; // how often is `_mi_malloc_generic` called without collecting?
mi_heap_t* next; // list of heaps per thread
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
uint8_t tag; // custom tag, can be used for separating heaps based on the object types
#if MI_GUARDED
size_t guarded_size_min; // minimal size for guarded objects
size_t guarded_size_max; // maximal size for guarded objects
size_t guarded_sample_rate; // sample rate (set to 0 to disable guarded pages)
size_t guarded_sample_count; // current sample count (counting down to 0)
#endif
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
};
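// Illustrative lookup (simplified; the padding size is ignored here): the allocation fast
// path maps a small request to its word count and indexes `pages_free_direct`, roughly
//
//   mi_page_t* page = heap->pages_free_direct[(size + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE];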
// ------------------------------------------------------
// Sub processes do not reclaim or visit segments
// from other sub processes. These are essentially the
// static variables of a process.
// ------------------------------------------------------
struct mi_subproc_s {
_Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process
_Atomic(size_t) abandoned_os_list_count; // count of abandoned segments in the os-list
mi_lock_t abandoned_os_lock; // lock for the abandoned os segment list (outside of arenas) (this lock protects list operations)
mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list
mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
mi_segment_t* abandoned_os_list_tail; // the tail-end of the list
mi_memid_t memid; // provenance of this memory block
};
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------
// A "span" is is an available range of slices. The span queues keep
// track of slice spans of at most the given `slice_count` (but more than the previous size class).
typedef struct mi_span_queue_s {
mi_slice_t* first;
mi_slice_t* last;
size_t slice_count;
} mi_span_queue_t;
#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)
// Segments thread local data
typedef struct mi_segments_tld_s {
mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments
size_t count; // current number of segments;
size_t peak_count; // peak number of segments
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
size_t reclaim_count;// number of reclaimed (abandoned) segments
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_stats_t* stats; // points to tld stats
} mi_segments_tld_t;
// Thread local data
struct mi_tld_s {
unsigned long long heartbeat; // monotonic heartbeat count
bool recurse; // true if deferred was called; used to prevent infinite recursion.
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
mi_segments_tld_t segments; // segment tld
mi_stats_t stats; // statistics
};
// ------------------------------------------------------
// Debug
// ------------------------------------------------------
#if !defined(MI_DEBUG_UNINIT)
#define MI_DEBUG_UNINIT (0xD0)
#endif
#if !defined(MI_DEBUG_FREED)
#define MI_DEBUG_FREED (0xDF)
#endif
#if !defined(MI_DEBUG_PADDING)
#define MI_DEBUG_PADDING (0xDE)
#endif
// ------------------------------------------------------
// Statistics
// ------------------------------------------------------
#ifndef MI_STAT
#if (MI_DEBUG>0)
#define MI_STAT 2
#else
#define MI_STAT 0
#endif
#endif
// add to stat keeping track of the peak
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount);
// counters can just be increased
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#if (MI_STAT)
#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount)
#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount)
#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount)
#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
#else
#define mi_stat_increase(stat,amount) ((void)0)
#define mi_stat_decrease(stat,amount) ((void)0)
#define mi_stat_adjust_decrease(stat,amount) ((void)0)
#define mi_stat_counter_increase(stat,amount) ((void)0)
#endif
#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_adjust_decrease(heap,stat,amount) mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount)
#endif

670
compat/mimalloc/options.c Normal file
View File

@@ -0,0 +1,670 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // mi_prim_out_stderr
#include <stdio.h> // stdin/stdout
#include <stdlib.h> // abort
static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit)
static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit)
static void mi_add_stderr_output(void);
int mi_version(void) mi_attr_noexcept {
return MI_MALLOC_VERSION;
}
// --------------------------------------------------------
// Options
// These can be accessed by multiple threads and may be
// concurrently initialized, but an initializing data race
// is ok since they resolve to the same value.
// --------------------------------------------------------
typedef enum mi_init_e {
UNINIT, // not yet initialized
DEFAULTED, // not found in the environment, use default value
INITIALIZED // found in environment or set explicitly
} mi_init_t;
typedef struct mi_option_desc_s {
long value; // the value
mi_init_t init; // is it initialized yet? (from the environment)
mi_option_t option; // for debugging: the option index should match the option
const char* name; // option name without `mimalloc_` prefix
const char* legacy_name; // potential legacy option name
} mi_option_desc_t;
#define MI_OPTION(opt) mi_option_##opt, #opt, NULL
#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy
// Some options can be set at build time for statically linked libraries
// (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`)
//
// This is useful if we cannot pass them as environment variables
// (and setting them programmatically would be too late)
#ifndef MI_DEFAULT_VERBOSE
#define MI_DEFAULT_VERBOSE 0
#endif
#ifndef MI_DEFAULT_EAGER_COMMIT
#define MI_DEFAULT_EAGER_COMMIT 1
#endif
#ifndef MI_DEFAULT_ARENA_EAGER_COMMIT
#define MI_DEFAULT_ARENA_EAGER_COMMIT 2
#endif
// in KiB
#ifndef MI_DEFAULT_ARENA_RESERVE
#if (MI_INTPTR_SIZE>4)
#define MI_DEFAULT_ARENA_RESERVE 1024L*1024L
#else
#define MI_DEFAULT_ARENA_RESERVE 128L*1024L
#endif
#endif
#ifndef MI_DEFAULT_DISALLOW_ARENA_ALLOC
#define MI_DEFAULT_DISALLOW_ARENA_ALLOC 0
#endif
#ifndef MI_DEFAULT_ALLOW_LARGE_OS_PAGES
#define MI_DEFAULT_ALLOW_LARGE_OS_PAGES 0
#endif
#ifndef MI_DEFAULT_RESERVE_HUGE_OS_PAGES
#define MI_DEFAULT_RESERVE_HUGE_OS_PAGES 0
#endif
#ifndef MI_DEFAULT_RESERVE_OS_MEMORY
#define MI_DEFAULT_RESERVE_OS_MEMORY 0
#endif
#ifndef MI_DEFAULT_GUARDED_SAMPLE_RATE
#if MI_GUARDED
#define MI_DEFAULT_GUARDED_SAMPLE_RATE 4000
#else
#define MI_DEFAULT_GUARDED_SAMPLE_RATE 0
#endif
#endif
#ifndef MI_DEFAULT_ALLOW_THP
#if defined(__ANDROID__)
#define MI_DEFAULT_ALLOW_THP 0
#else
#define MI_DEFAULT_ALLOW_THP 1
#endif
#endif
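For illustration (assuming a GCC/Clang-style command line, not something the sources themselves prescribe), building with `-DMI_DEFAULT_VERBOSE=1 -DMI_DEFAULT_RESERVE_OS_MEMORY=262144` would bake in verbose output and a 256 MiB up-front OS reservation, since `reserve_os_memory` is interpreted in KiB below.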
// Static options
static mi_option_desc_t options[_mi_option_last] =
{
// stable options
#if MI_DEBUG || defined(MI_SHOW_ERRORS)
{ 1, UNINIT, MI_OPTION(show_errors) },
#else
{ 0, UNINIT, MI_OPTION(show_errors) },
#endif
{ 0, UNINIT, MI_OPTION(show_stats) },
{ MI_DEFAULT_VERBOSE, UNINIT, MI_OPTION(verbose) },
// some of the following options are experimental and not all combinations are allowed.
{ MI_DEFAULT_EAGER_COMMIT,
UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
{ MI_DEFAULT_ARENA_EAGER_COMMIT,
UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
{ 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit)
{ MI_DEFAULT_ALLOW_LARGE_OS_PAGES,
UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ MI_DEFAULT_RESERVE_HUGE_OS_PAGES,
UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ MI_DEFAULT_RESERVE_OS_MEMORY,
UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`)
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit)
#if defined(__NetBSD__)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
#else
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#endif
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 32, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 32, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
{ 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try.
{ 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
{ MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
{ 0, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
#if defined(MI_VISIT_ABANDONED)
{ 1, INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting heap blocks in abandoned segments; requires taking locks during reclaim.
#else
{ 0, UNINIT, MI_OPTION(visit_abandoned) },
#endif
{ 0, UNINIT, MI_OPTION(guarded_min) }, // only used when building with MI_GUARDED: minimal rounded object size for guarded objects
{ MI_GiB, UNINIT, MI_OPTION(guarded_max) }, // only used when building with MI_GUARDED: maximal rounded object size for guarded objects
{ 0, UNINIT, MI_OPTION(guarded_precise) }, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
{ MI_DEFAULT_GUARDED_SAMPLE_RATE,
UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000)
{ 0, UNINIT, MI_OPTION(guarded_sample_seed)},
{ 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable.
{ 10000, UNINIT, MI_OPTION(generic_collect) }, // collect heaps every N (=10000) generic allocation calls
{ MI_DEFAULT_ALLOW_THP,
UNINIT, MI_OPTION(allow_thp) } // allow transparent huge pages?
};
static void mi_option_init(mi_option_desc_t* desc);
static bool mi_option_has_size_in_kib(mi_option_t option) {
return (option == mi_option_reserve_os_memory || option == mi_option_arena_reserve);
}
void _mi_options_init(void) {
// called on process load
mi_add_stderr_output(); // now it is safe to use stderr for output
for(int i = 0; i < _mi_option_last; i++ ) {
mi_option_t option = (mi_option_t)i;
long l = mi_option_get(option); MI_UNUSED(l); // initialize
}
mi_max_error_count = mi_option_get(mi_option_max_errors);
mi_max_warning_count = mi_option_get(mi_option_max_warnings);
#if MI_GUARDED
if (mi_option_get(mi_option_guarded_sample_rate) > 0) {
if (mi_option_is_enabled(mi_option_allow_large_os_pages)) {
mi_option_disable(mi_option_allow_large_os_pages);
_mi_warning_message("option 'allow_large_os_pages' is disabled to allow for guarded objects\n");
}
}
#endif
if (mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); }
}
#define mi_stringifyx(str) #str // and stringify
#define mi_stringify(str) mi_stringifyx(str) // expand
void mi_options_print(void) mi_attr_noexcept
{
// show version
const int vermajor = MI_MALLOC_VERSION/100;
const int verminor = (MI_MALLOC_VERSION%100)/10;
const int verpatch = (MI_MALLOC_VERSION%10);
_mi_message("v%i.%i.%i%s%s (built on %s, %s)\n", vermajor, verminor, verpatch,
#if defined(MI_CMAKE_BUILD_TYPE)
", " mi_stringify(MI_CMAKE_BUILD_TYPE)
#else
""
#endif
,
#if defined(MI_GIT_DESCRIBE)
", git " mi_stringify(MI_GIT_DESCRIBE)
#else
""
#endif
, __DATE__, __TIME__);
// show options
for (int i = 0; i < _mi_option_last; i++) {
mi_option_t option = (mi_option_t)i;
long l = mi_option_get(option); MI_UNUSED(l); // possibly initialize
mi_option_desc_t* desc = &options[option];
_mi_message("option '%s': %ld %s\n", desc->name, desc->value, (mi_option_has_size_in_kib(option) ? "KiB" : ""));
}
// show build configuration
_mi_message("debug level : %d\n", MI_DEBUG );
_mi_message("secure level: %d\n", MI_SECURE );
_mi_message("mem tracking: %s\n", MI_TRACK_TOOL);
#if MI_GUARDED
_mi_message("guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) != 0 ? "enabled" : "disabled");
#endif
#if MI_TSAN
_mi_message("thread santizer enabled\n");
#endif
}
long _mi_option_get_fast(mi_option_t option) {
mi_assert(option >= 0 && option < _mi_option_last);
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
//mi_assert(desc->init != UNINIT);
return desc->value;
}
mi_decl_nodiscard long mi_option_get(mi_option_t option) {
mi_assert(option >= 0 && option < _mi_option_last);
if (option < 0 || option >= _mi_option_last) return 0;
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
if mi_unlikely(desc->init == UNINIT) {
mi_option_init(desc);
}
return desc->value;
}
mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long max) {
long x = mi_option_get(option);
return (x < min ? min : (x > max ? max : x));
}
mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) {
const long x = mi_option_get(option);
size_t size = (x < 0 ? 0 : (size_t)x);
if (mi_option_has_size_in_kib(option)) {
size *= MI_KiB;
}
return size;
}
void mi_option_set(mi_option_t option, long value) {
mi_assert(option >= 0 && option < _mi_option_last);
if (option < 0 || option >= _mi_option_last) return;
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
desc->value = value;
desc->init = INITIALIZED;
// ensure min/max range; be careful to not recurse.
if (desc->option == mi_option_guarded_min && _mi_option_get_fast(mi_option_guarded_max) < value) {
mi_option_set(mi_option_guarded_max, value);
}
else if (desc->option == mi_option_guarded_max && _mi_option_get_fast(mi_option_guarded_min) > value) {
mi_option_set(mi_option_guarded_min, value);
}
}
void mi_option_set_default(mi_option_t option, long value) {
mi_assert(option >= 0 && option < _mi_option_last);
if (option < 0 || option >= _mi_option_last) return;
mi_option_desc_t* desc = &options[option];
if (desc->init != INITIALIZED) {
desc->value = value;
}
}
mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) {
return (mi_option_get(option) != 0);
}
void mi_option_set_enabled(mi_option_t option, bool enable) {
mi_option_set(option, (enable ? 1 : 0));
}
void mi_option_set_enabled_default(mi_option_t option, bool enable) {
mi_option_set_default(option, (enable ? 1 : 0));
}
void mi_option_enable(mi_option_t option) {
mi_option_set_enabled(option,true);
}
void mi_option_disable(mi_option_t option) {
mi_option_set_enabled(option,false);
}
static void mi_cdecl mi_out_stderr(const char* msg, void* arg) {
MI_UNUSED(arg);
if (msg != NULL && msg[0] != 0) {
_mi_prim_out_stderr(msg);
}
}
// Since an output function can be registered at the earliest in the `main`
// function, we also buffer any output that happens before that. When
// an output function is registered, it is called immediately with
// the output buffered up to that point.
#ifndef MI_MAX_DELAY_OUTPUT
#define MI_MAX_DELAY_OUTPUT ((size_t)(16*1024))
#endif
static char out_buf[MI_MAX_DELAY_OUTPUT+1];
static _Atomic(size_t) out_len;
static void mi_cdecl mi_out_buf(const char* msg, void* arg) {
MI_UNUSED(arg);
if (msg==NULL) return;
if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
size_t n = _mi_strlen(msg);
if (n==0) return;
// claim space
size_t start = mi_atomic_add_acq_rel(&out_len, n);
if (start >= MI_MAX_DELAY_OUTPUT) return;
// check bound
if (start+n >= MI_MAX_DELAY_OUTPUT) {
n = MI_MAX_DELAY_OUTPUT-start-1;
}
_mi_memcpy(&out_buf[start], msg, n);
}
static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) {
if (out==NULL) return;
// claim (if `no_more_buf == true`, no more output will be added after this point)
size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1));
// and output the current contents
if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT;
out_buf[count] = 0;
out(out_buf,arg);
if (!no_more_buf) {
out_buf[count] = '\n'; // if we continue with the buffer, insert a newline
}
}
// Once this module is loaded, switch to this routine
// which outputs to stderr and the delayed output buffer.
static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) {
mi_out_stderr(msg,arg);
mi_out_buf(msg,arg);
}
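The delayed buffer above is lock-free: each writer claims a slice of the fixed buffer with a single atomic add and copies into its slice, and the flush merely clamps the length and terminates the string. A minimal standalone sketch of that claim-then-copy pattern, assuming C11 `<stdatomic.h>` (names here are illustrative, not mimalloc's):

  #include <stdatomic.h>
  #include <string.h>

  #define BUF_MAX 4096
  static char delay_buf[BUF_MAX + 1];
  static _Atomic size_t delay_len;

  static void delay_append(const char* msg) {
    size_t n = strlen(msg);
    if (n == 0 || atomic_load(&delay_len) >= BUF_MAX) return;
    size_t start = atomic_fetch_add(&delay_len, n);   // claim space
    if (start >= BUF_MAX) return;                     // another writer filled it first
    if (start + n > BUF_MAX) n = BUF_MAX - start;     // clip at the end
    memcpy(&delay_buf[start], msg, n);                // copy into the claimed slice
  }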
// --------------------------------------------------------
// Default output handler
// --------------------------------------------------------
// Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t.
// For now, don't register output from multiple threads.
static mi_output_fun* volatile mi_out_default; // = NULL
static _Atomic(void*) mi_out_arg; // = NULL
static mi_output_fun* mi_out_get_default(void** parg) {
if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); }
mi_output_fun* out = mi_out_default;
return (out == NULL ? &mi_out_buf : out);
}
void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept {
mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer
mi_atomic_store_ptr_release(void,&mi_out_arg, arg);
if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now
}
// add stderr to the delayed output after the module is loaded
static void mi_add_stderr_output(void) {
mi_assert_internal(mi_out_default == NULL);
mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr
mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output
}
// --------------------------------------------------------
// Messages, all end up calling `_mi_fputs`.
// --------------------------------------------------------
static _Atomic(size_t) error_count; // = 0; // when >= max_error_count stop emitting errors
static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings
// When overriding malloc, we may recurse into mi_vfprintf if an allocation
// inside the C runtime causes another message.
// In some cases (like on macOS) the loader already allocates which
// calls into mimalloc; if we then access thread locals (like `recurse`)
// this may crash as the access may call _tlv_bootstrap that tries to
// (recursively) invoke malloc again to allocate space for the thread local
// variables on demand. This is why we use a _mi_preloading test on such
// platforms. However, the C code generator may move the initial thread-local address
// load before the `if`, which is why we split it out into a separate function.
static mi_decl_thread bool recurse = false;
static mi_decl_noinline bool mi_recurse_enter_prim(void) {
if (recurse) return false;
recurse = true;
return true;
}
static mi_decl_noinline void mi_recurse_exit_prim(void) {
recurse = false;
}
static bool mi_recurse_enter(void) {
#if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD)
if (_mi_preloading()) return false;
#endif
return mi_recurse_enter_prim();
}
static void mi_recurse_exit(void) {
#if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD)
if (_mi_preloading()) return;
#endif
mi_recurse_exit_prim();
}
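Stripped of the preloading special case, the guard above is a plain thread-local re-entrancy flag wrapped in enter/exit helpers; a standalone sketch (illustrative names, C11 `_Thread_local`):

  #include <stdbool.h>

  static _Thread_local bool in_message;   // per-thread re-entrancy flag

  static bool message_enter(void) {
    if (in_message) return false;         // already formatting on this thread
    in_message = true;
    return true;
  }
  static void message_exit(void) { in_message = false; }

  // usage: if (message_enter()) { /* format and emit */ message_exit(); }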
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
if (out==NULL || (void*)out==(void*)stdout || (void*)out==(void*)stderr) { // TODO: use mi_out_stderr for stderr?
if (!mi_recurse_enter()) return;
out = mi_out_get_default(&arg);
if (prefix != NULL) out(prefix, arg);
out(message, arg);
mi_recurse_exit();
}
else {
if (prefix != NULL) out(prefix, arg);
out(message, arg);
}
}
// Define our own limited `fprintf` that avoids memory allocation.
// We do this using `_mi_vsnprintf` with a limited buffer.
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
if (fmt==NULL) return;
if (!mi_recurse_enter()) return;
_mi_vsnprintf(buf, sizeof(buf)-1, fmt, args);
mi_recurse_exit();
_mi_fputs(out,arg,prefix,buf);
}
void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
va_list args;
va_start(args,fmt);
mi_vfprintf(out,arg,NULL,fmt,args);
va_end(args);
}
static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) {
if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) {
char tprefix[64];
_mi_snprintf(tprefix, sizeof(tprefix), "%sthread 0x%tx: ", prefix, (uintptr_t)_mi_thread_id());
mi_vfprintf(out, arg, tprefix, fmt, args);
}
else {
mi_vfprintf(out, arg, prefix, fmt, args);
}
}
void _mi_message(const char* fmt, ...) {
va_list args;
va_start(args, fmt);
mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args);
va_end(args);
}
void _mi_trace_message(const char* fmt, ...) {
if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher
va_list args;
va_start(args, fmt);
mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args);
va_end(args);
}
void _mi_verbose_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_verbose)) return;
va_list args;
va_start(args,fmt);
mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args);
va_end(args);
}
static void mi_show_error_message(const char* fmt, va_list args) {
if (!mi_option_is_enabled(mi_option_verbose)) {
if (!mi_option_is_enabled(mi_option_show_errors)) return;
if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return;
}
mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args);
}
void _mi_warning_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_verbose)) {
if (!mi_option_is_enabled(mi_option_show_errors)) return;
if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return;
}
va_list args;
va_start(args,fmt);
mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args);
va_end(args);
}
#if MI_DEBUG
mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept {
_mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion);
abort();
}
#endif
// --------------------------------------------------------
// Errors
// --------------------------------------------------------
static mi_error_fun* volatile mi_error_handler; // = NULL
static _Atomic(void*) mi_error_arg; // = NULL
static void mi_error_default(int err) {
MI_UNUSED(err);
#if (MI_DEBUG>0)
if (err==EFAULT) {
#ifdef _MSC_VER
__debugbreak();
#endif
abort();
}
#endif
#if (MI_SECURE>0)
if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data)
abort();
}
#endif
#if defined(MI_XMALLOC)
if (err==ENOMEM || err==EOVERFLOW) { // abort on memory allocation fails in xmalloc mode
abort();
}
#endif
}
void mi_register_error(mi_error_fun* fun, void* arg) {
mi_error_handler = fun; // can be NULL
mi_atomic_store_ptr_release(void,&mi_error_arg, arg);
}
void _mi_error_message(int err, const char* fmt, ...) {
// show detailed error message
va_list args;
va_start(args, fmt);
mi_show_error_message(fmt, args);
va_end(args);
// and call the error handler which may abort (or return normally)
if (mi_error_handler != NULL) {
mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg));
}
else {
mi_error_default(err);
}
}
// --------------------------------------------------------
// Initialize options by checking the environment
// --------------------------------------------------------
// TODO: implement ourselves to reduce dependencies on the C runtime
#include <stdlib.h> // strtol
#include <string.h> // strstr
static void mi_option_init(mi_option_desc_t* desc) {
// Read option value from the environment
char s[64 + 1];
char buf[64+1];
_mi_strlcpy(buf, "mimalloc_", sizeof(buf));
_mi_strlcat(buf, desc->name, sizeof(buf));
bool found = _mi_getenv(buf, s, sizeof(s));
if (!found && desc->legacy_name != NULL) {
_mi_strlcpy(buf, "mimalloc_", sizeof(buf));
_mi_strlcat(buf, desc->legacy_name, sizeof(buf));
found = _mi_getenv(buf, s, sizeof(s));
if (found) {
_mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name);
}
}
if (found) {
size_t len = _mi_strnlen(s, sizeof(buf) - 1);
for (size_t i = 0; i < len; i++) {
buf[i] = _mi_toupper(s[i]);
}
buf[len] = 0;
if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) {
desc->value = 1;
desc->init = INITIALIZED;
}
else if (strstr("0;FALSE;NO;OFF", buf) != NULL) {
desc->value = 0;
desc->init = INITIALIZED;
}
else {
char* end = buf;
long value = strtol(buf, &end, 10);
if (mi_option_has_size_in_kib(desc->option)) {
// this option is interpreted in KiB to prevent overflow of `long` for large allocations
// (long is 32-bit on 64-bit windows, which allows for 4TiB max.)
size_t size = (value < 0 ? 0 : (size_t)value);
bool overflow = false;
if (*end == 'K') { end++; }
else if (*end == 'M') { overflow = mi_mul_overflow(size,MI_KiB,&size); end++; }
else if (*end == 'G') { overflow = mi_mul_overflow(size,MI_MiB,&size); end++; }
else if (*end == 'T') { overflow = mi_mul_overflow(size,MI_GiB,&size); end++; }
else { size = (size + MI_KiB - 1) / MI_KiB; }
if (end[0] == 'I' && end[1] == 'B') { end += 2; } // KiB, MiB, GiB, TiB
else if (*end == 'B') { end++; } // Kb, Mb, Gb, Tb
if (overflow || size > MI_MAX_ALLOC_SIZE) { size = (MI_MAX_ALLOC_SIZE / MI_KiB); }
value = (size > LONG_MAX ? LONG_MAX : (long)size);
}
if (*end == 0) {
mi_option_set(desc->option, value);
}
else {
// set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose.
desc->init = DEFAULTED;
if (desc->option == mi_option_verbose && desc->value == 0) {
// if the 'mimalloc_verbose' env var has a bogus value we'd never know
// (since the value defaults to 'off') so in that case briefly enable verbose
desc->value = 1;
_mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name);
desc->value = 0;
}
else {
_mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name);
}
}
}
mi_assert_internal(desc->init != UNINIT);
}
else if (!_mi_preloading()) {
desc->init = DEFAULTED;
}
}
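The suffix handling above keeps such sizes in KiB so that even a 32-bit `long` can express multi-TiB values. A standalone sketch of the same conversion (illustrative, overflow and clamping checks omitted; assumes the input was already uppercased, as the loop above does):

  #include <stdlib.h>

  // parse "<number>[K|M|G|T][I]B" (already uppercased) into KiB; -1 on trailing junk
  static long parse_kib(const char* buf) {
    char* end = (char*)buf;
    long value = strtol(buf, &end, 10);
    unsigned long size = (value < 0 ? 0 : (unsigned long)value);
    if      (*end == 'K') { end++; }                                   // already KiB
    else if (*end == 'M') { size *= 1024UL; end++; }                   // MiB -> KiB
    else if (*end == 'G') { size *= 1024UL * 1024UL; end++; }          // GiB -> KiB
    else if (*end == 'T') { size *= 1024UL * 1024UL * 1024UL; end++; } // TiB -> KiB
    else                  { size = (size + 1023UL) / 1024UL; }         // plain bytes -> KiB
    if (end[0] == 'I' && end[1] == 'B') end += 2;                      // "KIB", "MIB", ...
    else if (*end == 'B') end++;                                       // "KB", "MB", ...
    return (*end == 0 ? (long)size : -1);
  }

So for example `MIMALLOC_ARENA_RESERVE=512MiB` in the environment ends up as the value 524288 (KiB) for `arena_reserve`.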

770
compat/mimalloc/os.c Normal file
View File

@@ -0,0 +1,770 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#define mi_os_stat_increase(stat,amount) _mi_stat_increase(&_mi_stats_main.stat, amount)
#define mi_os_stat_decrease(stat,amount) _mi_stat_decrease(&_mi_stats_main.stat, amount)
#define mi_os_stat_counter_increase(stat,inc) _mi_stat_counter_increase(&_mi_stats_main.stat, inc)
/* -----------------------------------------------------------
Initialization.
----------------------------------------------------------- */
#ifndef MI_DEFAULT_VIRTUAL_ADDRESS_BITS
#if MI_INTPTR_SIZE < 8
#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 32
#else
#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 48
#endif
#endif
#ifndef MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB
#if MI_INTPTR_SIZE < 8
#define MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB 4*MI_MiB // 4 GiB
#else
#define MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB 32*MI_MiB // 32 GiB
#endif
#endif
static mi_os_mem_config_t mi_os_mem_config = {
4096, // page size
0, // large page size (usually 2MiB)
4096, // allocation granularity
MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB,
MI_DEFAULT_VIRTUAL_ADDRESS_BITS,
true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
false, // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
};
bool _mi_os_has_overcommit(void) {
return mi_os_mem_config.has_overcommit;
}
bool _mi_os_has_virtual_reserve(void) {
return mi_os_mem_config.has_virtual_reserve;
}
// OS (small) page size
size_t _mi_os_page_size(void) {
return mi_os_mem_config.page_size;
}
// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
size_t _mi_os_large_page_size(void) {
return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}
bool _mi_os_canuse_large_page(size_t size, size_t alignment) {
// if we have access, check the size and alignment requirements
if (mi_os_mem_config.large_page_size == 0) return false;
return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
}
// round to a good OS allocation size (bounded by max 12.5% waste)
size_t _mi_os_good_alloc_size(size_t size) {
size_t align_size;
if (size < 512*MI_KiB) align_size = _mi_os_page_size();
else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
else align_size = 4*MI_MiB;
if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow?
return _mi_align_up(size, align_size);
}
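For instance, a request just over 512 KiB is rounded up to a 64 KiB boundary, so at most ~64 KiB of ~512 KiB (12.5%) is padding; the other brackets keep the same ratio (256 KiB per 2 MiB, 1 MiB per 8 MiB, 4 MiB per 32 MiB), which is where the bound in the comment above comes from.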
void _mi_os_init(void) {
_mi_prim_mem_init(&mi_os_mem_config);
}
/* -----------------------------------------------------------
Util
-------------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero);
/* -----------------------------------------------------------
aligned hinting
-------------------------------------------------------------- */
// On systems with enough virtual address bits, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
// space (64TiB) we use this technique. (but see issue #939)
#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40) // up to 6TiB (since before win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space
size = _mi_align_up(size, MI_SEGMENT_SIZE);
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
#if (MI_SECURE>0)
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
#endif
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
uintptr_t init = MI_HINT_BASE;
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif
uintptr_t expected = hint + size;
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
}
if (hint%try_alignment != 0) return NULL;
return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
MI_UNUSED(try_alignment); MI_UNUSED(size);
return NULL;
}
#endif
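In the hinted variant above, the randomization works out as follows: 20 random bits scaled by the segment size and reduced modulo MI_HINT_AREA (4 TiB) give an offset in [0, 4 TiB), so the initial cursor always lands inside the 2 TiB-6 TiB window; each later request bumps the shared cursor by its segment-rounded size, and once it passes MI_HINT_MAX (30 TiB) a compare-and-swap resets it so hints wrap back into the window instead of drifting into the huge-page area above 32 TiB.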
/* -----------------------------------------------------------
Free memory
-------------------------------------------------------------- */
static void mi_os_free_huge_os_pages(void* p, size_t size);
static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) {
mi_assert_internal((size % _mi_os_page_size()) == 0);
if (addr == NULL) return; // || _mi_os_is_huge_reserved(addr)
int err = _mi_prim_free(addr, size); // allow size==0 (issue #1041)
if (err != 0) {
_mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
}
if (commit_size > 0) {
mi_os_stat_decrease(committed, commit_size);
}
mi_os_stat_decrease(reserved, size);
}
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) {
if (mi_memkind_is_os(memid.memkind)) {
size_t csize = memid.mem.os.size;
if (csize==0) { csize = _mi_os_good_alloc_size(size); }
mi_assert_internal(csize >= size);
size_t commit_size = (still_committed ? csize : 0);
void* base = addr;
// different base? (due to alignment)
if (memid.mem.os.base != base) {
mi_assert(memid.mem.os.base <= addr);
base = memid.mem.os.base;
const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base;
if (memid.mem.os.size==0) {
csize += diff;
}
if (still_committed) {
commit_size -= diff; // the (addr-base) part was already un-committed
}
}
// free it
if (memid.memkind == MI_MEM_OS_HUGE) {
mi_assert(memid.is_pinned);
mi_os_free_huge_os_pages(base, csize);
}
else {
mi_os_prim_free(base, csize, (still_committed ? commit_size : 0));
}
}
else {
// nothing to do
mi_assert(memid.memkind < MI_MEM_OS);
}
}
void _mi_os_free(void* p, size_t size, mi_memid_t memid) {
_mi_os_free_ex(p, size, true, memid);
}
/* -----------------------------------------------------------
Primitive allocation from the OS.
-------------------------------------------------------------- */
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
// Also `hint_addr` is a hint and may be ignored.
static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero) {
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(is_zero != NULL);
mi_assert_internal(is_large != NULL);
if (size == 0) return NULL;
if (!commit) { allow_large = false; }
if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning
*is_zero = false;
void* p = NULL;
int err = _mi_prim_alloc(hint_addr, size, try_alignment, commit, allow_large, is_large, is_zero, &p);
if (err != 0) {
_mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large);
}
mi_os_stat_counter_increase(mmap_calls, 1);
if (p != NULL) {
mi_os_stat_increase(reserved, size);
if (commit) {
mi_os_stat_increase(committed, size);
// seems needed for asan (or `mimalloc-test-api` fails)
#ifdef MI_TRACK_ASAN
if (*is_zero) { mi_track_mem_defined(p,size); }
else { mi_track_mem_undefined(p,size); }
#endif
}
}
return p;
}
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero) {
return mi_os_prim_alloc_at(NULL, size, try_alignment, commit, allow_large, is_large, is_zero);
}
// Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned.
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base) {
mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(is_large != NULL);
mi_assert_internal(is_zero != NULL);
mi_assert_internal(base != NULL);
if (!commit) allow_large = false;
if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// try first with a requested alignment hint (this will usually be aligned directly on Win 10+ or BSD)
void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero);
if (p == NULL) return NULL;
// aligned already?
if (((uintptr_t)p % alignment) == 0) {
*base = p;
}
else {
// if not aligned, free it, overallocate, and unmap around it
#if !MI_TRACK_ASAN
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
#endif
if (p != NULL) { mi_os_prim_free(p, size, (commit ? size : 0)); }
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
const size_t over_size = size + alignment;
if (!mi_os_mem_config.has_partial_free) { // win32 virtualAlloc cannot free parts of an allocated block
// over-allocate uncommitted (virtual) memory
p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero);
if (p == NULL) return NULL;
// set p to the aligned part in the full region
// note: this is dangerous on Windows as VirtualFree needs the actual base pointer
// this is handled though by having the `base` field in the memid's
*base = p; // remember the base
p = mi_align_up_ptr(p, alignment);
// explicitly commit only the aligned part
if (commit) {
if (!_mi_os_commit(p, size, NULL)) {
mi_os_prim_free(*base, over_size, 0);
return NULL;
}
}
}
else { // mmap can free inside an allocation
// overallocate...
p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero);
if (p == NULL) return NULL;
// and selectively unmap parts around the over-allocated area.
void* aligned_p = mi_align_up_ptr(p, alignment);
size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
size_t mid_size = _mi_align_up(size, _mi_os_page_size());
size_t post_size = over_size - pre_size - mid_size;
mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
if (pre_size > 0) { mi_os_prim_free(p, pre_size, (commit ? pre_size : 0)); }
if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, (commit ? post_size : 0)); }
// we can return the aligned pointer on `mmap` systems
p = aligned_p;
*base = aligned_p; // since we freed the pre part, `*base == p`.
}
}
mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
return p;
}
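On systems with partial frees, the second branch above boils down to: map `size + alignment` bytes, round the pointer up, and unmap the unaligned head and tail. A self-contained sketch of that strategy using mmap with MAP_ANONYMOUS (Linux/BSD/macOS; illustrative, assumes `size` and `alignment` are page multiples and `alignment` is a power of two):

  #include <stdint.h>
  #include <stddef.h>
  #include <sys/mman.h>

  static void* alloc_aligned_posix(size_t size, size_t alignment) {
    size_t over = size + alignment;                // room to slide to an aligned start
    uint8_t* p = mmap(NULL, over, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) return NULL;
    uintptr_t aligned = ((uintptr_t)p + alignment - 1) & ~(uintptr_t)(alignment - 1);
    size_t pre  = aligned - (uintptr_t)p;
    size_t post = over - pre - size;
    if (pre  > 0) munmap(p, pre);                          // trim the unaligned head
    if (post > 0) munmap((uint8_t*)aligned + size, post);  // trim the tail
    return (void*)aligned;                                 // free later with munmap(ptr, size)
  }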
/* -----------------------------------------------------------
OS API: alloc and alloc_aligned
----------------------------------------------------------- */
void* _mi_os_alloc(size_t size, mi_memid_t* memid) {
*memid = _mi_memid_none();
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
bool os_is_large = false;
bool os_is_zero = false;
void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero);
if (p == NULL) return NULL;
*memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large);
mi_assert_internal(memid->mem.os.size >= size);
mi_assert_internal(memid->initially_committed);
return p;
}
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid)
{
MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
*memid = _mi_memid_none();
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
alignment = _mi_align_up(alignment, _mi_os_page_size());
bool os_is_large = false;
bool os_is_zero = false;
void* os_base = NULL;
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base );
if (p == NULL) return NULL;
*memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large);
memid->mem.os.base = os_base;
memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned?
mi_assert_internal(memid->mem.os.size >= size);
mi_assert_internal(_mi_is_aligned(p,alignment));
if (commit) { mi_assert_internal(memid->initially_committed); }
return p;
}
mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) {
if (p==NULL || size==0) return p;
// ensure committed
if (!memid->initially_committed) {
bool is_zero = false;
if (!_mi_os_commit(p, size, &is_zero)) {
_mi_os_free(p, size, *memid);
return NULL;
}
memid->initially_committed = true;
}
// ensure zero'd
if (memid->initially_zero) return p;
_mi_memzero_aligned(p,size);
memid->initially_zero = true;
return p;
}
void* _mi_os_zalloc(size_t size, mi_memid_t* memid) {
void* p = _mi_os_alloc(size,memid);
return mi_os_ensure_zero(p, size, memid);
}
/* -----------------------------------------------------------
OS aligned allocation with an offset. This is used
for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
page where the object can be aligned at an offset from the start of the segment.
As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
to use the actual start of the memory region.
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid) {
mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid);
}
else {
// overallocate to align at an offset
const size_t extra = _mi_align_up(offset, alignment) - offset;
const size_t oversize = size + extra;
void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid);
if (start == NULL) return NULL;
void* const p = (uint8_t*)start + extra;
mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
// decommit the overallocation at the start
if (commit && extra > _mi_os_page_size()) {
_mi_os_decommit(start, extra);
}
return p;
}
}
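A quick example of the offset math above: with `alignment = 0x10000` (64 KiB) and `offset = 48`, `extra = _mi_align_up(48, 0x10000) - 48 = 0x10000 - 48`, so the returned `p` lies 48 bytes before the next 64 KiB boundary of the (aligned) over-allocation, and `p + offset` is exactly 64 KiB aligned, which is what the assertion checks.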
/* -----------------------------------------------------------
OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */
// OS page align within a given area, either conservative (pages inside the area only),
// or not (straddling pages outside the area is possible)
static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) {
mi_assert(addr != NULL && size > 0);
if (newsize != NULL) *newsize = 0;
if (size == 0 || addr == NULL) return NULL;
// page align conservatively within the range
void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
: mi_align_down_ptr(addr, _mi_os_page_size()));
void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
: mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
if (diff <= 0) return NULL;
mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size));
if (newsize != NULL) *newsize = (size_t)diff;
return start;
}
static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) {
return mi_os_page_align_areax(true, addr, size, newsize);
}
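For example, with 4 KiB pages, `addr = 0x10000100` and `size = 0x3000`, the conservative variant yields `start = 0x10001000` and `*newsize = 0x2000` (only pages that lie entirely inside the range), while the non-conservative variant yields `start = 0x10000000` and `*newsize = 0x4000` (all pages that touch the range). Commit below uses the covering form, whereas decommit, reset and protect use the conservative form so they never affect bytes outside the caller's range.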
bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size) {
if (is_zero != NULL) { *is_zero = false; }
mi_os_stat_counter_increase(commit_calls, 1);
// page align range
size_t csize;
void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
if (csize == 0) return true;
// commit
bool os_is_zero = false;
int err = _mi_prim_commit(start, csize, &os_is_zero);
if (err != 0) {
_mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
return false;
}
if (os_is_zero && is_zero != NULL) {
*is_zero = true;
mi_assert_expensive(mi_mem_is_zero(start, csize));
}
// note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
#ifdef MI_TRACK_ASAN
if (os_is_zero) { mi_track_mem_defined(start,csize); }
else { mi_track_mem_undefined(start,csize); }
#endif
mi_os_stat_increase(committed, stat_size); // use size for precise commit vs. decommit
return true;
}
bool _mi_os_commit(void* addr, size_t size, bool* is_zero) {
return _mi_os_commit_ex(addr, size, is_zero, size);
}
static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, size_t stat_size) {
mi_assert_internal(needs_recommit!=NULL);
mi_os_stat_decrease(committed, stat_size);
// page align
size_t csize;
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0) return true;
// decommit
*needs_recommit = true;
int err = _mi_prim_decommit(start,csize,needs_recommit);
if (err != 0) {
_mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
}
mi_assert_internal(err == 0);
return (err == 0);
}
bool _mi_os_decommit(void* addr, size_t size) {
bool needs_recommit;
return mi_os_decommit_ex(addr, size, &needs_recommit, size);
}
// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size) {
// page align conservatively within the range
size_t csize;
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)
mi_os_stat_counter_increase(reset, csize);
mi_os_stat_counter_increase(reset_calls, 1);
#if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
memset(start, 0, csize); // pretend it is eagerly reset
#endif
int err = _mi_prim_reset(start, csize);
if (err != 0) {
_mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
}
return (err == 0);
}
void _mi_os_reuse( void* addr, size_t size ) {
// page align conservatively within the range
size_t csize = 0;
void* const start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0) return;
const int err = _mi_prim_reuse(start, csize);
if (err != 0) {
_mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
}
}
// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size)
{
if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed?
mi_os_stat_counter_increase(purge_calls, 1);
mi_os_stat_counter_increase(purged, size);
if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit?
!_mi_preloading()) // don't decommit during preloading (unsafe)
{
bool needs_recommit = true;
mi_os_decommit_ex(p, size, &needs_recommit, stat_size);
return needs_recommit;
}
else {
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed
_mi_os_reset(p, size);
}
return false; // needs no recommit
}
}
// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size) {
return _mi_os_purge_ex(p, size, true, size);
}
// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
// page align conservatively within the range
size_t csize = 0;
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0) return false;
/*
if (_mi_os_is_huge_reserved(addr)) {
_mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
}
*/
int err = _mi_prim_protect(start,csize,protect);
if (err != 0) {
_mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
}
return (err == 0);
}
bool _mi_os_protect(void* addr, size_t size) {
return mi_os_protectx(addr, size, true);
}
bool _mi_os_unprotect(void* addr, size_t size) {
return mi_os_protectx(addr, size, false);
}
/* ----------------------------------------------------------------------------
Support for allocating huge OS pages (1GiB) that are reserved up-front
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE (MI_GiB)
#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
if (total_size != NULL) *total_size = 0;
const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
uintptr_t start = 0;
uintptr_t end = 0;
uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
do {
start = huge_start;
if (start == 0) {
// Initialize the start address after the 32TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB
#endif
}
end = start + size;
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
if (total_size != NULL) *total_size = size;
return (uint8_t*)start;
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
MI_UNUSED(pages);
if (total_size != NULL) *total_size = 0;
return NULL;
}
#endif
// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
*memid = _mi_memid_none();
if (psize != NULL) *psize = 0;
if (pages_reserved != NULL) *pages_reserved = 0;
size_t size = 0;
uint8_t* const start = mi_os_claim_huge_pages(pages, &size);
if (start == NULL) return NULL; // e.g. on 32-bit systems
// Allocate one page at a time but try to place them contiguously
// We allocate one page at a time to be able to abort if it takes too long
// or to at least allocate as many as available on the system.
mi_msecs_t start_t = _mi_clock_start();
size_t page = 0;
bool all_zero = true;
while (page < pages) {
// allocate a page
bool is_zero = false;
void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
void* p = NULL;
int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
if (!is_zero) { all_zero = false; }
if (err != 0) {
_mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
break;
}
// Did we succeed at a contiguous address?
if (p != addr) {
// no success, issue a warning and break
if (p != NULL) {
_mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, MI_HUGE_OS_PAGE_SIZE);
}
break;
}
// success, record it
page++; // increase before timeout check (see issue #711)
mi_os_stat_increase(committed, MI_HUGE_OS_PAGE_SIZE);
mi_os_stat_increase(reserved, MI_HUGE_OS_PAGE_SIZE);
// check for timeout
if (max_msecs > 0) {
mi_msecs_t elapsed = _mi_clock_end(start_t);
if (page >= 1) {
mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
elapsed = max_msecs + 1;
}
}
if (elapsed > max_msecs) {
_mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page);
break;
}
}
}
mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
if (pages_reserved != NULL) { *pages_reserved = page; }
if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
if (page != 0) {
mi_assert(start != NULL);
*memid = _mi_memid_create_os(start, size, true /* is committed */, all_zero, true /* is_large */);
memid->memkind = MI_MEM_OS_HUGE;
mi_assert(memid->is_pinned);
#ifdef MI_TRACK_ASAN
if (all_zero) { mi_track_mem_defined(start,size); }
#endif
}
return (page == 0 ? NULL : start);
}
// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
static void mi_os_free_huge_os_pages(void* p, size_t size) {
if (p==NULL || size==0) return;
uint8_t* base = (uint8_t*)p;
while (size >= MI_HUGE_OS_PAGE_SIZE) {
mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, MI_HUGE_OS_PAGE_SIZE);
size -= MI_HUGE_OS_PAGE_SIZE;
base += MI_HUGE_OS_PAGE_SIZE;
}
}
/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count
int _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&mi_numa_node_count);
if mi_unlikely(count == 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0 && ncount < INT_MAX) {
count = (size_t)ncount;
}
else {
const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (n == 0 || n > INT_MAX) { count = 1; }
else { count = n; }
}
mi_atomic_store_release(&mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
}
mi_assert_internal(count > 0 && count <= INT_MAX);
return (int)count;
}
static int mi_os_numa_node_get(void) {
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
const size_t n = _mi_prim_numa_node();
int numa_node = (n < INT_MAX ? (int)n : 0);
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return numa_node;
}
int _mi_os_numa_node(void) {
if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) {
return 0;
}
else {
return mi_os_numa_node_get();
}
}

397
compat/mimalloc/page-queue.c Normal file
View File

@@ -0,0 +1,397 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
Definition of page queues for each block size
----------------------------------------------------------- */
#ifndef MI_IN_PAGE_C
#error "this file should be included from 'page.c'"
// include to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#endif
/* -----------------------------------------------------------
Minimal alignment in machine words (i.e. `sizeof(void*)`)
----------------------------------------------------------- */
#if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE)
#error "define alignment for more than 4x word size for this platform"
#elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE)
#define MI_ALIGN4W // 4 machine words minimal alignment
#elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE)
#define MI_ALIGN2W // 2 machine words minimal alignment
#else
// ok, default alignment is 1 word
#endif
/* -----------------------------------------------------------
Queue query
----------------------------------------------------------- */
static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+sizeof(uintptr_t)));
}
static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+(2*sizeof(uintptr_t))));
}
static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
return (pq->block_size > MI_MEDIUM_OBJ_SIZE_MAX);
}
/* -----------------------------------------------------------
Bins
----------------------------------------------------------- */
// Return the bin for a given field size.
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
static inline size_t mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
#if defined(MI_ALIGN4W)
if mi_likely(wsize <= 4) {
return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
}
#elif defined(MI_ALIGN2W)
if mi_likely(wsize <= 8) {
return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
}
#else
if mi_likely(wsize <= 8) {
return (wsize == 0 ? 1 : wsize);
}
#endif
else if mi_unlikely(wsize > MI_MEDIUM_OBJ_WSIZE_MAX) {
return MI_BIN_HUGE;
}
else {
#if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
const size_t b = (MI_SIZE_BITS - 1 - mi_clz(wsize)); // note: wsize != 0
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust by 3 because we do not round the first 8 sizes
// which each get an exact bin
const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3;
mi_assert_internal(bin > 0 && bin < MI_BIN_HUGE);
return bin;
}
}
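To make the size-class math above concrete, here is a standalone sketch of the default path (no `MI_ALIGN2W`/`MI_ALIGN4W`) on a 64-bit system, using a GCC/Clang builtin in place of `mi_clz` and ignoring the huge-size cutoff and `MI_PADDING_SIZE`:

  #include <stdio.h>
  #include <stddef.h>

  static size_t bin_of(size_t size) {
    size_t wsize = (size + sizeof(void*) - 1) / sizeof(void*);  // size in machine words
    if (wsize <= 8) return (wsize == 0 ? 1 : wsize);            // exact small bins
    wsize--;
    size_t b = 63 - (size_t)__builtin_clzll(wsize);             // highest set bit
    return ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3;        // top 3 bits pick the bin
  }

  int main(void) {
    size_t sizes[] = { 64, 72, 96, 128, 160, 1024 };
    for (size_t i = 0; i < sizeof(sizes)/sizeof(sizes[0]); i++)
      printf("%4zu bytes -> bin %zu\n", sizes[i], bin_of(sizes[i]));
    return 0;   // prints bins 8, 9, 10, 12, 13, 24
  }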
/* -----------------------------------------------------------
Queue of pages with free blocks
----------------------------------------------------------- */
size_t _mi_bin(size_t size) {
return mi_bin(size);
}
size_t _mi_bin_size(size_t bin) {
return _mi_heap_empty.pages[bin].block_size;
}
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_MEDIUM_OBJ_SIZE_MAX) {
return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
}
else {
return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size());
}
}
#if (MI_DEBUG>1)
static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_page_t* list = queue->first;
while (list != NULL) {
mi_assert_internal(list->next == NULL || list->next->prev == list);
mi_assert_internal(list->prev == NULL || list->prev->next == list);
if (list == page) break;
list = list->next;
}
return (list == page);
}
#endif
#if (MI_DEBUG>1)
static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* pq) {
return (pq >= &heap->pages[0] && pq <= &heap->pages[MI_BIN_FULL]);
}
#endif
static inline bool mi_page_is_large_or_huge(const mi_page_t* page) {
return (mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_huge(page));
}
static size_t mi_page_bin(const mi_page_t* page) {
const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
mi_assert_internal(bin <= MI_BIN_FULL);
return bin;
}
// returns the page bin without using MI_BIN_FULL for statistics
size_t _mi_page_stats_bin(const mi_page_t* page) {
const size_t bin = (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)));
mi_assert_internal(bin <= MI_BIN_HUGE);
return bin;
}
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
mi_assert_internal(heap!=NULL);
const size_t bin = mi_page_bin(page);
mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(pq)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(pq)));
return pq;
}
static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
mi_heap_t* heap = mi_page_heap(page);
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_assert_expensive(mi_page_queue_contains(pq, page));
return pq;
}
// The heap's direct small-page array (`pages_free_direct`) is an efficiency
// shortcut: for each small size (up to `MI_SMALL_SIZE_MAX`) it points directly
// to the page for that size without having to compute the bin. This means that
// when the first page of a small bin's free-page queue changes, we need to
// update a range of entries in this direct array.
static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq) {
mi_assert_internal(mi_heap_contains_queue(heap,pq));
size_t size = pq->block_size;
if (size > MI_SMALL_SIZE_MAX) return;
mi_page_t* page = pq->first;
if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty;
// find index in the right direct page array
size_t start;
size_t idx = _mi_wsize_from_size(size);
mi_page_t** pages_free = heap->pages_free_direct;
if (pages_free[idx] == page) return; // already set
// find start slot
if (idx<=1) {
start = 0;
}
else {
// find previous size; due to minimal alignment, up to 3 previous bins may need to be skipped
size_t bin = mi_bin(size);
const mi_page_queue_t* prev = pq - 1;
while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) {
prev--;
}
start = 1 + _mi_wsize_from_size(prev->block_size);
if (start > idx) start = idx;
}
// set size range to the right page
mi_assert(start <= idx);
for (size_t sz = start; sz <= idx; sz++) {
pages_free[sz] = page;
}
}
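/* Worked example (illustration, assuming the default one-word alignment, 8-byte
   words, and mimalloc's standard bin sizes): the queue for 80-byte blocks serves
   both wsize 9 (65..72-byte requests) and wsize 10 (73..80 bytes), since `mi_bin`
   maps both word counts to the same bin. When that queue's first page changes,
   the loop above therefore repoints two direct slots: the previous queue holds
   64-byte blocks (wsize 8), so start = 1 + 8 = 9 and idx = 10, and
   pages_free[9..10] are both updated to the new first page. */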
/*
static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
return (queue->first == NULL);
}
*/
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == queue->last) queue->last = page->prev;
if (page == queue->first) {
queue->first = page->next;
// update first
mi_assert_internal(mi_heap_contains_queue(heap, queue));
mi_heap_queue_first_update(heap,queue);
}
heap->page_count--;
page->next = NULL;
page->prev = NULL;
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);
mi_page_set_in_full(page,false);
}
static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page));
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
#endif
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
page->next = queue->first;
page->prev = NULL;
if (queue->first != NULL) {
mi_assert_internal(queue->first->prev == NULL);
queue->first->prev = page;
queue->first = page;
}
else {
queue->first = queue->last = page;
}
// update direct
mi_heap_queue_first_update(heap, queue);
heap->page_count++;
}
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_queue_contains(queue, page));
if (queue->first == page) return;
mi_page_queue_remove(queue, page);
mi_page_queue_push(heap, queue, page);
mi_assert_internal(queue->first == page);
}
static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(from, page));
mi_assert_expensive(!mi_page_queue_contains(to, page));
const size_t bsize = mi_page_block_size(page);
MI_UNUSED(bsize);
mi_assert_internal((bsize == to->block_size && bsize == from->block_size) ||
(bsize == to->block_size && mi_page_queue_is_full(from)) ||
(bsize == from->block_size && mi_page_queue_is_full(to)) ||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(to)) ||
(mi_page_is_large_or_huge(page) && mi_page_queue_is_full(to)));
mi_heap_t* heap = mi_page_heap(page);
// delete from `from`
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == from->last) from->last = page->prev;
if (page == from->first) {
from->first = page->next;
// update first
mi_assert_internal(mi_heap_contains_queue(heap, from));
mi_heap_queue_first_update(heap, from);
}
// insert into `to`
if (enqueue_at_end) {
// enqueue at the end
page->prev = to->last;
page->next = NULL;
if (to->last != NULL) {
mi_assert_internal(heap == mi_page_heap(to->last));
to->last->next = page;
to->last = page;
}
else {
to->first = page;
to->last = page;
mi_heap_queue_first_update(heap, to);
}
}
else {
if (to->first != NULL) {
// enqueue at 2nd place
mi_assert_internal(heap == mi_page_heap(to->first));
mi_page_t* next = to->first->next;
page->prev = to->first;
page->next = next;
to->first->next = page;
if (next != NULL) {
next->prev = page;
}
else {
to->last = page;
}
}
else {
// enqueue at the head (singleton list)
page->prev = NULL;
page->next = NULL;
to->first = page;
to->last = page;
mi_heap_queue_first_update(heap, to);
}
}
mi_page_set_in_full(page, mi_page_queue_is_full(to));
}
static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end */, page);
}
static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
// note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`)
mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page);
}
// Only called from `mi_heap_absorb`.
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) {
mi_assert_internal(mi_heap_contains_queue(heap,pq));
mi_assert_internal(pq->block_size == append->block_size);
if (append->first==NULL) return 0;
// set append pages to new heap and count
size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
// inline `mi_page_set_heap` to avoid wrong assertion during absorption;
// in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
// set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
// side effect that it spins until any DELAYED_FREEING is finished. This ensures
// that after appending only the new heap will be used for delayed free operations.
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
count++;
}
if (pq->last==NULL) {
// take over afresh
mi_assert_internal(pq->first==NULL);
pq->first = append->first;
pq->last = append->last;
mi_heap_queue_first_update(heap, pq);
}
else {
// append to end
mi_assert_internal(pq->last!=NULL);
mi_assert_internal(append->first!=NULL);
pq->last->next = append->first;
append->first->prev = pq->last;
pq->last = append->last;
}
return count;
}

1042
compat/mimalloc/page.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,9 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// We use the unix/prim.c with the mmap API on macOSX
#include "../unix/prim.c"

View File

@ -0,0 +1,76 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// Select the implementation of the primitives
// depending on the OS.
#if defined(_WIN32)
#include "windows/prim.c" // VirtualAlloc (Windows)
#elif defined(__APPLE__)
#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c)
#elif defined(__wasi__)
#define MI_USE_SBRK
#include "wasi/prim.c" // memory-grow or sbrk (Wasm)
#elif defined(__EMSCRIPTEN__)
#include "emscripten/prim.c" // emmalloc_*, + pthread support
#else
#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.)
#endif
// Generic process initialization
#ifndef MI_PRIM_HAS_PROCESS_ATTACH
#if defined(__GNUC__) || defined(__clang__)
// gcc,clang: use the constructor/destructor attribute
// which for both seem to run before regular constructors/destructors
#if defined(__clang__)
#define mi_attr_constructor __attribute__((constructor(101)))
#define mi_attr_destructor __attribute__((destructor(101)))
#else
#define mi_attr_constructor __attribute__((constructor))
#define mi_attr_destructor __attribute__((destructor))
#endif
static void mi_attr_constructor mi_process_attach(void) {
_mi_auto_process_init();
}
static void mi_attr_destructor mi_process_detach(void) {
_mi_auto_process_done();
}
#elif defined(__cplusplus)
// C++: use static initialization to detect process start/end
// This is not guaranteed to be first/last but the best we can generally do?
struct mi_init_done_t {
mi_init_done_t() {
_mi_auto_process_init();
}
~mi_init_done_t() {
_mi_auto_process_done();
}
};
static mi_init_done_t mi_init_done;
#else
#pragma message("define a way to call _mi_auto_process_init/done on your platform")
#endif
#endif
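/* Standalone illustration (not part of mimalloc): how the constructor/destructor
   attributes used above behave with gcc/clang. The constructor runs before main()
   and the destructor after main() returns; the explicit priority 101 (as in the
   clang branch above) runs the constructor before default-priority constructors.
   Wrapped in `#if 0` so it does not affect this file; compile it separately to try it. */
#if 0
#include <stdio.h>

__attribute__((constructor(101)))
static void demo_process_attach(void) { fputs("attach: runs before main\n", stderr); }

__attribute__((destructor(101)))
static void demo_process_detach(void) { fputs("detach: runs after main\n", stderr); }

int main(void) {
  fputs("main\n", stderr);
  return 0;
}
#endif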
// Generic allocator init/done callback
#ifndef MI_PRIM_HAS_ALLOCATOR_INIT
bool _mi_is_redirected(void) {
return false;
}
bool _mi_allocator_init(const char** message) {
if (message != NULL) { *message = NULL; }
return true;
}
void _mi_allocator_done(void) {
// nothing to do
}
#endif

View File

@ -0,0 +1,962 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// This file is included in `src/prim/prim.c`
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined
#endif
#if defined(__sun)
// illumos provides the new mman.h API when any of these are defined;
// otherwise only the old API based on caddr_t, which predates the void-pointer one, is exposed.
// Stock Solaris provides only the former, so we chose to discard these
// flags only here rather than project-wide, though.
#undef _XOPEN_SOURCE
#undef _POSIX_C_SOURCE
#endif
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h"
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#include <fcntl.h> // open, close, read, access
#include <stdlib.h> // getenv, arc4random_buf
#if defined(__linux__)
#include <features.h>
#include <sys/prctl.h> // THP disable, PR_SET_VMA
#include <sys/sysinfo.h> // sysinfo
#if defined(__GLIBC__) && !defined(PR_SET_VMA)
#include <linux/prctl.h>
#endif
#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
#else
#include <sys/mman.h>
#endif
#elif defined(__APPLE__)
#include <AvailabilityMacros.h>
#include <TargetConditionals.h>
#if !defined(TARGET_OS_OSX) || TARGET_OS_OSX // see issue #879, used to be (!TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR)
#include <mach/vm_statistics.h> // VM_MAKE_TAG, VM_FLAGS_SUPERPAGE_SIZE_2MB, etc.
#endif
#if !defined(MAC_OS_X_VERSION_10_7)
#define MAC_OS_X_VERSION_10_7 1070
#endif
#include <sys/sysctl.h>
#elif defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/param.h>
#if __FreeBSD_version >= 1200000
#include <sys/cpuset.h>
#include <sys/domainset.h>
#endif
#include <sys/sysctl.h>
#endif
#if (defined(__linux__) && !defined(__ANDROID__)) || defined(__FreeBSD__)
#define MI_HAS_SYSCALL_H
#include <sys/syscall.h>
#endif
#if !defined(MADV_DONTNEED) && defined(POSIX_MADV_DONTNEED) // QNX
#define MADV_DONTNEED POSIX_MADV_DONTNEED
#endif
#if !defined(MADV_FREE) && defined(POSIX_MADV_FREE) // QNX
#define MADV_FREE POSIX_MADV_FREE
#endif
#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this?
//------------------------------------------------------------------------------------
// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
// and do allocation themselves; using syscalls prevents recursion when mimalloc is
// still initializing (issue #713)
// Declare inline to avoid unused function warnings.
//------------------------------------------------------------------------------------
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)
static inline int mi_prim_open(const char* fpath, int open_flags) {
return syscall(SYS_open,fpath,open_flags,0);
}
static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) {
return syscall(SYS_read,fd,buf,bufsize);
}
static inline int mi_prim_close(int fd) {
return syscall(SYS_close,fd);
}
static inline int mi_prim_access(const char *fpath, int mode) {
return syscall(SYS_access,fpath,mode);
}
#else
static inline int mi_prim_open(const char* fpath, int open_flags) {
return open(fpath,open_flags);
}
static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) {
return read(fd,buf,bufsize);
}
static inline int mi_prim_close(int fd) {
return close(fd);
}
static inline int mi_prim_access(const char *fpath, int mode) {
return access(fpath,mode);
}
#endif
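/* Standalone illustration (not part of mimalloc): the difference between going
   through libc and issuing the raw syscall, as the wrappers above do to avoid
   re-entering an interposed allocator. Assumes a Linux target; on architectures
   without SYS_open it falls back to SYS_openat (which is why the wrappers above
   are guarded by #ifdef). Wrapped in `#if 0` so it does not affect this file;
   compile it separately to try it. */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void) {
  int fd1 = open("/proc/self/status", O_RDONLY);   // may call an interposed open()
#if defined(SYS_open)
  int fd2 = (int)syscall(SYS_open, "/proc/self/status", O_RDONLY, 0);            // bypasses interposition
#else
  int fd2 = (int)syscall(SYS_openat, AT_FDCWD, "/proc/self/status", O_RDONLY, 0);
#endif
  printf("libc fd=%d, raw syscall fd=%d\n", fd1, fd2);
  if (fd1 >= 0) close(fd1);
  if (fd2 >= 0) close(fd2);
  return 0;
}
#endif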
//---------------------------------------------
// init
//---------------------------------------------
static bool unix_detect_overcommit(void) {
bool os_overcommit = true;
#if defined(__linux__)
int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY);
if (fd >= 0) {
char buf[32];
ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf));
mi_prim_close(fd);
// <https://www.kernel.org/doc/Documentation/vm/overcommit-accounting>
// 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE)
if (nread >= 1) {
os_overcommit = (buf[0] == '0' || buf[0] == '1');
}
}
#elif defined(__FreeBSD__)
int val = 0;
size_t olen = sizeof(val);
if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) {
os_overcommit = (val != 0);
}
#else
// default: overcommit is true
#endif
return os_overcommit;
}
// try to detect the physical memory dynamically (if possible)
static void unix_detect_physical_memory( size_t page_size, size_t* physical_memory_in_kib ) {
#if defined(CTL_HW) && (defined(HW_PHYSMEM64) || defined(HW_MEMSIZE)) // freeBSD, macOS
MI_UNUSED(page_size);
int64_t physical_memory = 0;
size_t length = sizeof(int64_t);
#if defined(HW_PHYSMEM64)
int mib[2] = { CTL_HW, HW_PHYSMEM64 };
#else
int mib[2] = { CTL_HW, HW_MEMSIZE };
#endif
const int err = sysctl(mib, 2, &physical_memory, &length, NULL, 0);
if (err==0 && physical_memory > 0) {
const int64_t phys_in_kib = physical_memory / MI_KiB;
if (phys_in_kib > 0 && (uint64_t)phys_in_kib <= SIZE_MAX) {
*physical_memory_in_kib = (size_t)phys_in_kib;
}
}
#elif defined(__linux__)
MI_UNUSED(page_size);
struct sysinfo info; _mi_memzero_var(info);
const int err = sysinfo(&info);
if (err==0 && info.totalram > 0 && info.totalram <= SIZE_MAX) {
*physical_memory_in_kib = (size_t)info.totalram / MI_KiB;
}
#elif defined(_SC_PHYS_PAGES) // do not use by default as it might cause allocation (by using `fopen` to parse /proc/meminfo) (issue #1100)
const long pphys = sysconf(_SC_PHYS_PAGES);
const size_t psize_in_kib = page_size / MI_KiB;
if (psize_in_kib > 0 && pphys > 0 && (unsigned long)pphys <= SIZE_MAX && (size_t)pphys <= (SIZE_MAX/psize_in_kib)) {
*physical_memory_in_kib = (size_t)pphys * psize_in_kib;
}
#endif
}
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
long psize = sysconf(_SC_PAGESIZE);
if (psize > 0 && (unsigned long)psize < SIZE_MAX) {
config->page_size = (size_t)psize;
config->alloc_granularity = (size_t)psize;
unix_detect_physical_memory(config->page_size, &config->physical_memory_in_kib);
}
config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE;
config->has_overcommit = unix_detect_overcommit();
config->has_partial_free = true; // mmap can free in parts
config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE)
// disable transparent huge pages for this process?
#if (defined(__linux__) || defined(__ANDROID__)) && defined(PR_GET_THP_DISABLE)
#if defined(MI_NO_THP)
if (true)
#else
if (!mi_option_is_enabled(mi_option_allow_thp)) // disable THP if requested through an option
#endif
{
int val = 0;
if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) {
// Most likely since distros often come with always/madvise settings.
val = 1;
// Disabling only for mimalloc process rather than touching system wide settings
(void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0);
}
}
#endif
}
//---------------------------------------------
// free
//---------------------------------------------
int _mi_prim_free(void* addr, size_t size ) {
if (size==0) return 0;
bool err = (munmap(addr, size) == -1);
return (err ? errno : 0);
}
//---------------------------------------------
// mmap
//---------------------------------------------
static int unix_madvise(void* addr, size_t size, int advice) {
#if defined(__sun)
int res = madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520)
#elif defined(__QNX__)
int res = posix_madvise(addr, size, advice);
#else
int res = madvise(addr, size, advice);
#endif
return (res==0 ? 0 : errno);
}
static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) {
void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */);
#if defined(__linux__) && defined(PR_SET_VMA)
if (p!=MAP_FAILED && p!=NULL) {
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc");
}
#endif
return p;
}
static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
MI_UNUSED(try_alignment);
void* p = NULL;
#if defined(MAP_ALIGNED) // BSD
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
size_t n = mi_bsr(try_alignment);
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd);
if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
int err = errno;
_mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr);
}
if (p!=MAP_FAILED) return p;
// fall back to regular mmap
}
}
#elif defined(MAP_ALIGN) // Solaris
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd); // addr parameter is the required alignment
if (p!=MAP_FAILED) return p;
// fall back to regular mmap
}
#endif
#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
// on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations
if (addr == NULL) {
void* hint = _mi_os_get_aligned_hint(try_alignment, size);
if (hint != NULL) {
p = unix_mmap_prim(hint, size, protect_flags, flags, fd);
if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
#if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly?
int err = 0;
#else
int err = errno;
#endif
_mi_trace_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint);
}
if (p!=MAP_FAILED) return p;
// fall back to regular mmap
}
}
#endif
// regular mmap
p = unix_mmap_prim(addr, size, protect_flags, flags, fd);
if (p!=MAP_FAILED) return p;
// failed to allocate
return NULL;
}
static int unix_mmap_fd(void) {
#if defined(VM_MAKE_TAG)
// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
int os_tag = (int)mi_option_get(mi_option_os_tag);
if (os_tag < 100 || os_tag > 255) { os_tag = 254; }
return VM_MAKE_TAG(os_tag);
#else
return -1;
#endif
}
static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#if !defined(MAP_NORESERVE)
#define MAP_NORESERVE 0
#endif
void* p = NULL;
const int fd = unix_mmap_fd();
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
if (_mi_os_has_overcommit()) {
flags |= MAP_NORESERVE;
}
#if defined(PROT_MAX)
protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
#endif
// huge page allocation
if (allow_large && (large_only || (_mi_os_canuse_large_page(size, try_alignment) && mi_option_is_enabled(mi_option_allow_large_os_pages)))) {
static _Atomic(size_t) large_page_try_ok; // = 0;
size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
if (!large_only && try_ok > 0) {
// If the OS is not configured for large OS pages, or the user does not have
// enough permission, the `mmap` will always fail (but it might also fail for other reasons).
// Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times
// to avoid too many failing calls to mmap.
mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
}
else {
int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux
int lfd = fd;
#ifdef MAP_ALIGNED_SUPER
lflags |= MAP_ALIGNED_SUPER;
#endif
#ifdef MAP_HUGETLB
lflags |= MAP_HUGETLB;
#endif
#ifdef MAP_HUGE_1GB
static bool mi_huge_pages_available = true;
if (large_only && (size % MI_GiB) == 0 && mi_huge_pages_available) {
lflags |= MAP_HUGE_1GB;
}
else
#endif
{
#ifdef MAP_HUGE_2MB
lflags |= MAP_HUGE_2MB;
#endif
}
#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB
lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
#endif
if (large_only || lflags != flags) {
// try large OS page allocation
*is_large = true;
p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd);
#ifdef MAP_HUGE_1GB
if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
mi_huge_pages_available = false; // don't try huge 1GiB pages again
if (large_only) {
_mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
}
lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd);
}
#endif
if (large_only) return p;
if (p == NULL) {
mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations
}
}
}
}
// regular allocation
if (p == NULL) {
*is_large = false;
p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd);
#if !defined(MI_NO_THP)
if (p != NULL && allow_large && mi_option_is_enabled(mi_option_allow_thp) && _mi_os_canuse_large_page(size, try_alignment)) {
#if defined(MADV_HUGEPAGE)
// Many Linux systems don't allow MAP_HUGETLB but instead support
// transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGEPAGE
// since properly aligned allocations will already use large pages if available
// in that case -- in particular for our large regions (in `memory.c`).
// However, some systems only allow THP if called with an explicit `madvise`, so
// when large OS pages are enabled for mimalloc, we call `madvise` anyway.
if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) {
// *is_large = true; // possibly
};
#elif defined(__sun)
struct memcntl_mha cmd = {0};
cmd.mha_pagesize = _mi_os_large_page_size();
cmd.mha_cmd = MHA_MAPSIZE_VA;
if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
// *is_large = true; // possibly
}
#endif
}
#endif
}
return p;
}
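/* Standalone illustration (not part of mimalloc): requesting an explicit 2MiB huge
   page with mmap, as the MAP_HUGETLB path above does, and falling back to a regular
   anonymous mapping when no huge pages are reserved. Assumes Linux with
   MAP_HUGETLB/MAP_HUGE_2MB available and a configured hugetlb pool. Wrapped in
   `#if 0` so it does not affect this file; compile it separately to try it. */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void) {
  const size_t size = 2u * 1024 * 1024;
  const int flags = MAP_PRIVATE | MAP_ANONYMOUS;
  void* p = MAP_FAILED;
#if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB)
  p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
#endif
  if (p == MAP_FAILED) {
    // no explicit huge page available; fall back to a normal mapping
    p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
  }
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
  memset(p, 0xab, size);   // touch the pages so they are actually backed
  munmap(p, size);
  return 0;
}
#endif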
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(commit || !allow_large);
mi_assert_internal(try_alignment > 0);
if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) {
try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations
}
*is_zero = true;
int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
*addr = unix_mmap(hint_addr, size, try_alignment, protect_flags, false, allow_large, is_large);
return (*addr != NULL ? 0 : errno);
}
//---------------------------------------------
// Commit/Reset
//---------------------------------------------
static void unix_mprotect_hint(int err) {
#if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page
if (err == ENOMEM) {
_mi_warning_message("The next warning may be caused by a low memory map limit.\n"
" On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n"
" For example: sudo sysctl -w vm.max_map_count=262144\n");
}
#else
MI_UNUSED(err);
#endif
}
int _mi_prim_commit(void* start, size_t size, bool* is_zero) {
// commit: ensure we can access the area
// note: we may think that *is_zero can be true since the memory
// was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but
// we sometimes call commit on a range with still partially committed
// memory and `mprotect` does not zero the range.
*is_zero = false;
int err = mprotect(start, size, (PROT_READ | PROT_WRITE));
if (err != 0) {
err = errno;
unix_mprotect_hint(err);
}
return err;
}
int _mi_prim_reuse(void* start, size_t size) {
MI_UNUSED(start); MI_UNUSED(size);
#if defined(__APPLE__) && defined(MADV_FREE_REUSE)
return unix_madvise(start, size, MADV_FREE_REUSE);
#endif
return 0;
}
int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
int err = 0;
#if defined(__APPLE__) && defined(MADV_FREE_REUSABLE)
// decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097)
err = unix_madvise(start, size, MADV_FREE_REUSABLE);
if (err) { err = unix_madvise(start, size, MADV_DONTNEED); }
#else
// decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
err = unix_madvise(start, size, MADV_DONTNEED);
#endif
#if !MI_DEBUG && MI_SECURE<=2
*needs_recommit = false;
#else
*needs_recommit = true;
mprotect(start, size, PROT_NONE);
#endif
/*
// decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss)
*needs_recommit = true;
const int fd = unix_mmap_fd();
void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0);
if (p != start) { err = errno; }
*/
return err;
}
int _mi_prim_reset(void* start, size_t size) {
int err = 0;
// on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower)
#if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE)
err = unix_madvise(start, size, MADV_FREE_REUSABLE);
if (err==0) return 0;
// fall through
#endif
#if defined(MADV_FREE)
// Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it
// will not reduce the `rss` stats in tools like `top` even though the memory is available
// to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by
// default `MADV_DONTNEED` is used though.
static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE);
int oadvice = (int)mi_atomic_load_relaxed(&advice);
while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; };
if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
// if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED);
err = unix_madvise(start, size, MADV_DONTNEED);
}
#else
err = unix_madvise(start, size, MADV_DONTNEED);
#endif
return err;
}
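/* Standalone illustration (not part of mimalloc): the effect of the two advice flags
   used above. MADV_DONTNEED drops anonymous pages immediately (rss shrinks and the
   next read returns zeros), while MADV_FREE only marks them reclaimable, so rss may
   stay high until there is memory pressure. Assumes Linux or a BSD that defines
   MADV_FREE. Wrapped in `#if 0` so it does not affect this file; compile it
   separately to try it. */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void) {
  const size_t size = 16u * 1024 * 1024;
  unsigned char* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
  memset(p, 1, size);                        // commit the pages (rss grows)
#if defined(MADV_FREE)
  if (madvise(p, size, MADV_FREE) != 0)      // lazily reclaimable; rss unchanged for now
#endif
  {
    madvise(p, size, MADV_DONTNEED);         // eagerly discarded; rss drops right away
  }
  printf("first byte after advice: %u\n", p[0]);  // 0 after DONTNEED; 1 or 0 after FREE
  munmap(p, size);
  return 0;
}
#endif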
int _mi_prim_protect(void* start, size_t size, bool protect) {
int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
unix_mprotect_hint(err);
return err;
}
//---------------------------------------------
// Huge page allocation
//---------------------------------------------
#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__)
#ifndef MPOL_PREFERRED
#define MPOL_PREFERRED 1
#endif
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind)
static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags);
}
#else
static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags);
return 0;
}
#endif
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
bool is_large = true;
*is_zero = true;
*addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
unsigned long numa_mask = (1UL << numa_node);
// TODO: does `mbind` work correctly for huge OS pages? should we
// use `set_mempolicy` before calling mmap instead?
// see: <https://lkml.org/lkml/2017/2/9/875>
long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
if (err != 0) {
err = errno;
_mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err);
}
}
return (*addr != NULL ? 0 : errno);
}
#else
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
*is_zero = false;
*addr = NULL;
return ENOMEM;
}
#endif
//---------------------------------------------
// NUMA nodes
//---------------------------------------------
#if defined(__linux__)
size_t _mi_prim_numa_node(void) {
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu)
unsigned long node = 0;
unsigned long ncpu = 0;
long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
if (err != 0) return 0;
return node;
#else
return 0;
#endif
}
size_t _mi_prim_numa_node_count(void) {
char buf[128];
unsigned node = 0;
for(node = 0; node < 256; node++) {
// enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
_mi_snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
if (mi_prim_access(buf,R_OK) != 0) break;
}
return (node+1);
}
#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000
size_t _mi_prim_numa_node(void) {
domainset_t dom;
size_t node;
int policy;
if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul;
for (node = 0; node < MAXMEMDOM; node++) {
if (DOMAINSET_ISSET(node, &dom)) return node;
}
return 0ul;
}
size_t _mi_prim_numa_node_count(void) {
size_t ndomains = 0;
size_t len = sizeof(ndomains);
if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul;
return ndomains;
}
#elif defined(__DragonFly__)
size_t _mi_prim_numa_node(void) {
// TODO: DragonFly does not seem to provide any userland means to get this information.
return 0ul;
}
size_t _mi_prim_numa_node_count(void) {
size_t ncpus = 0, nvirtcoresperphys = 0;
size_t len = sizeof(size_t);
if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul;
if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul;
return nvirtcoresperphys * ncpus;
}
#else
size_t _mi_prim_numa_node(void) {
return 0;
}
size_t _mi_prim_numa_node_count(void) {
return 1;
}
#endif
// ----------------------------------------------------------------
// Clock
// ----------------------------------------------------------------
#include <time.h>
#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)
mi_msecs_t _mi_prim_clock_now(void) {
struct timespec t;
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &t);
#else
clock_gettime(CLOCK_REALTIME, &t);
#endif
return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
}
#else
// low resolution timer
mi_msecs_t _mi_prim_clock_now(void) {
#if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0)
return (mi_msecs_t)clock();
#elif (CLOCKS_PER_SEC < 1000)
return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
#else
return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000);
#endif
}
#endif
//----------------------------------------------------------------
// Process info
//----------------------------------------------------------------
#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#if defined(__APPLE__)
#include <mach/mach.h>
#endif
#if defined(__HAIKU__)
#include <kernel/OS.h>
#endif
static mi_msecs_t timeval_secs(const struct timeval* tv) {
return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
}
void _mi_prim_process_info(mi_process_info_t* pinfo)
{
struct rusage rusage;
getrusage(RUSAGE_SELF, &rusage);
pinfo->utime = timeval_secs(&rusage.ru_utime);
pinfo->stime = timeval_secs(&rusage.ru_stime);
#if !defined(__HAIKU__)
pinfo->page_faults = rusage.ru_majflt;
#endif
#if defined(__HAIKU__)
// Haiku does not have (yet?) a way to
// get these stats per process
thread_info tid;
area_info mem;
ssize_t c;
get_thread_info(find_thread(0), &tid);
while (get_next_area_info(tid.team, &c, &mem) == B_OK) {
pinfo->peak_rss += mem.ram_size;
}
pinfo->page_faults = 0;
#elif defined(__APPLE__)
pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes
#ifdef MACH_TASK_BASIC_INFO
struct mach_task_basic_info info;
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
pinfo->current_rss = (size_t)info.resident_size;
}
#else
struct task_basic_info info;
mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT;
if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
pinfo->current_rss = (size_t)info.resident_size;
}
#endif
#else
pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB
#endif
// use defaults for commit
}
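/* Standalone illustration (not part of mimalloc): the getrusage() call used above,
   including the unit difference it compensates for (ru_maxrss is reported in
   kilobytes on Linux/BSD but in bytes on macOS). Wrapped in `#if 0` so it does not
   affect this file; compile it separately to try it. */
#if 0
#include <stdio.h>
#include <sys/resource.h>

int main(void) {
  struct rusage ru;
  if (getrusage(RUSAGE_SELF, &ru) != 0) { perror("getrusage"); return 1; }
  long user_ms = ru.ru_utime.tv_sec * 1000L + ru.ru_utime.tv_usec / 1000L;
  long sys_ms  = ru.ru_stime.tv_sec * 1000L + ru.ru_stime.tv_usec / 1000L;
#if defined(__APPLE__)
  long peak_rss_kib = ru.ru_maxrss / 1024;   // macOS reports bytes
#else
  long peak_rss_kib = ru.ru_maxrss;          // Linux/BSD report KiB
#endif
  printf("user: %ld ms, sys: %ld ms, peak rss: %ld KiB, major faults: %ld\n",
         user_ms, sys_ms, peak_rss_kib, ru.ru_majflt);
  return 0;
}
#endif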
#else
#ifndef __wasi__
// WebAssembly instances are not processes
#pragma message("define a way to get process info")
#endif
void _mi_prim_process_info(mi_process_info_t* pinfo)
{
// use defaults
MI_UNUSED(pinfo);
}
#endif
//----------------------------------------------------------------
// Output
//----------------------------------------------------------------
void _mi_prim_out_stderr( const char* msg ) {
fputs(msg,stderr);
}
//----------------------------------------------------------------
// Environment
//----------------------------------------------------------------
#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0)
// On POSIX systems, use `environ` to access environment variables
// even before the C runtime is initialized.
#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>)
#include <crt_externs.h>
static char** mi_get_environ(void) {
return (*_NSGetEnviron());
}
#else
extern char** environ;
static char** mi_get_environ(void) {
return environ;
}
#endif
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
if (name==NULL) return false;
const size_t len = _mi_strlen(name);
if (len == 0) return false;
char** env = mi_get_environ();
if (env == NULL) return false;
// compare up to 10000 entries
for (int i = 0; i < 10000 && env[i] != NULL; i++) {
const char* s = env[i];
if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive
// found it
_mi_strlcpy(result, s + len + 1, result_size);
return true;
}
}
return false;
}
#else
// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
// cannot call getenv() when still initializing the C runtime.
if (_mi_preloading()) return false;
const char* s = getenv(name);
if (s == NULL) {
// we check the upper case name too.
char buf[64+1];
size_t len = _mi_strnlen(name,sizeof(buf)-1);
for (size_t i = 0; i < len; i++) {
buf[i] = _mi_toupper(name[i]);
}
buf[len] = 0;
s = getenv(buf);
}
if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false;
_mi_strlcpy(result, s, result_size);
return true;
}
#endif // !MI_USE_ENVIRON
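/* Standalone illustration (not part of mimalloc): scanning `environ` directly
   instead of calling getenv(), which is the reason the primitive above works even
   while the C runtime is still initializing. Unlike the code above, this sketch
   compares case-sensitively. Wrapped in `#if 0` so it does not affect this file;
   compile it separately to try it. */
#if 0
#include <stdio.h>
#include <string.h>

extern char** environ;

// return the value part of NAME=value, or NULL if not present
static const char* demo_lookup(const char* name) {
  const size_t len = strlen(name);
  for (char** e = environ; *e != NULL; e++) {
    if (strncmp(*e, name, len) == 0 && (*e)[len] == '=') return *e + len + 1;
  }
  return NULL;
}

int main(void) {
  const char* v = demo_lookup("PATH");
  printf("PATH=%s\n", v ? v : "(unset)");
  return 0;
}
#endif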
//----------------------------------------------------------------
// Random
//----------------------------------------------------------------
#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_15) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15)
#include <CommonCrypto/CommonCryptoError.h>
#include <CommonCrypto/CommonRandom.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
// We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
// may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
}
#elif defined(__ANDROID__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__sun) || \
(defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7))
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // also for old apple versions < 10.7 (issue #829)
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
// Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
// and for the latter the actual `getrandom` call is not always defined.
// (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
// We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom)
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK (1)
#endif
static _Atomic(uintptr_t) no_getrandom; // = 0
if (mi_atomic_load_acquire(&no_getrandom)==0) {
ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
if (ret >= 0) return (buf_len == (size_t)ret);
if (errno != ENOSYS) return false;
mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom
}
#endif
int flags = O_RDONLY;
#if defined(O_CLOEXEC)
flags |= O_CLOEXEC;
#endif
int fd = mi_prim_open("/dev/urandom", flags);
if (fd < 0) return false;
size_t count = 0;
while(count < buf_len) {
ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count);
if (ret<=0) {
if (errno!=EAGAIN && errno!=EINTR) break;
}
else {
count += ret;
}
}
mi_prim_close(fd);
return (count==buf_len);
}
#else
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
return false;
}
#endif
//----------------------------------------------------------------
// Thread init/done
//----------------------------------------------------------------
#if defined(MI_USE_PTHREADS)
// use pthread local storage keys to detect thread ending
// (and used with MI_TLS_PTHREADS for the default heap)
pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
static void mi_pthread_done(void* value) {
if (value!=NULL) {
_mi_thread_done((mi_heap_t*)value);
}
}
void _mi_prim_thread_init_auto_done(void) {
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
}
void _mi_prim_thread_done_auto_done(void) {
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809
pthread_key_delete(_mi_heap_default_key);
}
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
pthread_setspecific(_mi_heap_default_key, heap);
}
}
#else
void _mi_prim_thread_init_auto_done(void) {
// nothing
}
void _mi_prim_thread_done_auto_done(void) {
// nothing
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#endif
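/* Standalone illustration (not part of mimalloc): using a pthread TLS key with a
   destructor to get a callback when a thread exits, which is how the code above
   detects thread termination in order to run _mi_thread_done. Wrapped in `#if 0`
   so it does not affect this file; compile it separately (link with -lpthread). */
#if 0
#include <pthread.h>
#include <stdio.h>

static pthread_key_t demo_key;

static void demo_thread_done(void* value) {
  // called automatically when a thread with a non-NULL key value exits
  printf("thread-done callback for %s\n", (const char*)value);
}

static void* demo_thread(void* arg) {
  pthread_setspecific(demo_key, arg);   // associate a value so the destructor fires
  return NULL;
}

int main(void) {
  pthread_key_create(&demo_key, &demo_thread_done);
  pthread_t t;
  pthread_create(&t, NULL, &demo_thread, (void*)"worker");
  pthread_join(t, NULL);
  pthread_key_delete(demo_key);         // do not leak the key (compare issue #809 above)
  return 0;
}
#endif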

View File

@ -0,0 +1,879 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// This file is included in `src/prim/prim.c`
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h"
#include <stdio.h> // fputs, stderr
// xbox has no console IO
#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM)
#define MI_HAS_CONSOLE_IO
#endif
//---------------------------------------------
// Dynamically bind Windows API points for portability
//---------------------------------------------
// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
// NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's.
typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E {
MiMemExtendedParameterInvalidType = 0,
MiMemExtendedParameterAddressRequirements,
MiMemExtendedParameterNumaNode,
MiMemExtendedParameterPartitionHandle,
MiMemExtendedParameterUserPhysicalHandle,
MiMemExtendedParameterAttributeFlags,
MiMemExtendedParameterMax
} MI_MEM_EXTENDED_PARAMETER_TYPE;
typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S {
struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type;
union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg;
} MI_MEM_EXTENDED_PARAMETER;
typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S {
PVOID LowestStartingAddress;
PVOID HighestEndingAddress;
SIZE_T Alignment;
} MI_MEM_ADDRESS_REQUIREMENTS;
#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010
#include <winternl.h>
typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG);
typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084)
static PVirtualAlloc2 pVirtualAlloc2 = NULL;
static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox)
typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER;
typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber);
typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber);
typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask);
typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber);
typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask);
typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node);
static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
static PGetNumaProcessorNode pGetNumaProcessorNode = NULL;
static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL;
static PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = NULL;
// Not available on xbox
typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID);
static PGetLargePageMinimum pGetLargePageMinimum = NULL;
// Available after Windows XP
typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes );
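/* Standalone illustration (not part of mimalloc): resolving a Win32 entry point at
   run time with LoadLibrary/GetProcAddress, the same pattern the declarations above
   rely on so the allocator still loads on older Windows versions that lack the newer
   APIs. Wrapped in `#if 0` so it does not affect this file; compile it separately
   to try it. */
#if 0
#include <windows.h>
#include <stdio.h>

typedef SIZE_T (__stdcall *PGetLargePageMinimum_t)(VOID);

int main(void) {
  HMODULE hKernel = LoadLibrary(TEXT("kernel32.dll"));
  if (hKernel == NULL) return 1;
  PGetLargePageMinimum_t pFn =
    (PGetLargePageMinimum_t)(void (*)(void))GetProcAddress(hKernel, "GetLargePageMinimum");
  if (pFn != NULL) {
    printf("large page minimum: %zu bytes\n", (size_t)pFn());
  }
  else {
    printf("GetLargePageMinimum not available on this system\n");
  }
  FreeLibrary(hKernel);
  return 0;
}
#endif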
//---------------------------------------------
// Enable large page support dynamically (if possible)
//---------------------------------------------
static bool win_enable_large_os_pages(size_t* large_page_size)
{
static bool large_initialized = false;
if (large_initialized) return (_mi_os_large_page_size() > 0);
large_initialized = true;
if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.)
// Try to see if large OS pages are supported
// To use large pages on Windows, we first need access permission
// Set "Lock pages in memory" permission in the group policy editor
// <https://devblogs.microsoft.com/oldnewthing/20110128-00/?p=11643>
unsigned long err = 0;
HANDLE token = NULL;
BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
if (ok) {
TOKEN_PRIVILEGES tp;
ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid);
if (ok) {
tp.PrivilegeCount = 1;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
if (ok) {
err = GetLastError();
ok = (err == ERROR_SUCCESS);
if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) {
*large_page_size = (*pGetLargePageMinimum)();
}
}
}
CloseHandle(token);
}
if (!ok) {
if (err == 0) err = GetLastError();
_mi_warning_message("cannot enable large OS page support, error %lu\n", err);
}
return (ok!=0);
}
//---------------------------------------------
// Initialize
//---------------------------------------------
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
config->has_overcommit = false;
config->has_partial_free = false;
config->has_virtual_reserve = true;
// get the page size
SYSTEM_INFO si;
GetSystemInfo(&si);
if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
// get virtual address bits
if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
config->virtual_address_bits = vbits;
}
// get the VirtualAlloc2 function
HINSTANCE hDll;
hDll = LoadLibrary(TEXT("kernelbase.dll"));
if (hDll != NULL) {
// use VirtualAlloc2FromApp if possible as it is available to Windows store apps
pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp");
if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
FreeLibrary(hDll);
}
// NtAllocateVirtualMemoryEx is used for huge page allocation
hDll = LoadLibrary(TEXT("ntdll.dll"));
if (hDll != NULL) {
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
FreeLibrary(hDll);
}
// Try to use Win7+ numa API
hDll = LoadLibrary(TEXT("kernel32.dll"));
if (hDll != NULL) {
pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode");
pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask");
pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber");
pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum");
// Get physical memory (not available on XP, so check dynamically)
PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory");
if (pGetPhysicallyInstalledSystemMemory != NULL) {
ULONGLONG memInKiB = 0;
if ((*pGetPhysicallyInstalledSystemMemory)(&memInKiB)) {
if (memInKiB > 0 && memInKiB <= SIZE_MAX) {
config->physical_memory_in_kib = (size_t)memInKiB;
}
}
}
FreeLibrary(hDll);
}
// Enable large/huge OS page support?
if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
win_enable_large_os_pages(&config->large_page_size);
}
}
//---------------------------------------------
// Free
//---------------------------------------------
int _mi_prim_free(void* addr, size_t size ) {
MI_UNUSED(size);
DWORD errcode = 0;
bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
if (err) { errcode = GetLastError(); }
if (errcode == ERROR_INVALID_ADDRESS) {
// In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside
// the memory region returned by VirtualAlloc; in that case we need to free using
// the start of the region.
MEMORY_BASIC_INFORMATION info; _mi_memzero_var(info);
VirtualQuery(addr, &info, sizeof(info));
if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) {
errcode = 0;
err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
if (err) { errcode = GetLastError(); }
}
}
return (int)errcode;
}
//---------------------------------------------
// VirtualAlloc
//---------------------------------------------
static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_alignment, DWORD flags) {
#if (MI_INTPTR_SIZE >= 8)
// on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
if (addr == NULL) {
void* hint = _mi_os_get_aligned_hint(try_alignment,size);
if (hint != NULL) {
void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
if (p != NULL) return p;
_mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags);
// fall through on error
}
}
#endif
// on modern Windows try to use VirtualAlloc2 for aligned allocation
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
reqs.Alignment = try_alignment;
MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
param.Type.Type = MiMemExtendedParameterAddressRequirements;
param.Arg.Pointer = &reqs;
void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
if (p != NULL) return p;
_mi_warning_message("unable to allocate aligned OS memory (0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags);
// fall through on error
}
// last resort
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
}
static bool win_is_out_of_memory_error(DWORD err) {
switch (err) {
case ERROR_COMMITMENT_MINIMUM:
case ERROR_COMMITMENT_LIMIT:
case ERROR_PAGEFILE_QUOTA:
case ERROR_NOT_ENOUGH_MEMORY:
return true;
default:
return false;
}
}
static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) {
long max_retry_msecs = mi_option_get_clamp(mi_option_retry_on_oom, 0, 2000); // at most 2 seconds
if (max_retry_msecs == 1) { max_retry_msecs = 100; } // if one sets the option to "true"
for (long tries = 1; tries <= 10; tries++) { // try at most 10 times (=2200ms)
void* p = win_virtual_alloc_prim_once(addr, size, try_alignment, flags);
if (p != NULL) {
// success, return the address
return p;
}
else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) &&
(flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
win_is_out_of_memory_error(GetLastError())) {
// if committing regular memory and being out-of-memory,
// keep trying for a bit in case memory frees up after all. See issue #894
_mi_warning_message("out-of-memory on OS allocation, try again... (attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags);
long sleep_msecs = tries*40; // increasing waits
if (sleep_msecs > max_retry_msecs) { sleep_msecs = max_retry_msecs; }
max_retry_msecs -= sleep_msecs;
Sleep(sleep_msecs);
}
else {
// otherwise return with an error
break;
}
}
return NULL;
}
static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
mi_assert_internal(!(large_only && !allow_large));
static _Atomic(size_t) large_page_try_ok; // = 0;
void* p = NULL;
// Try to allocate large OS pages (2MiB) if allowed or required.
if ((large_only || (_mi_os_canuse_large_page(size, try_alignment) && mi_option_is_enabled(mi_option_allow_large_os_pages)))
&& allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0)
{
size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
if (!large_only && try_ok > 0) {
// if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
// therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
}
else {
// large OS pages must always reserve and commit.
*is_large = true;
p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES);
if (large_only) return p;
// fall back to non-large page allocation on error (`p == NULL`).
if (p == NULL) {
mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations
}
}
}
// Fall back to regular page allocation
if (p == NULL) {
*is_large = ((flags&MEM_LARGE_PAGES) != 0);
p = win_virtual_alloc_prim(addr, size, try_alignment, flags);
}
//if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); }
return p;
}
int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(commit || !allow_large);
mi_assert_internal(try_alignment > 0);
*is_zero = true;
int flags = MEM_RESERVE;
if (commit) { flags |= MEM_COMMIT; }
*addr = win_virtual_alloc(hint_addr, size, try_alignment, flags, false, allow_large, is_large);
return (*addr != NULL ? 0 : (int)GetLastError());
}
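/* Hypothetical usage sketch for the primitive above (kept in a comment, for
   illustration only): reserve and commit one OS page of zeroed memory.

     void* p = NULL;
     bool is_large = false, is_zero = false;
     int err = _mi_prim_alloc(NULL, _mi_os_page_size(), 1,
                              true,   // commit
                              false,  // allow_large
                              &is_large, &is_zero, &p);
     if (err == 0) {
       // p is committed, page-aligned, and zero-initialized when *is_zero is set
     }
*/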
//---------------------------------------------
// Commit/Reset/Protect
//---------------------------------------------
#ifdef _MSC_VER
#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit)
#endif
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
*is_zero = false;
/*
// zero'ing only happens on an initial commit... but checking upfront seems expensive..
_MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo);
if (VirtualQuery(addr, &meminfo, size) > 0) {
if ((meminfo.State & MEM_COMMIT) == 0) {
*is_zero = true;
}
}
*/
// commit
void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
if (p == NULL) return (int)GetLastError();
return 0;
}
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
*needs_recommit = true; // for safety, assume always decommitted even in the case of an error.
return (ok ? 0 : (int)GetLastError());
}
int _mi_prim_reset(void* addr, size_t size) {
void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
mi_assert_internal(p == addr);
#if 0
if (p != NULL) {
VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set
}
#endif
return (p != NULL ? 0 : (int)GetLastError());
}
int _mi_prim_reuse(void* addr, size_t size) {
MI_UNUSED(addr); MI_UNUSED(size);
return 0;
}
int _mi_prim_protect(void* addr, size_t size, bool protect) {
DWORD oldprotect = 0;
BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect);
return (ok ? 0 : (int)GetLastError());
}
//---------------------------------------------
// Huge page allocation
//---------------------------------------------
static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node)
{
const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
win_enable_large_os_pages(NULL);
MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} };
// on modern Windows, try to use NtAllocateVirtualMemoryEx for 1GiB huge pages
static bool mi_huge_pages_available = true;
if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
params[0].Type.Type = MiMemExtendedParameterAttributeFlags;
params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
ULONG param_count = 1;
if (numa_node >= 0) {
param_count++;
params[1].Type.Type = MiMemExtendedParameterNumaNode;
params[1].Arg.ULong = (unsigned)numa_node;
}
SIZE_T psize = size;
void* base = hint_addr;
LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
if (err == 0 && base != NULL) {
return base;
}
else {
// fall back to regular large pages
mi_huge_pages_available = false; // don't try further huge pages
_mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
}
}
// on modern Windows, try to use VirtualAlloc2 for NUMA-aware large OS page allocation
if (pVirtualAlloc2 != NULL && numa_node >= 0) {
params[0].Type.Type = MiMemExtendedParameterNumaNode;
params[0].Arg.ULong = (unsigned)numa_node;
return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1);
}
// otherwise use regular virtual alloc on older windows
return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
}
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
*is_zero = true;
*addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
return (*addr != NULL ? 0 : (int)GetLastError());
}
//---------------------------------------------
// Numa nodes
//---------------------------------------------
size_t _mi_prim_numa_node(void) {
USHORT numa_node = 0;
if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
// Extended API is supported
MI_PROCESSOR_NUMBER pnum;
(*pGetCurrentProcessorNumberEx)(&pnum);
USHORT nnode = 0;
BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
if (ok) { numa_node = nnode; }
}
else if (pGetNumaProcessorNode != NULL) {
// Vista or earlier, use older API that is limited to 64 processors. Issue #277
DWORD pnum = GetCurrentProcessorNumber();
UCHAR nnode = 0;
BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode);
if (ok) { numa_node = nnode; }
}
return numa_node;
}
size_t _mi_prim_numa_node_count(void) {
ULONG numa_max = 0;
if (pGetNumaHighestNodeNumber!=NULL) {
(*pGetNumaHighestNodeNumber)(&numa_max);
}
// find the highest node number that has actual processors assigned to it. Issue #282
while (numa_max > 0) {
if (pGetNumaNodeProcessorMaskEx != NULL) {
// Extended API is supported
GROUP_AFFINITY affinity;
if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) {
if (affinity.Mask != 0) break; // found the maximum non-empty node
}
}
else {
// Vista or earlier, use older API that is limited to 64 processors.
ULONGLONG mask;
if (pGetNumaNodeProcessorMask != NULL) {
if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) {
if (mask != 0) break; // found the maximum non-empty node
}
};
}
// max node was invalid or had no processor assigned, try again
numa_max--;
}
return ((size_t)numa_max + 1);
}
//----------------------------------------------------------------
// Clock
//----------------------------------------------------------------
static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
static LARGE_INTEGER mfreq; // = 0
if (mfreq.QuadPart == 0LL) {
LARGE_INTEGER f;
QueryPerformanceFrequency(&f);
mfreq.QuadPart = f.QuadPart/1000LL;
if (mfreq.QuadPart == 0) mfreq.QuadPart = 1;
}
return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
}
mi_msecs_t _mi_prim_clock_now(void) {
LARGE_INTEGER t;
QueryPerformanceCounter(&t);
return mi_to_msecs(t);
}
//----------------------------------------------------------------
// Process Info
//----------------------------------------------------------------
#include <psapi.h>
static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
ULARGE_INTEGER i;
i.LowPart = ftime->dwLowDateTime;
i.HighPart = ftime->dwHighDateTime;
mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in units of 100 nanoseconds
return msecs;
}
typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD);
static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;
void _mi_prim_process_info(mi_process_info_t* pinfo)
{
FILETIME ct;
FILETIME ut;
FILETIME st;
FILETIME et;
GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
pinfo->utime = filetime_msecs(&ut);
pinfo->stime = filetime_msecs(&st);
// load psapi on demand
if (pGetProcessMemoryInfo == NULL) {
HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
if (hDll != NULL) {
pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo");
}
}
// get process info
PROCESS_MEMORY_COUNTERS info; _mi_memzero_var(info);
if (pGetProcessMemoryInfo != NULL) {
pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
}
pinfo->current_rss = (size_t)info.WorkingSetSize;
pinfo->peak_rss = (size_t)info.PeakWorkingSetSize;
pinfo->current_commit = (size_t)info.PagefileUsage;
pinfo->peak_commit = (size_t)info.PeakPagefileUsage;
pinfo->page_faults = (size_t)info.PageFaultCount;
}
//----------------------------------------------------------------
// Output
//----------------------------------------------------------------
void _mi_prim_out_stderr( const char* msg )
{
// on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output.
if (!_mi_preloading()) {
// _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console
static HANDLE hcon = INVALID_HANDLE_VALUE;
static bool hconIsConsole = false;
if (hcon == INVALID_HANDLE_VALUE) {
hcon = GetStdHandle(STD_ERROR_HANDLE);
#ifdef MI_HAS_CONSOLE_IO
CONSOLE_SCREEN_BUFFER_INFO sbi;
hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi));
#endif
}
const size_t len = _mi_strlen(msg);
if (len > 0 && len < UINT32_MAX) {
DWORD written = 0;
if (hconIsConsole) {
#ifdef MI_HAS_CONSOLE_IO
WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL);
#endif
}
else if (hcon != INVALID_HANDLE_VALUE) {
// use direct write if stderr was redirected
WriteFile(hcon, msg, (DWORD)len, &written, NULL);
}
else {
// finally fall back to fputs after all
fputs(msg, stderr);
}
}
}
}
//----------------------------------------------------------------
// Environment
//----------------------------------------------------------------
// On Windows use GetEnvironmentVariable instead of getenv to work
// reliably even when this is invoked before the C runtime is initialized.
// i.e. when `_mi_preloading() == true`.
// Note: on windows, environment names are not case sensitive.
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
result[0] = 0;
size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
return (len > 0 && len < result_size);
}
//----------------------------------------------------------------
// Random
//----------------------------------------------------------------
#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus)
// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
// dynamic overriding, we observed it can raise an exception when compiled with C++, and
// sometimes deadlocks when also running under the VS debugger.
// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom.
// To be continued..
#pragma comment (lib,"advapi32.lib")
#define RtlGenRandom SystemFunction036
mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
}
#else
#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG
#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002
#endif
typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG);
static PBCryptGenRandom pBCryptGenRandom = NULL;
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
if (pBCryptGenRandom == NULL) {
HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll"));
if (hDll != NULL) {
pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom");
}
if (pBCryptGenRandom == NULL) return false;
}
return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
#endif // MI_USE_RTLGENRANDOM
//----------------------------------------------------------------
// Process & Thread Init/Done
//----------------------------------------------------------------
#if MI_WIN_USE_FIXED_TLS==1
mi_decl_cache_align size_t _mi_win_tls_offset = 0;
#endif
//static void mi_debug_out(const char* s) {
// HANDLE h = GetStdHandle(STD_ERROR_HANDLE);
// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL);
//}
static void mi_win_tls_init(DWORD reason) {
if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) {
#if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically
if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) {
const DWORD tls_slot = TlsAlloc(); // usually returns slot 1
if (tls_slot == TLS_OUT_OF_INDEXES) {
_mi_error_message(EFAULT, "unable to allocate a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n");
}
_mi_win_tls_offset = (size_t)tls_slot * sizeof(void*);
}
#endif
#if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation
if (mi_prim_get_default_heap() == NULL) {
_mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty);
#if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1
void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*)));
mi_assert_internal(p == (void*)&_mi_heap_empty);
#endif
}
#endif
}
}
static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
MI_UNUSED(reserved);
MI_UNUSED(module);
mi_win_tls_init(reason);
if (reason==DLL_PROCESS_ATTACH) {
_mi_auto_process_init();
}
else if (reason==DLL_PROCESS_DETACH) {
_mi_auto_process_done();
}
else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) {
_mi_thread_done(NULL);
}
}
#if defined(MI_SHARED_LIB)
#define MI_PRIM_HAS_PROCESS_ATTACH 1
// Windows DLL: easy to hook into process_init and thread_done
BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
mi_win_main((PVOID)inst,reason,reserved);
return TRUE;
}
// nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event.
void _mi_prim_thread_init_auto_done(void) { }
void _mi_prim_thread_done_auto_done(void) { }
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#elif !defined(MI_WIN_USE_FLS)
#define MI_PRIM_HAS_PROCESS_ATTACH 1
static void NTAPI mi_win_main_attach(PVOID module, DWORD reason, LPVOID reserved) {
if (reason == DLL_PROCESS_ATTACH || reason == DLL_THREAD_ATTACH) {
mi_win_main(module, reason, reserved);
}
}
static void NTAPI mi_win_main_detach(PVOID module, DWORD reason, LPVOID reserved) {
if (reason == DLL_PROCESS_DETACH || reason == DLL_THREAD_DETACH) {
mi_win_main(module, reason, reserved);
}
}
// Set up TLS callbacks in a statically linked library by using special data sections.
// See <https://stackoverflow.com/questions/14538159/tls-callback-in-windows>
// We use 2 entries to ensure we call attach events before constructors
// are called, and detach events after destructors are called.
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(_WIN64)
#pragma comment(linker, "/INCLUDE:_tls_used")
#pragma comment(linker, "/INCLUDE:_mi_tls_callback_pre")
#pragma comment(linker, "/INCLUDE:_mi_tls_callback_post")
#pragma const_seg(".CRT$XLB")
extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[];
const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach };
#pragma const_seg()
#pragma const_seg(".CRT$XLY")
extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[];
const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach };
#pragma const_seg()
#else
#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:__mi_tls_callback_pre")
#pragma comment(linker, "/INCLUDE:__mi_tls_callback_post")
#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach };
#pragma data_seg()
#pragma data_seg(".CRT$XLY")
PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach };
#pragma data_seg()
#endif
#if defined(__cplusplus)
}
#endif
// nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event.
void _mi_prim_thread_init_auto_done(void) { }
void _mi_prim_thread_done_auto_done(void) { }
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#else // deprecated: statically linked, use fiber api
#if defined(_MSC_VER) // on clang/gcc use the constructor attribute (in `src/prim/prim.c`)
// MSVC: use data section magic for static libraries
// See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm>
#define MI_PRIM_HAS_PROCESS_ATTACH 1
static int mi_process_attach(void) {
mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL);
atexit(&_mi_auto_process_done);
return 0;
}
typedef int(*mi_crt_callback_t)(void);
#if defined(_WIN64)
#pragma comment(linker, "/INCLUDE:_mi_tls_callback")
#pragma section(".CRT$XIU", long, read)
#else
#pragma comment(linker, "/INCLUDE:__mi_tls_callback")
#endif
#pragma data_seg(".CRT$XIU")
mi_decl_externc mi_crt_callback_t _mi_tls_callback[] = { &mi_process_attach };
#pragma data_seg()
#endif
// use the fiber api for calling `_mi_thread_done`.
#include <fibersapi.h>
#if (_WIN32_WINNT < 0x600) // before Windows Vista
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex);
#endif
static DWORD mi_fls_key = (DWORD)(-1);
static void NTAPI mi_fls_done(PVOID value) {
mi_heap_t* heap = (mi_heap_t*)value;
if (heap != NULL) {
_mi_thread_done(heap);
FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672
}
}
void _mi_prim_thread_init_auto_done(void) {
mi_fls_key = FlsAlloc(&mi_fls_done);
}
void _mi_prim_thread_done_auto_done(void) {
// call thread-done on all threads (except the main thread) to prevent
// dangling callback pointer if statically linked with a DLL; Issue #208
FlsFree(mi_fls_key);
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
mi_assert_internal(mi_fls_key != (DWORD)(-1));
FlsSetValue(mi_fls_key, heap);
}
#endif
// ----------------------------------------------------
// Communicate with the redirection module on Windows
// ----------------------------------------------------
#if defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT)
#define MI_PRIM_HAS_ALLOCATOR_INIT 1
static bool mi_redirected = false; // true if malloc redirects to mi_malloc
bool _mi_is_redirected(void) {
return mi_redirected;
}
#ifdef __cplusplus
extern "C" {
#endif
mi_decl_export void _mi_redirect_entry(DWORD reason) {
// called on redirection; careful as this may be called before DllMain
mi_win_tls_init(reason);
if (reason == DLL_PROCESS_ATTACH) {
mi_redirected = true;
}
else if (reason == DLL_PROCESS_DETACH) {
mi_redirected = false;
}
else if (reason == DLL_THREAD_DETACH) {
_mi_thread_done(NULL);
}
}
__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message);
__declspec(dllimport) void mi_cdecl mi_allocator_done(void);
#ifdef __cplusplus
}
#endif
bool _mi_allocator_init(const char** message) {
return mi_allocator_init(message);
}
void _mi_allocator_done(void) {
mi_allocator_done();
}
#endif

258
compat/mimalloc/random.c Normal file

@ -0,0 +1,258 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h" // _mi_prim_random_buf
#include <string.h> // memset
/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
and to avoid implementations that use a lock. We only use the OS provided
random source to initialize the initial seeds. Since we do not need ultimate
performance but we do rely on the security (for secret cookies in secure mode)
we use a cryptographically secure generator (chacha20).
-----------------------------------------------------------------------------*/
#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?
/* ----------------------------------------------------------------------------
Chacha20 implementation as the original algorithm with a 64-bit nonce
and counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position 0 to 3: constant key
Position 4 to 11: the key
Position 12 to 13: the counter.
Position 14 to 15: the nonce.
The implementation uses regular C code which compiles very well on modern compilers.
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
return (x << shift) | (x >> (32 - shift));
}
static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
{
// scramble into `x`
uint32_t x[16];
for (size_t i = 0; i < 16; i++) {
x[i] = ctx->input[i];
}
for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
qround(x, 0, 4, 8, 12);
qround(x, 1, 5, 9, 13);
qround(x, 2, 6, 10, 14);
qround(x, 3, 7, 11, 15);
qround(x, 0, 5, 10, 15);
qround(x, 1, 6, 11, 12);
qround(x, 2, 7, 8, 13);
qround(x, 3, 4, 9, 14);
}
// add scrambled data to the initial state
for (size_t i = 0; i < 16; i++) {
ctx->output[i] = x[i] + ctx->input[i];
}
ctx->output_available = 16;
// increment the counter for the next round
ctx->input[12] += 1;
if (ctx->input[12] == 0) {
ctx->input[13] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
}
}
}
static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
if (ctx->output_available <= 0) {
chacha_block(ctx);
ctx->output_available = 16; // (assign again to suppress static analysis warning)
}
const uint32_t x = ctx->output[16 - ctx->output_available];
ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
ctx->output_available--;
return x;
}
static inline uint32_t read32(const uint8_t* p, size_t idx32) {
const size_t i = 4*idx32;
return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian, but we do anyway
// just to stay compatible :-)
memset(ctx, 0, sizeof(*ctx));
for (size_t i = 0; i < 4; i++) {
const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
ctx->input[i] = read32(sigma,i);
}
for (size_t i = 0; i < 8; i++) {
ctx->input[i + 4] = read32(key,i);
}
ctx->input[12] = 0;
ctx->input[13] = 0;
ctx->input[14] = (uint32_t)nonce;
ctx->input[15] = (uint32_t)(nonce >> 32);
}
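// Worked example (illustration only): reading the sigma constant
// "expand 32-byte k" as four little-endian 32-bit words gives
//   read32(sigma,0) == 0x61707865  ("expa")
//   read32(sigma,1) == 0x3320646e  ("nd 3")
//   read32(sigma,2) == 0x79622d32  ("2-by")
//   read32(sigma,3) == 0x6b206574  ("te k")
// which matches the first four input words of the RFC 8439 test vector at the
// bottom of this file.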
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
_mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
ctx_new->input[14] = (uint32_t)nonce;
ctx_new->input[15] = (uint32_t)(nonce >> 32);
mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
chacha_block(ctx_new);
}
/* ----------------------------------------------------------------------------
Random interface
-----------------------------------------------------------------------------*/
#if MI_DEBUG>1
static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
return (ctx != NULL && ctx->input[0] != 0);
}
#endif
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
mi_assert_internal(mi_random_is_initialized(ctx));
mi_assert_internal(ctx != ctx_new);
chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
}
uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
mi_assert_internal(mi_random_is_initialized(ctx));
uintptr_t r;
do {
#if MI_INTPTR_SIZE <= 4
r = chacha_next32(ctx);
#elif MI_INTPTR_SIZE == 8
r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
#else
# error "define mi_random_next for this platform"
#endif
} while (r==0);
return r;
}
/* ----------------------------------------------------------------------------
To initialize a fresh random context.
If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
-----------------------------------------------------------------------------*/
uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random
x ^= _mi_prim_clock_now();
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max || x==0; i++, x++) {
x = _mi_random_shuffle(x);
}
mi_assert_internal(x != 0);
return x;
}
static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) {
uint8_t key[32];
if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
#if !defined(__wasi__)
if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); }
#endif
uintptr_t x = _mi_os_random_weak(0);
for (size_t i = 0; i < 8; i++, x++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
}
ctx->weak = true;
}
else {
ctx->weak = false;
}
chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
}
void _mi_random_init(mi_random_ctx_t* ctx) {
mi_random_init_ex(ctx, false);
}
void _mi_random_init_weak(mi_random_ctx_t * ctx) {
mi_random_init_ex(ctx, true);
}
void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx) {
if (ctx->weak) {
_mi_random_init(ctx);
}
}
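/* Usage sketch of the interface above (illustration only, kept in a comment):

   static void example_random_usage(void) {
     mi_random_ctx_t ctx;
     _mi_random_init(&ctx);                     // seed from the OS, or weakly on failure
     uintptr_t cookie = _mi_random_next(&ctx);  // never returns 0
     mi_random_ctx_t tctx;
     _mi_random_split(&ctx, &tctx);             // derive an independent per-thread stream
     uintptr_t key = _mi_random_next(&tctx);
     MI_UNUSED(cookie); MI_UNUSED(key);
   }
*/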
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
for (size_t i = 0; i < n; i++) {
if (x[i] != y[i]) return false;
}
return true;
}
static void chacha_test(void)
{
uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
qround(x, 0, 1, 2, 3);
mi_assert_internal(array_equals(x, x_out, 4));
uint32_t y[16] = {
0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
uint32_t y_out[16] = {
0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
qround(y, 2, 7, 8, 13);
mi_assert_internal(array_equals(y, y_out, 16));
mi_random_ctx_t r = {
{ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
{0},
0
};
uint32_t r_out[16] = {
0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
chacha_block(&r);
mi_assert_internal(array_equals(r.output, r_out, 16));
}
*/

142
compat/mimalloc/segment-map.c Normal file

@ -0,0 +1,142 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
The following functions are to reliably find the segment or
block that encompasses any pointer p (or NULL if it is not
in any of our segments).
We maintain a bitmap of all memory, with 1 bit per MI_SEGMENT_SIZE (64MiB),
set to 1 if it contains the segment metadata.
----------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
// Reduce total address space to reduce .bss (due to the `mi_segment_map`)
#if (MI_INTPTR_SIZE > 4) && MI_TRACK_ASAN
#define MI_SEGMENT_MAP_MAX_ADDRESS (128*1024ULL*MI_GiB) // 128 TiB (see issue #881)
#elif (MI_INTPTR_SIZE > 4)
#define MI_SEGMENT_MAP_MAX_ADDRESS (48*1024ULL*MI_GiB) // 48 TiB
#else
#define MI_SEGMENT_MAP_MAX_ADDRESS (UINT32_MAX)
#endif
#define MI_SEGMENT_MAP_PART_SIZE (MI_INTPTR_SIZE*MI_KiB - 128) // 128 > sizeof(mi_memid_t) !
#define MI_SEGMENT_MAP_PART_BITS (8*MI_SEGMENT_MAP_PART_SIZE)
#define MI_SEGMENT_MAP_PART_ENTRIES (MI_SEGMENT_MAP_PART_SIZE / MI_INTPTR_SIZE)
#define MI_SEGMENT_MAP_PART_BIT_SPAN (MI_SEGMENT_ALIGN) // memory area covered by 1 bit
#if (MI_SEGMENT_MAP_PART_BITS < (MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_BIT_SPAN)) // prevent overflow on 32-bit (issue #1017)
#define MI_SEGMENT_MAP_PART_SPAN (MI_SEGMENT_MAP_PART_BITS * MI_SEGMENT_MAP_PART_BIT_SPAN)
#else
#define MI_SEGMENT_MAP_PART_SPAN MI_SEGMENT_MAP_MAX_ADDRESS
#endif
#define MI_SEGMENT_MAP_MAX_PARTS ((MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_SPAN) + 1)
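// Worked example of the sizing above (illustration only, assuming a 64-bit
// build where MI_INTPTR_SIZE==8 and 64MiB segment alignment as noted in the
// header comment): a part is 8*1024-128 = 8064 bytes, i.e. 1008 uintptr_t
// entries or 64512 bits, so one part spans 64512 * 64MiB (just under 4 TiB)
// and covering the 48 TiB maximum address space takes only about a dozen
// parts, allocated on demand below.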
// A part of the segment map.
typedef struct mi_segmap_part_s {
mi_memid_t memid;
_Atomic(uintptr_t) map[MI_SEGMENT_MAP_PART_ENTRIES];
} mi_segmap_part_t;
// Allocate parts on-demand to reduce .bss footprint
static _Atomic(mi_segmap_part_t*) mi_segment_map[MI_SEGMENT_MAP_MAX_PARTS]; // = { NULL, .. }
static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bool create_on_demand, size_t* idx, size_t* bitidx) {
// note: segment can be invalid or NULL.
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
*idx = 0;
*bitidx = 0;
if ((uintptr_t)segment >= MI_SEGMENT_MAP_MAX_ADDRESS) return NULL;
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_MAP_PART_SPAN;
if (segindex >= MI_SEGMENT_MAP_MAX_PARTS) return NULL;
mi_segmap_part_t* part = mi_atomic_load_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[segindex]);
// allocate on demand to reduce .bss footprint
if mi_unlikely(part == NULL) {
if (!create_on_demand) return NULL;
mi_memid_t memid;
part = (mi_segmap_part_t*)_mi_os_zalloc(sizeof(mi_segmap_part_t), &memid);
if (part == NULL) return NULL;
part->memid = memid;
mi_segmap_part_t* expected = NULL;
if (!mi_atomic_cas_ptr_strong_release(mi_segmap_part_t, &mi_segment_map[segindex], &expected, part)) {
_mi_os_free(part, sizeof(mi_segmap_part_t), memid);
part = expected;
if (part == NULL) return NULL;
}
}
mi_assert(part != NULL);
const uintptr_t offset = ((uintptr_t)segment) % MI_SEGMENT_MAP_PART_SPAN;
const uintptr_t bitofs = offset / MI_SEGMENT_MAP_PART_BIT_SPAN;
*idx = bitofs / MI_INTPTR_BITS;
*bitidx = bitofs % MI_INTPTR_BITS;
return part;
}
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return; // we lookup segments first in the arena's and don't need the segment map
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, true /* alloc map if needed */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return;
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* don't alloc if not present */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* dont alloc if not present */, &index, &bitidx);
if (part == NULL) return NULL;
const uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok); MI_UNUSED(cookie_ok);
return segment; // yes, allocated by us
}
return NULL;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
// first check if it is in an arena, then check if it is OS allocated
return (_mi_arena_contains(p) || _mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
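/* Usage sketch (illustration only): `mi_is_in_heap_region` is exported and can
   be used to check whether a pointer was handed out by mimalloc:

     void* p = mi_malloc(64);
     bool in_heap = mi_is_in_heap_region(p);  // true for p, false for e.g. a stack address
     mi_free(p);
*/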
void _mi_segment_map_unsafe_destroy(void) {
for (size_t i = 0; i < MI_SEGMENT_MAP_MAX_PARTS; i++) {
mi_segmap_part_t* part = mi_atomic_exchange_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[i], NULL);
if (part != NULL) {
_mi_os_free(part, sizeof(mi_segmap_part_t), part->memid);
}
}
}

1706
compat/mimalloc/segment.c Normal file

File diff suppressed because it is too large

633
compat/mimalloc/stats.c Normal file

@ -0,0 +1,633 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#include <string.h> // memset
#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
/* -----------------------------------------------------------
Statistics operations
----------------------------------------------------------- */
static bool mi_is_in_main(void* stat) {
return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
&& (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
}
static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
if mi_unlikely(mi_is_in_main(stat))
{
// add atomically (for abandoned pages)
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
// if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); };
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
if (amount > 0) {
mi_atomic_addi64_relaxed(&stat->total,amount);
}
}
else {
// add thread local
stat->current += amount;
if (stat->current > stat->peak) { stat->peak = stat->current; }
if (amount > 0) { stat->total += amount; }
}
}
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
if (mi_is_in_main(stat)) {
mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
}
else {
stat->total += amount;
}
}
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, (int64_t)amount);
}
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, -((int64_t)amount));
}
static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
if mi_unlikely(mi_is_in_main(stat))
{
// adjust atomically
mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_addi64_relaxed(&stat->total,amount);
}
else {
// adjust local
stat->current += amount;
stat->total += amount;
}
}
void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust(stat, -((int64_t)amount));
}
// must be thread safe as it is called from stats_merge
static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) {
if (stat==src) return;
mi_atomic_void_addi64_relaxed(&stat->total, &src->total);
const int64_t prev_current = mi_atomic_addi64_relaxed(&stat->current, src->current);
// Global current plus thread peak approximates new global peak
// note: peak scores do not really work across threads.
// we used to just add them together but that often overestimates in practice.
// similarly, max does not seem to work well. The current approach
// by Artem Kharytoniuk (@artem-lunarg) seems to work better, see PR#1112
// for a longer description.
mi_atomic_maxi64_relaxed(&stat->peak, prev_current + src->peak);
}
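// Worked example of the peak merge above (illustration only): if the global
// current value is 100 when a thread with current=20 and peak=30 merges in,
// the candidate peak is prev_current + src->peak = 100 + 30 = 130, and the
// global peak is raised only if it was below that.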
static void mi_stat_counter_add_mt(mi_stat_counter_t* stat, const mi_stat_counter_t* src) {
if (stat==src) return;
mi_atomic_void_addi64_relaxed(&stat->total, &src->total);
}
#define MI_STAT_COUNT(stat) mi_stat_count_add_mt(&stats->stat, &src->stat);
#define MI_STAT_COUNTER(stat) mi_stat_counter_add_mt(&stats->stat, &src->stat);
// must be thread safe as it is called from stats_merge
static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
if (stats==src) return;
// copy all fields
MI_STAT_FIELDS()
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_stat_count_add_mt(&stats->malloc_bins[i], &src->malloc_bins[i]);
}
#endif
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]);
}
}
#undef MI_STAT_COUNT
#undef MI_STAT_COUNTER
/* -----------------------------------------------------------
Display statistics
----------------------------------------------------------- */
// unit > 0 : size in binary bytes
// unit == 0: count as decimal
// unit < 0 : count in binary
static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
char buf[32]; buf[0] = 0;
int len = 32;
const char* suffix = (unit <= 0 ? " " : "B");
const int64_t base = (unit == 0 ? 1000 : 1024);
if (unit>0) n *= unit;
const int64_t pos = (n < 0 ? -n : n);
if (pos < base) {
if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column
_mi_snprintf(buf, len, "%lld %-3s", (long long)n, (n==0 ? "" : suffix));
}
}
else {
int64_t divider = base;
const char* magnitude = "K";
if (pos >= divider*base) { divider *= base; magnitude = "M"; }
if (pos >= divider*base) { divider *= base; magnitude = "G"; }
const int64_t tens = (n / (divider/10));
const long whole = (long)(tens/10);
const long frac1 = (long)(tens%10);
char unitdesc[8];
_mi_snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix);
_mi_snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc);
}
_mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf);
}
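// Worked examples for the formatting above (illustration only):
//   mi_printf_amount(3500,  1, out, arg, NULL)  ->  "3.4 KiB"  (unit>0: size in binary bytes)
//   mi_printf_amount(2500,  0, out, arg, NULL)  ->  "2.5 K"    (unit==0: decimal count)
//   mi_printf_amount( 800, -1, out, arg, NULL)  ->  "800"      (unit<0: binary count below the base)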
static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) {
mi_printf_amount(n,unit,out,arg,NULL);
}
static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) {
if (unit==1) _mi_fprintf(out, arg, "%12s"," ");
else mi_print_amount(n,0,out,arg);
}
static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) {
_mi_fprintf(out, arg,"%10s:", msg);
if (unit != 0) {
if (unit > 0) {
mi_print_amount(stat->peak, unit, out, arg);
mi_print_amount(stat->total, unit, out, arg);
// mi_print_amount(stat->freed, unit, out, arg);
mi_print_amount(stat->current, unit, out, arg);
mi_print_amount(unit, 1, out, arg);
mi_print_count(stat->total, unit, out, arg);
}
else {
mi_print_amount(stat->peak, -1, out, arg);
mi_print_amount(stat->total, -1, out, arg);
// mi_print_amount(stat->freed, -1, out, arg);
mi_print_amount(stat->current, -1, out, arg);
if (unit == -1) {
_mi_fprintf(out, arg, "%24s", "");
}
else {
mi_print_amount(-unit, 1, out, arg);
mi_print_count((stat->total / -unit), 0, out, arg);
}
}
if (stat->current != 0) {
_mi_fprintf(out, arg, " ");
_mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok));
_mi_fprintf(out, arg, "\n");
}
else {
_mi_fprintf(out, arg, " ok\n");
}
}
else {
mi_print_amount(stat->peak, 1, out, arg);
mi_print_amount(stat->total, 1, out, arg);
_mi_fprintf(out, arg, "%11s", " "); // no freed
mi_print_amount(stat->current, 1, out, arg);
_mi_fprintf(out, arg, "\n");
}
}
static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) {
mi_stat_print_ex(stat, msg, unit, out, arg, NULL);
}
#if MI_STAT>1
static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) {
_mi_fprintf(out, arg, "%10s:", msg);
_mi_fprintf(out, arg, "%12s", " "); // no peak
mi_print_amount(stat->total, unit, out, arg);
_mi_fprintf(out, arg, "\n");
}
#endif
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s:", msg);
mi_print_amount(stat->total, -1, out, arg);
_mi_fprintf(out, arg, "\n");
}
static void mi_stat_average_print(size_t count, size_t total, const char* msg, mi_output_fun* out, void* arg) {
const int64_t avg_tens = (count == 0 ? 0 : (total*10 / count));
const long avg_whole = (long)(avg_tens/10);
const long avg_frac1 = (long)(avg_tens%10);
_mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1);
}
static void mi_print_header(mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "block ", "total# ");
}
#if MI_STAT>1
static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) {
bool found = false;
char buf[64];
for (size_t i = 0; i <= max; i++) {
if (bins[i].total > 0) {
found = true;
int64_t unit = _mi_bin_size((uint8_t)i);
_mi_snprintf(buf, 64, "%s %3lu", fmt, (long)i);
mi_stat_print(&bins[i], buf, unit, out, arg);
}
}
if (found) {
_mi_fprintf(out, arg, "\n");
mi_print_header(out, arg);
}
}
#endif
//------------------------------------------------------------
// Use an output wrapper for line-buffered output
// (which is nice when using loggers etc.)
//------------------------------------------------------------
typedef struct buffered_s {
mi_output_fun* out; // original output function
void* arg; // and state
char* buf; // local buffer of at least size `count+1`
size_t used; // currently used chars `used <= count`
size_t count; // total chars available for output
} buffered_t;
static void mi_buffered_flush(buffered_t* buf) {
buf->buf[buf->used] = 0;
_mi_fputs(buf->out, buf->arg, NULL, buf->buf);
buf->used = 0;
}
static void mi_cdecl mi_buffered_out(const char* msg, void* arg) {
buffered_t* buf = (buffered_t*)arg;
if (msg==NULL || buf==NULL) return;
for (const char* src = msg; *src != 0; src++) {
char c = *src;
if (buf->used >= buf->count) mi_buffered_flush(buf);
mi_assert_internal(buf->used < buf->count);
buf->buf[buf->used++] = c;
if (c == '\n') mi_buffered_flush(buf);
}
}
//------------------------------------------------------------
// Print statistics
//------------------------------------------------------------
static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
// wrap the output function to be line buffered
char buf[256];
buffered_t buffer = { out0, arg0, NULL, 0, 255 };
buffer.buf = buf;
mi_output_fun* out = &mi_buffered_out;
void* arg = &buffer;
// and print using that
mi_print_header(out,arg);
#if MI_STAT>1
mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "bin",out,arg);
#endif
#if MI_STAT
mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg);
// mi_stat_print(&stats->malloc_large, "large", (stats->malloc_large_count.total == 0 ? 1 : -1), out, arg);
mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg);
mi_stat_count_t total = { 0,0,0 };
mi_stat_count_add_mt(&total, &stats->malloc_normal);
// mi_stat_count_add(&total, &stats->malloc_large);
mi_stat_count_add_mt(&total, &stats->malloc_huge);
mi_stat_print_ex(&total, "total", 1, out, arg, "");
#endif
#if MI_STAT>1
mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg);
_mi_fprintf(out, arg, "\n");
#endif
mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, "");
mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, "");
mi_stat_counter_print(&stats->reset, "reset", out, arg );
mi_stat_counter_print(&stats->purged, "purged", out, arg );
mi_stat_print_ex(&stats->page_committed, "touched", 1, out, arg, "");
mi_stat_print(&stats->segments, "segments", -1, out, arg);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
mi_stat_print(&stats->pages, "pages", -1, out, arg);
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
mi_stat_counter_print(&stats->pages_retire, "-retire", out, arg);
mi_stat_counter_print(&stats->arena_count, "arenas", out, arg);
// mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg);
mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);
mi_stat_counter_print(&stats->purge_calls, "purges", out, arg);
mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg);
mi_stat_print(&stats->threads, "threads", -1, out, arg);
mi_stat_average_print(stats->page_searches_count.total, stats->page_searches.total, "searches", out, arg);
_mi_fprintf(out, arg, "%10s: %5i\n", "numa nodes", _mi_os_numa_node_count());
size_t elapsed;
size_t user_time;
size_t sys_time;
size_t current_rss;
size_t peak_rss;
size_t current_commit;
size_t peak_commit;
size_t page_faults;
mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
_mi_fprintf(out, arg, "%10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000);
_mi_fprintf(out, arg, "%10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, peak rss: ", "process",
user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults );
mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s");
if (peak_commit > 0) {
_mi_fprintf(out, arg, ", peak commit: ");
mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s");
}
_mi_fprintf(out, arg, "\n");
}
static mi_msecs_t mi_process_start; // = 0
static mi_stats_t* mi_stats_get_default(void) {
mi_heap_t* heap = mi_heap_get_default();
return &heap->tld->stats;
}
static void mi_stats_merge_from(mi_stats_t* stats) {
if (stats != &_mi_stats_main) {
mi_stats_add(&_mi_stats_main, stats);
memset(stats, 0, sizeof(mi_stats_t));
}
}
void mi_stats_reset(void) mi_attr_noexcept {
mi_stats_t* stats = mi_stats_get_default();
if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); }
memset(&_mi_stats_main, 0, sizeof(mi_stats_t));
if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
}
void mi_stats_merge(void) mi_attr_noexcept {
mi_stats_merge_from( mi_stats_get_default() );
}
void _mi_stats_merge_thread(mi_tld_t* tld) {
mi_stats_merge_from( &tld->stats );
}
void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done`
mi_stats_merge_from(stats);
}
void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
mi_stats_merge_from(mi_stats_get_default());
_mi_stats_print(&_mi_stats_main, out, arg);
}
void mi_stats_print(void* out) mi_attr_noexcept {
// for compatibility there is an `out` parameter (which can be `stdout` or `stderr`)
mi_stats_print_out((mi_output_fun*)out, NULL);
}
void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
_mi_stats_print(mi_stats_get_default(), out, arg);
}
// ----------------------------------------------------------------
// Basic timer for convenience; use milliseconds to avoid doubles
// ----------------------------------------------------------------
static mi_msecs_t mi_clock_diff;
mi_msecs_t _mi_clock_now(void) {
return _mi_prim_clock_now();
}
mi_msecs_t _mi_clock_start(void) {
if (mi_clock_diff == 0.0) {
mi_msecs_t t0 = _mi_clock_now();
mi_clock_diff = _mi_clock_now() - t0;
}
return _mi_clock_now();
}
mi_msecs_t _mi_clock_end(mi_msecs_t start) {
mi_msecs_t end = _mi_clock_now();
return (end - start - mi_clock_diff);
}
// --------------------------------------------------------
// Basic process statistics
// --------------------------------------------------------
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
{
mi_process_info_t pinfo;
_mi_memzero_var(pinfo);
pinfo.elapsed = _mi_clock_end(mi_process_start);
pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
pinfo.current_rss = pinfo.current_commit;
pinfo.peak_rss = pinfo.peak_commit;
pinfo.utime = 0;
pinfo.stime = 0;
pinfo.page_faults = 0;
_mi_prim_process_info(&pinfo);
if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX));
if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX));
if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX));
if (current_rss!=NULL) *current_rss = pinfo.current_rss;
if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss;
if (current_commit!=NULL) *current_commit = pinfo.current_commit;
if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit;
if (page_faults!=NULL) *page_faults = pinfo.page_faults;
}
// --------------------------------------------------------
// Return statistics
// --------------------------------------------------------
void mi_stats_get(size_t stats_size, mi_stats_t* stats) mi_attr_noexcept {
if (stats == NULL || stats_size == 0) return;
_mi_memzero(stats, stats_size);
const size_t size = (stats_size > sizeof(mi_stats_t) ? sizeof(mi_stats_t) : stats_size);
_mi_memcpy(stats, &_mi_stats_main, size);
stats->version = MI_STAT_VERSION;
}
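/* Usage sketch (illustration only): callers pass the size of their own
   mi_stats_t so that a binary built against an older header still receives
   just the prefix it knows about:

     mi_stats_t stats;
     mi_stats_get(sizeof(stats), &stats);
     // stats.version holds MI_STAT_VERSION; any trailing bytes beyond what the
     // library copied remain zeroed
*/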
// --------------------------------------------------------
// Statistics in JSON format
// --------------------------------------------------------
typedef struct mi_heap_buf_s {
char* buf;
size_t size;
size_t used;
bool can_realloc;
} mi_heap_buf_t;
static bool mi_heap_buf_expand(mi_heap_buf_t* hbuf) {
if (hbuf==NULL) return false;
if (hbuf->buf != NULL && hbuf->size>0) {
hbuf->buf[hbuf->size-1] = 0;
}
if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false;
const size_t newsize = (hbuf->size == 0 ? mi_good_size(12*MI_KiB) : 2*hbuf->size);
char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize);
if (newbuf == NULL) return false;
hbuf->buf = newbuf;
hbuf->size = newsize;
return true;
}
static void mi_heap_buf_print(mi_heap_buf_t* hbuf, const char* msg) {
if (msg==NULL || hbuf==NULL) return;
if (hbuf->used + 1 >= hbuf->size && !hbuf->can_realloc) return;
for (const char* src = msg; *src != 0; src++) {
char c = *src;
if (hbuf->used + 1 >= hbuf->size) {
if (!mi_heap_buf_expand(hbuf)) return;
}
mi_assert_internal(hbuf->used < hbuf->size);
hbuf->buf[hbuf->used++] = c;
}
mi_assert_internal(hbuf->used < hbuf->size);
hbuf->buf[hbuf->used] = 0;
}
static void mi_heap_buf_print_count_bin(mi_heap_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, size_t bin, bool add_comma) {
const size_t binsize = _mi_bin_size(bin);
const size_t pagesize = (binsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_SMALL_PAGE_SIZE :
(binsize <= MI_MEDIUM_OBJ_SIZE_MAX ? MI_MEDIUM_PAGE_SIZE :
#if MI_LARGE_PAGE_SIZE
(binsize <= MI_LARGE_OBJ_SIZE_MAX ? MI_LARGE_PAGE_SIZE : 0)
#else
0
#endif
));
char buf[128];
_mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld, \"block_size\": %zu, \"page_size\": %zu }%s\n", prefix, stat->total, stat->peak, stat->current, binsize, pagesize, (add_comma ? "," : ""));
buf[127] = 0;
mi_heap_buf_print(hbuf, buf);
}
static void mi_heap_buf_print_count(mi_heap_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, bool add_comma) {
char buf[128];
_mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld }%s\n", prefix, stat->total, stat->peak, stat->current, (add_comma ? "," : ""));
buf[127] = 0;
mi_heap_buf_print(hbuf, buf);
}
static void mi_heap_buf_print_count_value(mi_heap_buf_t* hbuf, const char* name, mi_stat_count_t* stat) {
char buf[128];
_mi_snprintf(buf, 128, " \"%s\": ", name);
buf[127] = 0;
mi_heap_buf_print(hbuf, buf);
mi_heap_buf_print_count(hbuf, "", stat, true);
}
static void mi_heap_buf_print_value(mi_heap_buf_t* hbuf, const char* name, int64_t val) {
char buf[128];
_mi_snprintf(buf, 128, " \"%s\": %lld,\n", name, val);
buf[127] = 0;
mi_heap_buf_print(hbuf, buf);
}
static void mi_heap_buf_print_size(mi_heap_buf_t* hbuf, const char* name, size_t val, bool add_comma) {
char buf[128];
_mi_snprintf(buf, 128, " \"%s\": %zu%s\n", name, val, (add_comma ? "," : ""));
buf[127] = 0;
mi_heap_buf_print(hbuf, buf);
}
static void mi_heap_buf_print_counter_value(mi_heap_buf_t* hbuf, const char* name, mi_stat_counter_t* stat) {
mi_heap_buf_print_value(hbuf, name, stat->total);
}
#define MI_STAT_COUNT(stat) mi_heap_buf_print_count_value(&hbuf, #stat, &stats->stat);
#define MI_STAT_COUNTER(stat) mi_heap_buf_print_counter_value(&hbuf, #stat, &stats->stat);
char* mi_stats_get_json(size_t output_size, char* output_buf) mi_attr_noexcept {
mi_heap_buf_t hbuf = { NULL, 0, 0, true };
if (output_size > 0 && output_buf != NULL) {
_mi_memzero(output_buf, output_size);
hbuf.buf = output_buf;
hbuf.size = output_size;
hbuf.can_realloc = false;
}
else {
if (!mi_heap_buf_expand(&hbuf)) return NULL;
}
mi_heap_buf_print(&hbuf, "{\n");
mi_heap_buf_print_value(&hbuf, "version", MI_STAT_VERSION);
mi_heap_buf_print_value(&hbuf, "mimalloc_version", MI_MALLOC_VERSION);
// process info
mi_heap_buf_print(&hbuf, " \"process\": {\n");
size_t elapsed;
size_t user_time;
size_t sys_time;
size_t current_rss;
size_t peak_rss;
size_t current_commit;
size_t peak_commit;
size_t page_faults;
mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
mi_heap_buf_print_size(&hbuf, "elapsed_msecs", elapsed, true);
mi_heap_buf_print_size(&hbuf, "user_msecs", user_time, true);
mi_heap_buf_print_size(&hbuf, "system_msecs", sys_time, true);
mi_heap_buf_print_size(&hbuf, "page_faults", page_faults, true);
mi_heap_buf_print_size(&hbuf, "rss_current", current_rss, true);
mi_heap_buf_print_size(&hbuf, "rss_peak", peak_rss, true);
mi_heap_buf_print_size(&hbuf, "commit_current", current_commit, true);
mi_heap_buf_print_size(&hbuf, "commit_peak", peak_commit, false);
mi_heap_buf_print(&hbuf, " },\n");
// statistics
mi_stats_t* stats = &_mi_stats_main;
MI_STAT_FIELDS()
// size bins
mi_heap_buf_print(&hbuf, " \"malloc_bins\": [\n");
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_heap_buf_print_count_bin(&hbuf, " ", &stats->malloc_bins[i], i, i!=MI_BIN_HUGE);
}
mi_heap_buf_print(&hbuf, " ],\n");
mi_heap_buf_print(&hbuf, " \"page_bins\": [\n");
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_heap_buf_print_count_bin(&hbuf, " ", &stats->page_bins[i], i, i!=MI_BIN_HUGE);
}
mi_heap_buf_print(&hbuf, " ]\n");
mi_heap_buf_print(&hbuf, "}\n");
return hbuf.buf;
}