rpmalloc.c
1 /* rpmalloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
2 
3  This library provides a cross-platform lock free thread caching malloc implementation in C11.
4  The latest source code is always available at
5 
6  https://github.com/mjansson/rpmalloc
7 
8  This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9 
10 */
11 
12 #include "rpmalloc.h"
13 
19 
20 #ifndef HEAP_ARRAY_SIZE
21 #define HEAP_ARRAY_SIZE 47
23 #endif
24 #ifndef ENABLE_THREAD_CACHE
25 #define ENABLE_THREAD_CACHE 1
27 #endif
28 #ifndef ENABLE_GLOBAL_CACHE
29 #define ENABLE_GLOBAL_CACHE 1
31 #endif
32 #ifndef ENABLE_VALIDATE_ARGS
33 #define ENABLE_VALIDATE_ARGS 0
35 #endif
36 #ifndef ENABLE_STATISTICS
37 #define ENABLE_STATISTICS 0
39 #endif
40 #ifndef ENABLE_ASSERTS
41 #define ENABLE_ASSERTS 0
43 #endif
44 #ifndef ENABLE_OVERRIDE
45 #define ENABLE_OVERRIDE 0
47 #endif
48 #ifndef ENABLE_PRELOAD
49 #define ENABLE_PRELOAD 0
51 #endif
52 #ifndef DISABLE_UNMAP
53 #define DISABLE_UNMAP 0
55 #endif
56 #ifndef DEFAULT_SPAN_MAP_COUNT
57 #define DEFAULT_SPAN_MAP_COUNT 64
59 #endif
60 
61 #if ENABLE_THREAD_CACHE
62 #ifndef ENABLE_UNLIMITED_CACHE
63 #define ENABLE_UNLIMITED_CACHE 0
65 #endif
66 #ifndef ENABLE_UNLIMITED_THREAD_CACHE
67 #define ENABLE_UNLIMITED_THREAD_CACHE ENABLE_UNLIMITED_CACHE
69 #endif
70 #if !ENABLE_UNLIMITED_THREAD_CACHE
71 #ifndef THREAD_CACHE_MULTIPLIER
72 #define THREAD_CACHE_MULTIPLIER 16
74 #endif
75 #ifndef ENABLE_ADAPTIVE_THREAD_CACHE
76 #define ENABLE_ADAPTIVE_THREAD_CACHE 0
78 #endif
79 #endif
80 #endif
81 
82 #if ENABLE_GLOBAL_CACHE && ENABLE_THREAD_CACHE
83 #if DISABLE_UNMAP
84 #undef ENABLE_UNLIMITED_GLOBAL_CACHE
85 #define ENABLE_UNLIMITED_GLOBAL_CACHE 1
86 #endif
87 #ifndef ENABLE_UNLIMITED_GLOBAL_CACHE
88 #define ENABLE_UNLIMITED_GLOBAL_CACHE ENABLE_UNLIMITED_CACHE
90 #endif
91 #if !ENABLE_UNLIMITED_GLOBAL_CACHE
92 #define GLOBAL_CACHE_MULTIPLIER (THREAD_CACHE_MULTIPLIER * 6)
94 #endif
95 #else
96 # undef ENABLE_GLOBAL_CACHE
97 # define ENABLE_GLOBAL_CACHE 0
98 #endif
99 
100 #if !ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE
101 # undef ENABLE_ADAPTIVE_THREAD_CACHE
102 # define ENABLE_ADAPTIVE_THREAD_CACHE 0
103 #endif
104 
105 #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
106 # error Must use global cache if unmap is disabled
107 #endif
108 
109 #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
110 # define PLATFORM_WINDOWS 1
111 # define PLATFORM_POSIX 0
112 #else
113 # define PLATFORM_WINDOWS 0
114 # define PLATFORM_POSIX 1
115 #endif
116 
118 #if defined(_MSC_VER) && !defined(__clang__)
119 # ifndef FORCEINLINE
120 # define FORCEINLINE inline __forceinline
121 # endif
122 # define _Static_assert static_assert
123 #else
124 # ifndef FORCEINLINE
125 # define FORCEINLINE inline __attribute__((__always_inline__))
126 # endif
127 #endif
128 #if PLATFORM_WINDOWS
129 # ifndef WIN32_LEAN_AND_MEAN
130 # define WIN32_LEAN_AND_MEAN
131 # endif
132 # include <Windows.h>
133 # if ENABLE_VALIDATE_ARGS
134 # include <Intsafe.h>
135 # endif
136 #else
137 # include <unistd.h>
138 # include <stdio.h>
139 # include <stdlib.h>
140 # if defined(__APPLE__)
141 # include <mach/mach_vm.h>
142 # include <mach/vm_statistics.h>
143 # include <pthread.h>
144 # endif
145 # if defined(__HAIKU__)
146 # include <OS.h>
147 # include <pthread.h>
148 # endif
149 #endif
150 
151 #include <stdint.h>
152 #include <string.h>
153 #include <errno.h>
154 
155 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
156 #include <fibersapi.h>
157 static DWORD fls_key;
158 static void NTAPI
159 _rpmalloc_thread_destructor(void *value) {
160  if (value)
161   rpmalloc_thread_finalize();
162 }
163 #endif
164 
165 #if PLATFORM_POSIX
166 # include <sys/mman.h>
167 # include <sched.h>
168 # ifdef __FreeBSD__
169 # include <sys/sysctl.h>
170 # define MAP_HUGETLB MAP_ALIGNED_SUPER
171 # endif
172 # ifndef MAP_UNINITIALIZED
173 # define MAP_UNINITIALIZED 0
174 # endif
175 #endif
176 #include <errno.h>
177 
178 #if ENABLE_ASSERTS
179 # undef NDEBUG
180 # if defined(_MSC_VER) && !defined(_DEBUG)
181 # define _DEBUG
182 # endif
183 # include <assert.h>
184 #else
185 # undef assert
186 # define assert(x) do {} while(0)
187 #endif
188 #if ENABLE_STATISTICS
189 # include <stdio.h>
190 #endif
191 
197 
198 #if defined(_MSC_VER) && !defined(__clang__)
199 
200 typedef volatile long atomic32_t;
201 typedef volatile long long atomic64_t;
202 typedef volatile void *atomicptr_t;
203 
204 static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; }
205 static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { *dst = val; }
206 static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { return (int32_t)InterlockedIncrement(val); }
207 static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { return (int32_t)InterlockedDecrement(val); }
208 #if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
209 static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; }
210 static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; }
211 #endif
212 static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; }
213 static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { return (void *)*src; }
214 static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { *dst = val; }
215 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { *dst = val; }
216 static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) == ref) ? 1 : 0; }
217 static FORCEINLINE int atomic_cas_ptr_acquire(atomicptr_t *dst, void *val, void *ref) { return atomic_cas_ptr(dst, val, ref); }
218 
219 #define EXPECTED(x) (x)
220 #define UNEXPECTED(x) (x)
221 
222 #else
223 
224 #include <stdatomic.h>
225 
226 typedef volatile _Atomic(int32_t) atomic32_t;
227 typedef volatile _Atomic(int64_t) atomic64_t;
228 typedef volatile _Atomic(void *) atomicptr_t;
229 
230 static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return atomic_load_explicit(src, memory_order_relaxed); }
231 static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
232 static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; }
233 static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; }
234 #if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
235 static FORCEINLINE int64_t atomic_load64(atomic64_t *val) { return atomic_load_explicit(val, memory_order_relaxed); }
236 static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
237 #endif
238 static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
239 static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { return atomic_load_explicit(src, memory_order_relaxed); }
240 static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
241 static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { atomic_store_explicit(dst, val, memory_order_release); }
242 static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); }
243 static FORCEINLINE int atomic_cas_ptr_acquire(atomicptr_t *dst, void *val, void *ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); }
244 
245 #define EXPECTED(x) __builtin_expect((x), 1)
246 #define UNEXPECTED(x) __builtin_expect((x), 0)
247 
248 #endif
249 
255 
256 #if ENABLE_STATISTICS
257 # define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
258 # define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
259 # define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value))
260 # define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value))
261 # define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0)
262 # define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value))
263 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \
264  int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
265  if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
266  heap->size_class_use[class_idx].alloc_peak = alloc_current; \
267  atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
268 } while(0)
269 # define _rpmalloc_stat_inc_free(heap, class_idx) do { \
270  atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
271  atomic_incr32(&heap->size_class_use[class_idx].free_total); \
272 } while(0)
273 #else
274 # define _rpmalloc_stat_inc(counter) do {} while(0)
275 # define _rpmalloc_stat_dec(counter) do {} while(0)
276 # define _rpmalloc_stat_add(counter, value) do {} while(0)
277 # define _rpmalloc_stat_add64(counter, value) do {} while(0)
278 # define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0)
279 # define _rpmalloc_stat_sub(counter, value) do {} while(0)
280 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0)
281 # define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0)
282 #endif
283 
287 
289 #define SMALL_GRANULARITY 16
290 #define SMALL_GRANULARITY_SHIFT 4
292 #define SMALL_CLASS_COUNT 65
294 #define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
296 #define MEDIUM_GRANULARITY 512
298 #define MEDIUM_GRANULARITY_SHIFT 9
300 #define MEDIUM_CLASS_COUNT 61
302 #define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
304 #define LARGE_CLASS_COUNT 32
306 #define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
308 #define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
310 #define HEAP_ORPHAN_ABA_SIZE 512
312 #define SPAN_HEADER_SIZE 128
314 
315 _Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two");
316 _Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two");
317 
318 #if ENABLE_VALIDATE_ARGS
319 #undef MAX_ALLOC_SIZE
321 #define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size)
322 #endif
323 
324 #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
325 #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
326 
327 #define INVALID_POINTER ((void*)((uintptr_t)-1))
328 
329 #define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
330 #define SIZE_CLASS_HUGE ((uint32_t)-1)
331 
337 
339 typedef struct heap_t heap_t;
341 typedef struct span_t span_t;
343 typedef struct span_list_t span_list_t;
345 typedef struct span_active_t span_active_t;
347 typedef struct size_class_t size_class_t;
349 typedef struct global_cache_t global_cache_t;
350 
352 #define SPAN_FLAG_MASTER 1U
353 #define SPAN_FLAG_SUBSPAN 2U
355 #define SPAN_FLAG_ALIGNED_BLOCKS 4U
357 
358 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
359 struct span_use_t {
361  atomic32_t current;
363  atomic32_t high;
364 #if ENABLE_STATISTICS
365  atomic32_t spans_to_global;
368  atomic32_t spans_from_global;
370  atomic32_t spans_to_cache;
372  atomic32_t spans_from_cache;
374  atomic32_t spans_to_reserved;
376  atomic32_t spans_from_reserved;
378  atomic32_t spans_map_calls;
379 #endif
380 };
381 typedef struct span_use_t span_use_t;
382 #endif
383 
384 #if ENABLE_STATISTICS
385 struct size_class_use_t {
387  atomic32_t alloc_current;
389  int32_t alloc_peak;
391  atomic32_t alloc_total;
393  atomic32_t free_total;
395  atomic32_t spans_current;
397  int32_t spans_peak;
399  atomic32_t spans_to_cache;
401  atomic32_t spans_from_cache;
403  atomic32_t spans_from_reserved;
405  atomic32_t spans_map_calls;
406 };
407 typedef struct size_class_use_t size_class_use_t;
408 #endif
409 
410 // A span can either represent a single span of memory pages, with size declared by the span_map_count configuration variable,
411 // or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single
412 // span and a super span. A super span can further be divided into multiple spans (or, recursively, super spans), where the first
413 // (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans
414 // are still alive and mapped in virtual memory; once all subspans and the master have been unmapped, the entire
415 // super span region is released and unmapped (on Windows, for example, the entire super span range has to be released
416 // in the same call to release the virtual memory range, but individual subranges can be decommitted individually
417 // to reduce physical memory use).
418 struct span_t {
420  void *free_list;
422  uint32_t block_count;
424  uint32_t size_class;
426  uint32_t free_list_limit;
428  uint32_t used_count;
430  atomicptr_t free_list_deferred;
432  uint32_t list_size;
434  uint32_t block_size;
436  uint32_t flags;
438  uint32_t span_count;
440  uint32_t total_spans;
442  uint32_t offset_from_master;
444  atomic32_t remaining_spans;
446  uint32_t align_offset;
448  heap_t *heap;
450  span_t *next;
452  span_t *prev;
453 };
454 _Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
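// Worked example (editorial, for illustration only): how a subspan finds its master span.
// With the default 64KiB span size (_memory_span_size_shift == 16), a subspan located
// 3 spans after its master stores
//   offset_from_master = (uintptr_t)pointer_diff(subspan, master) >> 16;   // == 3
// and _rpmalloc_span_unmap() recovers the master with
//   master = pointer_offset(subspan, -(intptr_t)(offset_from_master * _memory_span_size));
// i.e. subspan - 3 * 64KiB. The remaining_spans counter on the master only reaches zero
// once the master and every subspan have been unmapped, at which point the whole
// super span mapping is released back to the system.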
455 
456 // Control structure for a heap, either a thread heap or a first class heap if enabled
457 struct heap_t {
459  uintptr_t owner_thread;
461  void *free_list[SIZE_CLASS_COUNT];
463  // Previous span pointer in head points to tail span of list.
464  span_t *partial_span[SIZE_CLASS_COUNT];
465 #if RPMALLOC_FIRST_CLASS_HEAPS
466  // Previous span pointer in head points to tail span of list.
468  span_t *full_span[SIZE_CLASS_COUNT];
469 #endif
470 #if ENABLE_THREAD_CACHE
471  span_t *span_cache[LARGE_CLASS_COUNT];
473 #endif
474  atomicptr_t span_free_deferred;
476 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
477  span_use_t span_use[LARGE_CLASS_COUNT];
479 #endif
480 #if RPMALLOC_FIRST_CLASS_HEAPS
481  span_t *large_huge_span;
483 #endif
484  size_t full_span_count;
487  span_t *span_reserve;
489  span_t *span_reserve_master;
491  size_t spans_reserved;
493  heap_t *next_heap;
495  heap_t *next_orphan;
497  size_t align_offset;
499  int32_t id;
501  int finalize;
503  heap_t *master_heap;
505  atomic32_t child_count;
506 #if ENABLE_STATISTICS
507  atomic64_t thread_to_global;
510  atomic64_t global_to_thread;
512  size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
513 #endif
514 };
515 
516 // Size class for defining a block size bucket
517 struct size_class_t {
519  uint32_t block_size;
521  uint16_t block_count;
523  uint16_t class_idx;
524 };
525 _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
526 
527 struct global_cache_t {
529  atomicptr_t cache;
531  atomic32_t size;
533  atomic32_t counter;
534 };
535 
541 
543 static int _rpmalloc_initialized;
545 static rpmalloc_config_t _memory_config;
547 static size_t _memory_page_size;
549 static size_t _memory_page_size_shift;
551 static size_t _memory_map_granularity;
552 #if RPMALLOC_CONFIGURABLE
553 static size_t _memory_span_size;
556 static size_t _memory_span_size_shift;
558 static uintptr_t _memory_span_mask;
559 #else
560 #define _memory_span_size (64 * 1024)
562 #define _memory_span_size_shift 16
563 #define _memory_span_mask (~((uintptr_t)(_memory_span_size - 1)))
564 #endif
565 static size_t _memory_span_map_count;
568 static size_t _memory_span_release_count;
570 static size_t _memory_span_release_count_large;
572 static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
574 static size_t _memory_medium_size_limit;
576 static atomic32_t _memory_heap_id;
578 static int _memory_huge_pages;
579 #if ENABLE_GLOBAL_CACHE
580 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
582 #endif
583 static atomicptr_t _memory_heaps[HEAP_ARRAY_SIZE];
586 static atomicptr_t _memory_orphan_heaps;
587 #if RPMALLOC_FIRST_CLASS_HEAPS
588 static atomicptr_t _memory_first_class_orphan_heaps;
590 #endif
591 static atomic32_t _memory_orphan_counter;
593 #if ENABLE_STATISTICS
594 static atomic32_t _memory_active_heaps;
597 static atomic32_t _mapped_pages;
599 static int32_t _mapped_pages_peak;
601 static atomic32_t _master_spans;
603 static atomic32_t _reserved_spans;
605 static atomic32_t _mapped_total;
607 static atomic32_t _unmapped_total;
609 static atomic32_t _mapped_pages_os;
611 static atomic32_t _huge_pages_current;
613 static int32_t _huge_pages_peak;
614 #endif
615 
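// Illustrative sketch (editorial, not part of the allocator): how an application can override
// these defaults at startup through the public API in rpmalloc.h. The field names below
// (memory_map, memory_unmap, span_size, span_map_count) are assumed to match the
// rpmalloc_config_t declaration in rpmalloc.h; my_map/my_unmap are hypothetical callbacks
// following the contracts documented at _rpmalloc_mmap / _rpmalloc_unmap further down in
// this file. Zeroed fields keep their default values.
//
//   rpmalloc_config_t config;
//   memset(&config, 0, sizeof(config));
//   config.memory_map = my_map;        // void* my_map(size_t size, size_t* offset)
//   config.memory_unmap = my_unmap;    // void my_unmap(void* addr, size_t size, size_t offset, size_t release)
//   config.span_size = 128 * 1024;     // only honored when built with RPMALLOC_CONFIGURABLE
//   config.span_map_count = 32;        // spans mapped per system call (DEFAULT_SPAN_MAP_COUNT otherwise)
//   rpmalloc_initialize_config(&config);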
621 
623 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
624 static pthread_key_t _memory_thread_heap;
625 #else
626 # ifdef _MSC_VER
627 # define _Thread_local __declspec(thread)
628 # define TLS_MODEL
629 # else
630 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
631 # if !defined(__clang__) && defined(__GNUC__)
632 # define _Thread_local __thread
633 # endif
634 # endif
635 static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
636 #endif
637 
638 static inline heap_t *
639 get_thread_heap_raw(void) {
640 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
641  return pthread_getspecific(_memory_thread_heap);
642 #else
643  return _memory_thread_heap;
644 #endif
645 }
646 
648 static inline heap_t *
649 get_thread_heap(void) {
650  heap_t *heap = get_thread_heap_raw();
651 #if ENABLE_PRELOAD
652  if (EXPECTED(heap != 0))
653  return heap;
655  return get_thread_heap_raw();
656 #else
657  return heap;
658 #endif
659 }
660 
662 static inline uintptr_t
663 get_thread_id(void) {
664 #if defined(_WIN32)
665  return (uintptr_t)((void *)NtCurrentTeb());
666 #elif defined(__GNUC__) || defined(__clang__)
667  uintptr_t tid;
668 # if defined(__i386__)
669  __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
670 # elif defined(__MACH__)
671  __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
672 # elif defined(__x86_64__)
673  __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
674 # elif defined(__arm__)
675  __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
676 # elif defined(__aarch64__)
677  __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
678 # else
679  tid = (uintptr_t)get_thread_heap_raw();
680 # endif
681  return tid;
682 #else
683  return (uintptr_t)get_thread_heap_raw();
684 #endif
685 }
686 
688 static void
689 set_thread_heap(heap_t *heap) {
690 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
691  pthread_setspecific(_memory_thread_heap, heap);
692 #else
693  _memory_thread_heap = heap;
694 #endif
695  if (heap)
696  heap->owner_thread = get_thread_id();
697 }
698 
704 
706 // size is number of bytes to map
707 // offset receives the offset in bytes from start of mapped region
708 // returns address to start of mapped region to use
709 static void *
710 _rpmalloc_mmap(size_t size, size_t *offset) {
711  assert(!(size % _memory_page_size));
712  assert(size >= _memory_page_size);
713  _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak);
714  _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
715  return _memory_config.memory_map(size, offset);
716 }
717 
719 // address is the memory address to unmap, as returned from _memory_map
720 // size is the number of bytes to unmap, which might be less than full region for a partial unmap
721 // offset is the offset in bytes to the actual mapped region, as set by _memory_map
722 // release is set to 0 for partial unmap, or size of entire range for a full unmap
723 static void
724 _rpmalloc_unmap(void *address, size_t size, size_t offset, size_t release) {
725  assert(!release || (release >= size));
726  assert(!release || (release >= _memory_page_size));
727  if (release) {
728  assert(!(release % _memory_page_size));
729  _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
730  _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
731  }
732  _memory_config.memory_unmap(address, size, offset, release);
733 }
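// Worked example (editorial): with the default 64KiB span size, a 4-span super span occupies
// 256KiB. Releasing one subspan early performs a partial unmap,
//   _rpmalloc_unmap(subspan, 64 * 1024, 0, 0);           // release == 0: decommit only
// while releasing the master once remaining_spans reaches zero performs a full unmap,
//   _rpmalloc_unmap(master, unmap_count * 64 * 1024, master->align_offset, 4 * 64 * 1024);
// so the release argument carries the size of the entire originally mapped range
// (see _rpmalloc_span_unmap below for the actual call sites).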
734 
736 static void *
737 _rpmalloc_mmap_os(size_t size, size_t *offset) {
738  //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity
739  size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
740  assert(size >= _memory_page_size);
741 #if PLATFORM_WINDOWS
742  //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
743  void *ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT,
744  PAGE_READWRITE);
745  if (!ptr) {
746  assert(ptr && "Failed to map virtual memory block");
747  return 0;
748  }
749 #else
750  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
751 # if defined(__APPLE__)
752  int fd = (int)VM_MAKE_TAG(240U);
753  if (_memory_huge_pages)
754  fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
755  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
756 # elif defined(MAP_HUGETLB)
757  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
758 # else
759  void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
760 # endif
761  if ((ptr == MAP_FAILED) || !ptr) {
762  assert("Failed to map virtual memory block" == 0);
763  return 0;
764  }
765 #endif
766  _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift));
767  if (padding) {
768  size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
769  assert(final_padding <= _memory_span_size);
770  assert(final_padding <= padding);
771  assert(!(final_padding % 8));
772  ptr = pointer_offset(ptr, final_padding);
773  *offset = final_padding >> 3;
774  }
775  assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask));
776  return ptr;
777 }
778 
780 static void
781 _rpmalloc_unmap_os(void *address, size_t size, size_t offset, size_t release) {
782  assert(release || (offset == 0));
783  assert(!release || (release >= _memory_page_size));
784  assert(size >= _memory_page_size);
785  if (release && offset) {
786  offset <<= 3;
787  address = pointer_offset(address, -(int32_t)offset);
788 #if PLATFORM_POSIX
789  //Padding is always one span size
790  release += _memory_span_size;
791 #endif
792  }
793 #if !DISABLE_UNMAP
794 #if PLATFORM_WINDOWS
795  if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
796  assert(address && "Failed to unmap virtual memory block");
797  }
798 #else
799  if (release) {
800  if (munmap(address, release)) {
801  assert("Failed to unmap virtual memory block" == 0);
802  }
803  } else {
804 #if defined(POSIX_MADV_FREE)
805  if (posix_madvise(address, size, POSIX_MADV_FREE))
806 #endif
807  if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
808  assert("Failed to madvise virtual memory block as free" == 0);
809  }
810  }
811 #endif
812 #endif
813  if (release)
814  _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
815 }
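// Worked example (editorial): suppose a span-aligned mapping is needed and mmap returns an
// address 24KiB past the previous 64KiB boundary. The mapping above was over-allocated by one
// span size of padding, so the pointer is advanced by
//   final_padding = 64KiB - 24KiB = 40KiB
// and that padding is stored compactly as *offset = 40KiB >> 3 (padding is always 8-byte aligned).
// _rpmalloc_unmap_os reverses this: offset <<= 3, the address is stepped back by 40KiB, and on
// POSIX one span size is added to the release size since the original mapping was size + padding.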
816 
817 
823 
824 #if ENABLE_THREAD_CACHE
825 
826 static void
827 _rpmalloc_span_unmap(span_t *span);
828 
830 static void
831 _rpmalloc_span_list_unmap_all(span_t *span) {
832  size_t list_size = span->list_size;
833  for (size_t ispan = 0; ispan < list_size; ++ispan) {
834  span_t *next_span = span->next;
835  _rpmalloc_span_unmap(span);
836  span = next_span;
837  }
838  assert(!span);
839 }
840 
842 static size_t
843 _rpmalloc_span_list_push(span_t **head, span_t *span) {
844  span->next = *head;
845  if (*head)
846  span->list_size = (*head)->list_size + 1;
847  else
848  span->list_size = 1;
849  *head = span;
850  return span->list_size;
851 }
852 
854 static span_t *
855 _rpmalloc_span_list_pop(span_t **head) {
856  span_t *span = *head;
857  span_t *next_span = 0;
858  if (span->list_size > 1) {
859  assert(span->next);
860  next_span = span->next;
861  assert(next_span);
862  next_span->list_size = span->list_size - 1;
863  }
864  *head = next_span;
865  return span;
866 }
867 
869 static span_t *
870 _rpmalloc_span_list_split(span_t *span, size_t limit) {
871  span_t *next = 0;
872  if (limit < 2)
873  limit = 2;
874  if (span->list_size > limit) {
875  uint32_t list_size = 1;
876  span_t *last = span;
877  next = span->next;
878  while (list_size < limit) {
879  last = next;
880  next = next->next;
881  ++list_size;
882  }
883  last->next = 0;
884  assert(next);
885  next->list_size = span->list_size - list_size;
886  span->list_size = list_size;
887  span->prev = 0;
888  }
889  return next;
890 }
891 
892 #endif
893 
895 static void
896 _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
897  if (*head) {
898  span->next = *head;
899  (*head)->prev = span;
900  } else {
901  span->next = 0;
902  }
903  *head = span;
904 }
905 
907 static void
908 _rpmalloc_span_double_link_list_pop_head(span_t **head, span_t *span) {
909  assert(*head == span);
910  span = *head;
911  *head = span->next;
912 }
913 
915 static void
916 _rpmalloc_span_double_link_list_remove(span_t **head, span_t *span) {
917  assert(*head);
918  if (*head == span) {
919  *head = span->next;
920  } else {
921  span_t *next_span = span->next;
922  span_t *prev_span = span->prev;
923  prev_span->next = next_span;
924  if (EXPECTED(next_span != 0)) {
925  next_span->prev = prev_span;
926  }
927  }
928 }
929 
930 
936 
937 static void
938 _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
939 
940 static void
941 _rpmalloc_heap_finalize(heap_t *heap);
942 
943 static void
944 _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count);
945 
947 static void
948 _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, span_t *subspan, size_t span_count) {
949  assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER));
950  if (subspan != master) {
951  subspan->flags = SPAN_FLAG_SUBSPAN;
952  subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift);
953  subspan->align_offset = 0;
954  }
955  subspan->span_count = (uint32_t)span_count;
956 }
957 
959 static span_t *
960 _rpmalloc_span_map_from_reserve(heap_t *heap, size_t span_count) {
961  //Update the heap span reserve
962  span_t *span = heap->span_reserve;
963  heap->span_reserve = (span_t *)pointer_offset(span, span_count * _memory_span_size);
964  heap->spans_reserved -= span_count;
965 
966  _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
967  if (span_count <= LARGE_CLASS_COUNT)
968  _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
969 
970  return span;
971 }
972 
974 static size_t
975 _rpmalloc_span_align_count(size_t span_count) {
976  size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
977  if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
978  request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
979  return request_count;
980 }
981 
983 static void
984 _rpmalloc_span_initialize(span_t *span, size_t total_span_count, size_t span_count, size_t align_offset) {
985  span->total_spans = (uint32_t)total_span_count;
986  span->span_count = (uint32_t)span_count;
987  span->align_offset = (uint32_t)align_offset;
988  span->flags = SPAN_FLAG_MASTER;
989  atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
990 }
991 
993 static span_t *
994 _rpmalloc_span_map_aligned_count(heap_t *heap, size_t span_count) {
995  //If we already have some, but not enough, reserved spans, release those to heap cache and map a new
996  //full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
997  size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
998  size_t align_offset = 0;
999  span_t *span = (span_t *)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset);
1000  if (!span)
1001  return 0;
1002  _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
1003  _rpmalloc_stat_add(&_reserved_spans, aligned_span_count);
1004  _rpmalloc_stat_inc(&_master_spans);
1005  if (span_count <= LARGE_CLASS_COUNT)
1006  _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
1007  if (aligned_span_count > span_count) {
1008  span_t *reserved_spans = (span_t *)pointer_offset(span, span_count * _memory_span_size);
1009  size_t reserved_count = aligned_span_count - span_count;
1010  if (heap->spans_reserved) {
1011  _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
1012  _rpmalloc_heap_cache_insert(heap, heap->span_reserve);
1013  }
1014  _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
1015  }
1016  return span;
1017 }
1018 
1020 static span_t *
1021 _rpmalloc_span_map(heap_t *heap, size_t span_count) {
1022  if (span_count <= heap->spans_reserved)
1023  return _rpmalloc_span_map_from_reserve(heap, span_count);
1024  return _rpmalloc_span_map_aligned_count(heap, span_count);
1025 }
1026 
1028 static void
1029 _rpmalloc_span_unmap(span_t *span) {
1030  assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
1031  assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
1032 
1033  int is_master = !!(span->flags & SPAN_FLAG_MASTER);
1034  span_t *master = is_master ? span : ((span_t *)pointer_offset(span,
1035  -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size)));
1036  assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN));
1037  assert(master->flags & SPAN_FLAG_MASTER);
1038 
1039  size_t span_count = span->span_count;
1040  if (!is_master) {
1041  //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
1042  assert(span->align_offset == 0);
1043  if (_memory_span_size >= _memory_page_size) {
1044  _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
1045  _rpmalloc_stat_sub(&_reserved_spans, span_count);
1046  }
1047  } else {
1048  //Special double flag to denote an unmapped master
1049  //It must be kept in memory since span header must be used
1050  span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN;
1051  }
1052 
1053  if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
1054  //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
1055  assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN));
1056  size_t unmap_count = master->span_count;
1057  if (_memory_span_size < _memory_page_size)
1058  unmap_count = master->total_spans;
1059  _rpmalloc_stat_sub(&_reserved_spans, unmap_count);
1060  _rpmalloc_stat_sub(&_master_spans, 1);
1061  _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size);
1062  }
1063 }
1064 
1066 static void
1067 _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
1068  assert(heap == span->heap);
1069  assert(span->size_class < SIZE_CLASS_COUNT);
1070 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1071  atomic_decr32(&heap->span_use[0].current);
1072 #endif
1073  _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
1074  _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
1075  _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1076  _rpmalloc_heap_cache_insert(heap, span);
1077 }
1078 
1081 static uint32_t
1082 free_list_partial_init(void **list, void **first_block, void *page_start, void *block_start,
1083  uint32_t block_count, uint32_t block_size) {
1084  assert(block_count);
1085  *first_block = block_start;
1086  if (block_count > 1) {
1087  void *free_block = pointer_offset(block_start, block_size);
1088  void *block_end = pointer_offset(block_start, (size_t)block_size * block_count);
1089  //If block size is less than half a memory page, bound init to next memory page boundary
1090  if (block_size < (_memory_page_size >> 1)) {
1091  void *page_end = pointer_offset(page_start, _memory_page_size);
1092  if (page_end < block_end)
1093  block_end = page_end;
1094  }
1095  *list = free_block;
1096  block_count = 2;
1097  void *next_block = pointer_offset(free_block, block_size);
1098  while (next_block < block_end) {
1099  *((void **)free_block) = next_block;
1100  free_block = next_block;
1101  ++block_count;
1102  next_block = pointer_offset(next_block, block_size);
1103  }
1104  *((void **)free_block) = 0;
1105  } else {
1106  *list = 0;
1107  }
1108  return block_count;
1109 }
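// Worked example (editorial): for the 16-byte size class with a 4KiB system page and the
// 128-byte span header, the first call only covers the first page of the span: blocks start
// at offset 128 and initialization is bounded at offset 4096, so
//   (4096 - 128) / 16 = 248 blocks are linked (1 returned as first_block, 247 on *list).
// The remaining blocks of the span are linked lazily by later calls from
// _rpmalloc_allocate_from_heap_fallback once this partial free list runs dry.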
1110 
1112 static void *
1113 _rpmalloc_span_initialize_new(heap_t *heap, span_t *span, uint32_t class_idx) {
1114  assert(span->span_count == 1);
1115  size_class_t *size_class = _memory_size_class + class_idx;
1116  span->size_class = class_idx;
1117  span->heap = heap;
1118  span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
1119  span->block_size = size_class->block_size;
1120  span->block_count = size_class->block_count;
1121  span->free_list = 0;
1122  span->list_size = 0;
1123  atomic_store_ptr_release(&span->free_list_deferred, 0);
1124 
1125  //Setup free list. Only initialize one system page worth of free blocks in list
1126  void *block;
1127  span->free_list_limit = free_list_partial_init(&heap->free_list[class_idx], &block,
1128  span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
1129  //Link span as partial if blocks remain to be initialized as free list, or full if fully initialized
1130  if (span->free_list_limit < span->block_count) {
1131  _rpmalloc_span_double_link_list_add(&heap->partial_span[class_idx], span);
1132  span->used_count = span->free_list_limit;
1133  } else {
1134 #if RPMALLOC_FIRST_CLASS_HEAPS
1135  _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1136 #endif
1137  ++heap->full_span_count;
1138  span->used_count = span->block_count;
1139  }
1140  return block;
1141 }
1142 
1143 static void
1144 _rpmalloc_span_extract_free_list_deferred(span_t *span) {
1145  // We need acquire semantics on the CAS operation since we are interested in the list size
1146  // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency
1147  do {
1148  span->free_list = atomic_load_ptr(&span->free_list_deferred);
1149  } while ((span->free_list == INVALID_POINTER) ||
1150  !atomic_cas_ptr_acquire(&span->free_list_deferred, INVALID_POINTER, span->free_list));
1151  span->used_count -= span->list_size;
1152  span->list_size = 0;
1153  atomic_store_ptr_release(&span->free_list_deferred, 0);
1154 }
1155 
1156 static int
1157 _rpmalloc_span_is_fully_utilized(span_t *span) {
1158  assert(span->free_list_limit <= span->block_count);
1159  return !span->free_list && (span->free_list_limit >= span->block_count);
1160 }
1161 
1162 static int
1163 _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, span_t **list_head) {
1164  span_t *class_span = (span_t *)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
1165  if (span == class_span) {
1166  // Adopt the heap class free list back into the span free list
1167  void *block = span->free_list;
1168  void *last_block = 0;
1169  while (block) {
1170  last_block = block;
1171  block = *((void **)block);
1172  }
1173  uint32_t free_count = 0;
1174  block = heap->free_list[iclass];
1175  while (block) {
1176  ++free_count;
1177  block = *((void **)block);
1178  }
1179  if (last_block) {
1180  *((void **)last_block) = heap->free_list[iclass];
1181  } else {
1182  span->free_list = heap->free_list[iclass];
1183  }
1184  heap->free_list[iclass] = 0;
1185  span->used_count -= free_count;
1186  }
1187  //If this assert triggers you have memory leaks
1188  assert(span->list_size == span->used_count);
1189  if (span->list_size == span->used_count) {
1190  _rpmalloc_stat_dec(&heap->span_use[0].current);
1191  _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
1192  // This function is only used for spans in double linked lists
1193  if (list_head)
1194  _rpmalloc_span_double_link_list_remove(list_head, span);
1195  _rpmalloc_span_unmap(span);
1196  return 1;
1197  }
1198  return 0;
1199 }
1200 
1201 
1207 
1208 #if ENABLE_GLOBAL_CACHE
1209 
1211 static void
1212 _rpmalloc_global_cache_insert(global_cache_t *cache, span_t *span, size_t cache_limit) {
1213  assert((span->list_size == 1) || (span->next != 0));
1214  int32_t list_size = (int32_t)span->list_size;
1215  //Unmap if cache has reached the limit. Does not need stronger synchronization, the worst
1216  //case is that the span list is unmapped when it could have been cached (no real dependency
1217  //between the two variables)
1218  if (atomic_add32(&cache->size, list_size) > (int32_t)cache_limit) {
1219 #if !ENABLE_UNLIMITED_GLOBAL_CACHE
1220  _rpmalloc_span_list_unmap_all(span);
1221  atomic_add32(&cache->size, -list_size);
1222  return;
1223 #endif
1224  }
1225  void *current_cache, *new_cache;
1226  do {
1227  current_cache = atomic_load_ptr(&cache->cache);
1228  span->prev = (span_t *)((uintptr_t)current_cache & _memory_span_mask);
1229  new_cache = (void *)((uintptr_t)span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
1230  } while (!atomic_cas_ptr(&cache->cache, new_cache, current_cache));
1231 }
1232 
1234 static span_t *
1235 _rpmalloc_global_cache_extract(global_cache_t *cache) {
1236  uintptr_t span_ptr;
1237  do {
1238  void *global_span = atomic_load_ptr(&cache->cache);
1239  span_ptr = (uintptr_t)global_span & _memory_span_mask;
1240  if (span_ptr) {
1241  span_t *span = (span_t *)span_ptr;
1242  //By accessing the span ptr before it is swapped out of the list we assume that a contending thread
1243  //does not manage to traverse and unmap the span before we access it
1244  void *new_cache = (void *)((uintptr_t)span->prev | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
1245  if (atomic_cas_ptr(&cache->cache, new_cache, global_span)) {
1246  atomic_add32(&cache->size, -(int32_t)span->list_size);
1247  return span;
1248  }
1249  }
1250  } while (span_ptr);
1251  return 0;
1252 }
1253 
1255 static void
1256 _rpmalloc_global_cache_finalize(global_cache_t *cache) {
1257  void *current_cache = atomic_load_ptr(&cache->cache);
1258  span_t *span = (span_t *)((uintptr_t)current_cache & _memory_span_mask);
1259  while (span) {
1260  span_t *skip_span = (span_t *)((uintptr_t)span->prev & _memory_span_mask);
1261  atomic_add32(&cache->size, -(int32_t)span->list_size);
1262  _rpmalloc_span_list_unmap_all(span);
1263  span = skip_span;
1264  }
1265  assert(!atomic_load32(&cache->size));
1266  atomic_store_ptr(&cache->cache, 0);
1267  atomic_store32(&cache->size, 0);
1268 }
1269 
1271 static void
1272 _rpmalloc_global_cache_insert_span_list(span_t *span) {
1273  size_t span_count = span->span_count;
1274 #if ENABLE_UNLIMITED_GLOBAL_CACHE
1275  _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, 0);
1276 #else
1277  const size_t cache_limit = (GLOBAL_CACHE_MULTIPLIER * ((span_count == 1) ? _memory_span_release_count :
1278  _memory_span_release_count_large));
1279  _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, cache_limit);
1280 #endif
1281 }
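// Worked example (editorial): with the default THREAD_CACHE_MULTIPLIER of 16,
// GLOBAL_CACHE_MULTIPLIER is 96, so the single-span global cache is capped at
// 96 * _memory_span_release_count spans. When an insert would push the running size
// counter past that cap (and the unlimited global cache is not enabled), the incoming
// span list is unmapped instead of cached; the counter is only an approximation, so the
// worst case is a list being unmapped when it could have been cached.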
1282 
1284 static span_t *
1285 _rpmalloc_global_cache_extract_span_list(size_t span_count) {
1286  span_t *span = _rpmalloc_global_cache_extract(&_memory_span_cache[span_count - 1]);
1287  assert(!span || (span->span_count == span_count));
1288  return span;
1289 }
1290 
1291 #endif
1292 
1293 
1299 
1300 static void _rpmalloc_deallocate_huge(span_t *);
1301 
1303 static void
1304 _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count) {
1305  heap->span_reserve_master = master;
1306  heap->span_reserve = reserve;
1307  heap->spans_reserved = reserve_span_count;
1308 }
1309 
1311 static void
1312 _rpmalloc_heap_cache_adopt_deferred(heap_t *heap, span_t **single_span) {
1313  span_t *span = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
1314  if (!span)
1315  return;
1316  while (!atomic_cas_ptr(&heap->span_free_deferred, 0, span))
1317  span = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
1318  while (span) {
1319  span_t *next_span = (span_t *)span->free_list;
1320  assert(span->heap == heap);
1321  if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
1322  assert(heap->full_span_count);
1323  --heap->full_span_count;
1324 #if RPMALLOC_FIRST_CLASS_HEAPS
1325  _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
1326 #endif
1327  if (single_span && !*single_span) {
1328  *single_span = span;
1329  } else {
1330  _rpmalloc_stat_dec(&heap->span_use[0].current);
1331  _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1332  _rpmalloc_heap_cache_insert(heap, span);
1333  }
1334  } else {
1335  if (span->size_class == SIZE_CLASS_HUGE) {
1336  _rpmalloc_deallocate_huge(span);
1337  } else {
1338  assert(span->size_class == SIZE_CLASS_LARGE);
1339  assert(heap->full_span_count);
1340  --heap->full_span_count;
1341 #if RPMALLOC_FIRST_CLASS_HEAPS
1342  _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
1343 #endif
1344  uint32_t idx = span->span_count - 1;
1345  if (!idx && single_span && !*single_span) {
1346  *single_span = span;
1347  } else {
1348  _rpmalloc_stat_dec(&heap->span_use[idx].current);
1349  _rpmalloc_heap_cache_insert(heap, span);
1350  }
1351  }
1352  }
1353  span = next_span;
1354  }
1355 }
1356 
1357 static void
1358 _rpmalloc_heap_unmap(heap_t *heap) {
1359  if (!heap->master_heap) {
1360  if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
1361  size_t heap_size = sizeof(heap_t);
1362  size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1363  _rpmalloc_unmap(heap, block_size, heap->align_offset, block_size);
1364  }
1365  } else {
1366  if (atomic_decr32(&heap->master_heap->child_count) == 0) {
1367  _rpmalloc_heap_unmap(heap->master_heap);
1368  }
1369  }
1370 }
1371 
1372 static void
1373 _rpmalloc_heap_global_finalize(heap_t *heap) {
1374  if (heap->finalize++ > 1) {
1375  --heap->finalize;
1376  return;
1377  }
1378 
1379  _rpmalloc_heap_finalize(heap);
1380 
1381 #if ENABLE_THREAD_CACHE
1382  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1383  span_t *span = heap->span_cache[iclass];
1384  heap->span_cache[iclass] = 0;
1385  if (span)
1386  _rpmalloc_span_list_unmap_all(span);
1387  }
1388 #endif
1389 
1390  if (heap->full_span_count) {
1391  --heap->finalize;
1392  return;
1393  }
1394 
1395  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1396  if (heap->free_list[iclass] || heap->partial_span[iclass]) {
1397  --heap->finalize;
1398  return;
1399  }
1400  }
1401  //Heap is now completely free, unmap and remove from heap list
1402  size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1403  heap_t *list_heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
1404  if (list_heap == heap) {
1405  atomic_store_ptr(&_memory_heaps[list_idx], heap->next_heap);
1406  } else {
1407  while (list_heap->next_heap != heap)
1408  list_heap = list_heap->next_heap;
1409  list_heap->next_heap = heap->next_heap;
1410  }
1411 
1412  _rpmalloc_heap_unmap(heap);
1413 }
1414 
1416 static void
1417 _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
1418  if (UNEXPECTED(heap->finalize != 0)) {
1419  _rpmalloc_span_unmap(span);
1420  _rpmalloc_heap_global_finalize(heap);
1421  return;
1422  }
1423 #if ENABLE_THREAD_CACHE
1424  size_t span_count = span->span_count;
1425  size_t idx = span_count - 1;
1426  _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_cache);
1427 #if ENABLE_UNLIMITED_THREAD_CACHE
1428  _rpmalloc_span_list_push(&heap->span_cache[idx], span);
1429 #else
1430  const size_t release_count = (!idx ? _memory_span_release_count : _memory_span_release_count_large);
1431  size_t current_cache_size = _rpmalloc_span_list_push(&heap->span_cache[idx], span);
1432  if (current_cache_size <= release_count)
1433  return;
1434  const size_t hard_limit = release_count * THREAD_CACHE_MULTIPLIER;
1435  if (current_cache_size <= hard_limit) {
1436 #if ENABLE_ADAPTIVE_THREAD_CACHE
1437  //Require 25% of high water mark to remain in cache (and at least 1, if use is 0)
1438  const size_t high_mark = heap->span_use[idx].high;
1439  const size_t min_limit = (high_mark >> 2) + release_count + 1;
1440  if (current_cache_size < min_limit)
1441  return;
1442 #else
1443  return;
1444 #endif
1445  }
1446  heap->span_cache[idx] = _rpmalloc_span_list_split(span, release_count);
1447  assert(span->list_size == release_count);
1448 #if ENABLE_GLOBAL_CACHE
1449  _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span_count * _memory_span_size);
1450  _rpmalloc_stat_add(&heap->span_use[idx].spans_to_global, span->list_size);
1451  _rpmalloc_global_cache_insert_span_list(span);
1452 #else
1453  _rpmalloc_span_list_unmap_all(span);
1454 #endif
1455 #endif
1456 #else
1457  (void)sizeof(heap);
1458  _rpmalloc_span_unmap(span);
1459 #endif
1460 }
1461 
1463 static span_t *
1464 _rpmalloc_heap_thread_cache_extract(heap_t *heap, size_t span_count) {
1465  span_t *span = 0;
1466  size_t idx = span_count - 1;
1467  if (!idx)
1468  _rpmalloc_heap_cache_adopt_deferred(heap, &span);
1469 #if ENABLE_THREAD_CACHE
1470  if (!span && heap->span_cache[idx]) {
1471  _rpmalloc_stat_inc(&heap->span_use[idx].spans_from_cache);
1472  span = _rpmalloc_span_list_pop(&heap->span_cache[idx]);
1473  }
1474 #endif
1475  return span;
1476 }
1477 
1478 static span_t *
1479 _rpmalloc_heap_reserved_extract(heap_t *heap, size_t span_count) {
1480  if (heap->spans_reserved >= span_count)
1481  return _rpmalloc_span_map(heap, span_count);
1482  return 0;
1483 }
1484 
1486 static span_t *
1487 _rpmalloc_heap_global_cache_extract(heap_t *heap, size_t span_count) {
1488 #if ENABLE_GLOBAL_CACHE
1489  size_t idx = span_count - 1;
1490  heap->span_cache[idx] = _rpmalloc_global_cache_extract_span_list(span_count);
1491  if (heap->span_cache[idx]) {
1492  _rpmalloc_stat_add64(&heap->global_to_thread, (size_t)heap->span_cache[idx]->list_size * span_count * _memory_span_size);
1493  _rpmalloc_stat_add(&heap->span_use[idx].spans_from_global, heap->span_cache[idx]->list_size);
1494  return _rpmalloc_span_list_pop(&heap->span_cache[idx]);
1495  }
1496 #endif
1497  (void)sizeof(heap);
1498  (void)sizeof(span_count);
1499  return 0;
1500 }
1501 
1503 static span_t *
1504 _rpmalloc_heap_extract_new_span(heap_t *heap, size_t span_count, uint32_t class_idx) {
1505  (void)sizeof(class_idx);
1506 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1507  uint32_t idx = (uint32_t)span_count - 1;
1508  uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current);
1509  if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
1510  atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
1511  _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak);
1512 #endif
1513  span_t *span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
1514  if (EXPECTED(span != 0)) {
1515  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1516  return span;
1517  }
1518  span = _rpmalloc_heap_reserved_extract(heap, span_count);
1519  if (EXPECTED(span != 0)) {
1520  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
1521  return span;
1522  }
1523  span = _rpmalloc_heap_global_cache_extract(heap, span_count);
1524  if (EXPECTED(span != 0)) {
1525  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1526  return span;
1527  }
1528  //Final fallback, map in more virtual memory
1529  span = _rpmalloc_span_map(heap, span_count);
1530  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
1531  return span;
1532 }
1533 
1534 static void
1535 _rpmalloc_heap_initialize(heap_t *heap) {
1536  memset(heap, 0, sizeof(heap_t));
1537 
1538  //Get a new heap ID
1539  heap->id = 1 + atomic_incr32(&_memory_heap_id);
1540 
1541  //Link in heap in heap ID map
1542  heap_t *next_heap;
1543  size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1544  do {
1545  next_heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
1546  heap->next_heap = next_heap;
1547  } while (!atomic_cas_ptr(&_memory_heaps[list_idx], heap, next_heap));
1548 }
1549 
1550 static void
1551 _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
1552  void *raw_heap;
1553  uintptr_t orphan_counter;
1554  heap_t *last_heap;
1555  heap->owner_thread = (uintptr_t)-1;
1556 #if RPMALLOC_FIRST_CLASS_HEAPS
1557  atomicptr_t *heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
1558 #else
1559  (void)sizeof(first_class);
1560  atomicptr_t *heap_list = &_memory_orphan_heaps;
1561 #endif
1562  do {
1563  last_heap = (heap_t *)atomic_load_ptr(heap_list);
1564  heap->next_orphan = (heap_t *)((uintptr_t)last_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
1565  orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
1566  raw_heap = (void *)((uintptr_t)heap | (orphan_counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
1567  } while (!atomic_cas_ptr(heap_list, raw_heap, last_heap));
1568 }
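// Worked example (editorial): heaps are placed on HEAP_ORPHAN_ABA_SIZE (512) byte boundaries
// (see _rpmalloc_heap_allocate_new), so the low 9 bits of a heap pointer are always zero.
// The orphan list head therefore packs a monotonically increasing counter into those bits:
//   raw_heap = (uintptr_t)heap | (orphan_counter & 511);
// which makes the compare-and-swap fail if another thread pops and re-pushes the same heap
// in between (the classic ABA scenario), since the counter bits no longer match.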
1569 
1571 static heap_t *
1572 _rpmalloc_heap_allocate_new(void) {
1573  //Map in pages for a new heap
1574  size_t align_offset = 0;
1575  size_t heap_size = sizeof(heap_t);
1576  size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1577  heap_t *heap = (heap_t *)_rpmalloc_mmap(block_size, &align_offset);
1578  if (!heap)
1579  return heap;
1580 
1581  _rpmalloc_heap_initialize(heap);
1582  heap->align_offset = align_offset;
1583 
1584  //Put extra heaps as orphans, aligning to make sure ABA protection bits fit in pointer low bits
1585  size_t aligned_heap_size = HEAP_ORPHAN_ABA_SIZE * ((heap_size + HEAP_ORPHAN_ABA_SIZE - 1) / HEAP_ORPHAN_ABA_SIZE);
1586  size_t num_heaps = block_size / aligned_heap_size;
1587  atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
1588  heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
1589  while (num_heaps > 1) {
1590  _rpmalloc_heap_initialize(extra_heap);
1591  extra_heap->master_heap = heap;
1592  _rpmalloc_heap_orphan(extra_heap, 1);
1593  extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
1594  --num_heaps;
1595  }
1596  return heap;
1597 }
1598 
1599 static heap_t *
1600 _rpmalloc_heap_extract_orphan(atomicptr_t *heap_list) {
1601  void *raw_heap;
1602  void *next_raw_heap;
1603  uintptr_t orphan_counter;
1604  heap_t *heap;
1605  heap_t *next_heap;
1606  do {
1607  raw_heap = atomic_load_ptr(heap_list);
1608  heap = (heap_t *)((uintptr_t)raw_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
1609  if (!heap)
1610  break;
1611  next_heap = heap->next_orphan;
1612  orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
1613  next_raw_heap = (void *)((uintptr_t)next_heap | (orphan_counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
1614  } while (!atomic_cas_ptr(heap_list, next_raw_heap, raw_heap));
1615  return heap;
1616 }
1617 
1619 static heap_t *
1620 _rpmalloc_heap_allocate(int first_class) {
1621  heap_t *heap = 0;
1622  if (first_class == 0)
1623  heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
1624 #if RPMALLOC_FIRST_CLASS_HEAPS
1625  if (!heap)
1626  heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
1627 #endif
1628  if (!heap)
1629  heap = _rpmalloc_heap_allocate_new();
1630  return heap;
1631 }
1632 
1633 static void
1634 _rpmalloc_heap_release(void *heapptr, int first_class) {
1635  heap_t *heap = (heap_t *)heapptr;
1636  if (!heap)
1637  return;
1638  //Release thread cache spans back to global cache
1639  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
1640 #if ENABLE_THREAD_CACHE
1641  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1642  span_t *span = heap->span_cache[iclass];
1643  heap->span_cache[iclass] = 0;
1644  if (span && heap->finalize) {
1645  _rpmalloc_span_list_unmap_all(span);
1646  continue;
1647  }
1648 #if ENABLE_GLOBAL_CACHE
1649  while (span) {
1650  assert(span->span_count == (iclass + 1));
1651  size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
1652  span_t *next = _rpmalloc_span_list_split(span, (uint32_t)release_count);
1653  _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
1654  _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
1655  _rpmalloc_global_cache_insert_span_list(span);
1656  span = next;
1657  }
1658 #else
1659  if (span)
1660  _rpmalloc_span_list_unmap_all(span);
1661 #endif
1662  }
1663 #endif
1664 
1665  //Orphan the heap
1666  _rpmalloc_heap_orphan(heap, first_class);
1667 
1668  if (get_thread_heap_raw() == heap)
1669  set_thread_heap(0);
1670 #if ENABLE_STATISTICS
1671  atomic_decr32(&_memory_active_heaps);
1672  assert(atomic_load32(&_memory_active_heaps) >= 0);
1673 #endif
1674 }
1675 
1676 static void
1677 _rpmalloc_heap_release_raw(void *heapptr) {
1678  _rpmalloc_heap_release(heapptr, 0);
1679 }
1680 
1681 static void
1682 _rpmalloc_heap_finalize(heap_t *heap) {
1683  if (heap->spans_reserved) {
1684  span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
1685  _rpmalloc_span_unmap(span);
1686  heap->spans_reserved = 0;
1687  }
1688 
1689  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
1690 
1691  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1692  span_t *span = heap->partial_span[iclass];
1693  while (span) {
1694  span_t *next = span->next;
1695  _rpmalloc_span_finalize(heap, iclass, span, &heap->partial_span[iclass]);
1696  span = next;
1697  }
1698  // If class still has a free list it must be a full span
1699  if (heap->free_list[iclass]) {
1700  span_t *class_span = (span_t *)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
1701  span_t **list = 0;
1702 #if RPMALLOC_FIRST_CLASS_HEAPS
1703  list = &heap->full_span[iclass];
1704 #endif
1705  --heap->full_span_count;
1706  if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
1707  if (list)
1708  _rpmalloc_span_double_link_list_remove(list, class_span);
1709  _rpmalloc_span_double_link_list_add(&heap->partial_span[iclass], class_span);
1710  }
1711  }
1712  }
1713 
1714 #if ENABLE_THREAD_CACHE
1715  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1716  if (heap->span_cache[iclass]) {
1717  _rpmalloc_span_list_unmap_all(heap->span_cache[iclass]);
1718  heap->span_cache[iclass] = 0;
1719  }
1720  }
1721 #endif
1722  assert(!atomic_load_ptr(&heap->span_free_deferred));
1723 }
1724 
1725 
1731 
1733 static void *
1734 free_list_pop(void **list) {
1735  void *block = *list;
1736  *list = *((void **)block);
1737  return block;
1738 }
1739 
1741 static void *
1742 _rpmalloc_allocate_from_heap_fallback(heap_t *heap, uint32_t class_idx) {
1743  span_t *span = heap->partial_span[class_idx];
1744  if (EXPECTED(span != 0)) {
1745  assert(span->block_count == _memory_size_class[span->size_class].block_count);
1746  assert(!_rpmalloc_span_is_fully_utilized(span));
1747  void *block;
1748  if (span->free_list) {
1749  //Swap in free list if not empty
1750  heap->free_list[class_idx] = span->free_list;
1751  span->free_list = 0;
1752  block = free_list_pop(&heap->free_list[class_idx]);
1753  } else {
1754  //If the span did not fully initialize free list, link up another page worth of blocks
1755  void *block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size));
1756  span->free_list_limit += free_list_partial_init(&heap->free_list[class_idx], &block,
1757  (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
1758  span->block_count - span->free_list_limit, span->block_size);
1759  }
1760  assert(span->free_list_limit <= span->block_count);
1761  span->used_count = span->free_list_limit;
1762 
1763  //Swap in deferred free list if present
1764  if (atomic_load_ptr(&span->free_list_deferred))
1765  _rpmalloc_span_extract_free_list_deferred(span);
1766 
1767  //If the span is still not fully utilized, keep it in the partial list and return the block early
1768  if (!_rpmalloc_span_is_fully_utilized(span))
1769  return block;
1770 
1771  //The span is fully utilized, unlink from partial list and add to fully utilized list
1772  _rpmalloc_span_double_link_list_pop_head(&heap->partial_span[class_idx], span);
1773 #if RPMALLOC_FIRST_CLASS_HEAPS
1774  _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1775 #endif
1776  ++heap->full_span_count;
1777  return block;
1778  }
1779 
1780  //Find a span in one of the cache levels
1781  span = _rpmalloc_heap_extract_new_span(heap, 1, class_idx);
1782  if (EXPECTED(span != 0)) {
1783  //Mark span as owned by this heap and set base data, return first block
1784  return _rpmalloc_span_initialize_new(heap, span, class_idx);
1785  }
1786 
1787  return 0;
1788 }
1789 
1791 static void *
1792 _rpmalloc_allocate_small(heap_t *heap, size_t size) {
1793  assert(heap);
1794  //Small sizes have unique size classes
1795  const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
1796  _rpmalloc_stat_inc_alloc(heap, class_idx);
1797  if (EXPECTED(heap->free_list[class_idx] != 0))
1798  return free_list_pop(&heap->free_list[class_idx]);
1799  return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1800 }
1801 
1803 static void *
1804 _rpmalloc_allocate_medium(heap_t *heap, size_t size) {
1805  assert(heap);
1806  //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
1807  const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
1808  const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
1809  _rpmalloc_stat_inc_alloc(heap, class_idx);
1810  if (EXPECTED(heap->free_list[class_idx] != 0))
1811  return free_list_pop(&heap->free_list[class_idx]);
1812  return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1813 }
1814 
1816 static void *
1817 _rpmalloc_allocate_large(heap_t *heap, size_t size) {
1818  assert(heap);
1819  //Calculate number of needed max sized spans (including header)
1820  //Since this function is never called if size > LARGE_SIZE_LIMIT
1821  //the span_count is guaranteed to be <= LARGE_CLASS_COUNT
1822  size += SPAN_HEADER_SIZE;
1823  size_t span_count = size >> _memory_span_size_shift;
1824  if (size & (_memory_span_size - 1))
1825  ++span_count;
1826 
1827  //Find a span in one of the cache levels
1828  span_t *span = _rpmalloc_heap_extract_new_span(heap, span_count, SIZE_CLASS_LARGE);
1829  if (!span)
1830  return span;
1831 
1832  //Mark span as owned by this heap and set base data
1833  assert(span->span_count == span_count);
1834  span->size_class = SIZE_CLASS_LARGE;
1835  span->heap = heap;
1836 
1837 #if RPMALLOC_FIRST_CLASS_HEAPS
1838  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1839 #endif
1840  ++heap->full_span_count;
1841 
1842  return pointer_offset(span, SPAN_HEADER_SIZE);
1843 }
1844 
1846 static void *
1847 _rpmalloc_allocate_huge(heap_t *heap, size_t size) {
1848  assert(heap);
1849  size += SPAN_HEADER_SIZE;
1850  size_t num_pages = size >> _memory_page_size_shift;
1851  if (size & (_memory_page_size - 1))
1852  ++num_pages;
1853  size_t align_offset = 0;
1854  span_t *span = (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
1855  if (!span)
1856  return span;
1857 
1858  //Store page count in span_count
1859  span->size_class = SIZE_CLASS_HUGE;
1860  span->span_count = (uint32_t)num_pages;
1861  span->align_offset = (uint32_t)align_offset;
1862  span->heap = heap;
1863  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1864 
1865 #if RPMALLOC_FIRST_CLASS_HEAPS
1866  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1867 #endif
1868  ++heap->full_span_count;
1869 
1870  return pointer_offset(span, SPAN_HEADER_SIZE);
1871 }
1872 
1874 static void *
1875 _rpmalloc_allocate(heap_t *heap, size_t size) {
1876  if (EXPECTED(size <= SMALL_SIZE_LIMIT))
1877  return _rpmalloc_allocate_small(heap, size);
1878  else if (size <= _memory_medium_size_limit)
1879  return _rpmalloc_allocate_medium(heap, size);
1880  else if (size <= LARGE_SIZE_LIMIT)
1881  return _rpmalloc_allocate_large(heap, size);
1882  return _rpmalloc_allocate_huge(heap, size);
1883 }
1884 
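 // Editor's note: illustrative sketch only, not part of the original rpmalloc.c.
 // With a default build (64KiB spans; exact limits depend on configuration),
 // the dispatcher above routes a request roughly as follows:
 //
 //   rpmalloc(64);          // small size class, served from a per-size free list
 //   rpmalloc(16 * 1024);   // medium size class (medium limit is about 32KiB)
 //   rpmalloc(256 * 1024);  // large allocation built from multiple 64KiB spans (up to ~2MiB)
 //   rpmalloc(8 << 20);     // huge allocation, mapped directly from the OS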
1885 static void *
1886 _rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, size_t size) {
1887  if (alignment <= SMALL_GRANULARITY)
1888  return _rpmalloc_allocate(heap, size);
1889 
1890 #if ENABLE_VALIDATE_ARGS
1891  if ((size + alignment) < size) {
1892  errno = EINVAL;
1893  return 0;
1894  }
1895  if (alignment & (alignment - 1)) {
1896  errno = EINVAL;
1897  return 0;
1898  }
1899 #endif
1900 
1901  if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) {
1902  // If alignment is less than or equal to the span header size (which is a power of two),
1903  // and the size rounded up to a multiple of the span header size is no larger than size + alignment,
1904  // then the natural alignment of blocks provides the requested alignment
1905  size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE;
1906  assert(!(multiple_size % SPAN_HEADER_SIZE));
1907  if (multiple_size <= (size + alignment))
1908  return _rpmalloc_allocate(heap, multiple_size);
1909  }
1910 
1911  void *ptr = 0;
1912  size_t align_mask = alignment - 1;
1913  if (alignment <= _memory_page_size) {
1914  ptr = _rpmalloc_allocate(heap, size + alignment);
1915  if ((uintptr_t)ptr & align_mask) {
1916  ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1917  //Mark as having aligned blocks
1918  span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
1919  span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
1920  }
1921  return ptr;
1922  }
1923 
1924  // Fallback to mapping new pages for this request. Since pointers passed
1925  // to rpfree must be able to reach the start of the span by bitmasking of
1926  // the address with the span size, the returned aligned pointer from this
1927  // function must be within a span size of the start of the mapped area.
1928  // In worst case this requires us to loop and map pages until we get a
1929  // suitable memory address. It also means we can never align to span size
1930  // or greater, since the span header will push alignment more than one
1931  // span size away from span start (thus causing pointer mask to give us
1932  // an invalid span start on free)
1933  if (alignment & align_mask) {
1934  errno = EINVAL;
1935  return 0;
1936  }
1937  if (alignment >= _memory_span_size) {
1938  errno = EINVAL;
1939  return 0;
1940  }
1941 
1942  size_t extra_pages = alignment / _memory_page_size;
1943 
1944  // Since each span has a header, we will at least need one extra memory page
1945  size_t num_pages = 1 + (size / _memory_page_size);
1946  if (size & (_memory_page_size - 1))
1947  ++num_pages;
1948 
1949  if (extra_pages > num_pages)
1950  num_pages = 1 + extra_pages;
1951 
1952  size_t original_pages = num_pages;
1953  size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
1954  if (limit_pages < (original_pages * 2))
1955  limit_pages = original_pages * 2;
1956 
1957  size_t mapped_size, align_offset;
1958  span_t *span;
1959 
1960 retry:
1961  align_offset = 0;
1962  mapped_size = num_pages * _memory_page_size;
1963 
1964  span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
1965  if (!span) {
1966  errno = ENOMEM;
1967  return 0;
1968  }
1969  ptr = pointer_offset(span, SPAN_HEADER_SIZE);
1970 
1971  if ((uintptr_t)ptr & align_mask)
1972  ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1973 
1974  if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
1975  (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
1976  (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
1977  _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
1978  ++num_pages;
1979  if (num_pages > limit_pages) {
1980  errno = EINVAL;
1981  return 0;
1982  }
1983  goto retry;
1984  }
1985 
1986  //Store page count in span_count
1987  span->size_class = SIZE_CLASS_HUGE;
1988  span->span_count = (uint32_t)num_pages;
1989  span->align_offset = (uint32_t)align_offset;
1990  span->heap = heap;
1991  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1992 
1993 #if RPMALLOC_FIRST_CLASS_HEAPS
1994  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1995 #endif
1996  ++heap->full_span_count;
1997 
1998  return ptr;
1999 }
2000 
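 // Editor's note: usage sketch, not part of the original source. As enforced above,
 // the requested alignment must be a power of two and strictly smaller than the
 // span size (64KiB by default); otherwise the call fails with errno set to EINVAL.
 //
 //   void *ok  = rpaligned_alloc(4096, 100);         // page-aligned, succeeds
 //   void *bad = rpaligned_alloc(128 * 1024, 100);   // >= span size, returns 0, errno == EINVAL
 //   rpfree(ok);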
2001 
2007 
2009 static void
2010 _rpmalloc_deallocate_direct_small_or_medium(span_t *span, void *block) {
2011  heap_t *heap = span->heap;
2012  assert(heap->owner_thread == get_thread_id() || heap->finalize);
2013  //Add block to free list
2014  if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
2015  span->used_count = span->block_count;
2016 #if RPMALLOC_FIRST_CLASS_HEAPS
2017  _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
2018 #endif
2019  _rpmalloc_span_double_link_list_add(&heap->partial_span[span->size_class], span);
2020  --heap->full_span_count;
2021  }
2022  --span->used_count;
2023  *((void **)block) = span->free_list;
2024  span->free_list = block;
2025  if (UNEXPECTED(span->used_count == span->list_size)) {
2026  _rpmalloc_span_double_link_list_remove(&heap->partial_span[span->size_class], span);
2027  _rpmalloc_span_release_to_cache(heap, span);
2028  }
2029 }
2030 
2031 static void
2032 _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
2033  //This list does not need ABA protection, no mutable side state
2034  do {
2035  span->free_list = atomic_load_ptr(&heap->span_free_deferred);
2036  } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
2037 }
2038 
2040 static void
2041 _rpmalloc_deallocate_defer_small_or_medium(span_t *span, void *block) {
2042  // The memory ordering here is a bit tricky. To avoid having to ABA protect
2043  // the deferred free list while keeping the list and its size in sync,
2044  // we need acquire semantics on a successful CAS of the pointer to
2045  // guarantee that list_size is valid, plus release semantics on the pointer store
2046  void *free_list;
2047  do {
2048  free_list = atomic_load_ptr(&span->free_list_deferred);
2049  *((void **)block) = free_list;
2050  } while ((free_list == INVALID_POINTER) || !atomic_cas_ptr_acquire(&span->free_list_deferred, INVALID_POINTER, free_list));
2051  uint32_t free_count = ++span->list_size;
2052  atomic_store_ptr_release(&span->free_list_deferred, block);
2053  if (free_count == span->block_count) {
2054  // Span was completely freed by this block. Due to the INVALID_POINTER spin lock
2055  // no other thread can reach this state simultaneously on this span.
2056  // Safe to move to owner heap deferred cache
2057  _rpmalloc_deallocate_defer_free_span(span->heap, span);
2058  }
2059 }
2060 
2061 static void
2062 _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
2063  _rpmalloc_stat_inc_free(span->heap, span->size_class);
2064  if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
2065  //Realign pointer to block start
2066  void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2067  uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2068  p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
2069  }
2070  //Check if block belongs to this heap or if deallocation should be deferred
2071  if ((span->heap->owner_thread == get_thread_id()) || span->heap->finalize)
2072  _rpmalloc_deallocate_direct_small_or_medium(span, p);
2073  else
2074  _rpmalloc_deallocate_defer_small_or_medium(span, p);
2075 }
2076 
2078 static void
2079 _rpmalloc_deallocate_large(span_t *span) {
2080  assert(span->size_class == SIZE_CLASS_LARGE);
2081  assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
2082  assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
2083  //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap
2084  int defer = (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize;
2085  if (defer) {
2086  _rpmalloc_deallocate_defer_free_span(span->heap, span);
2087  return;
2088  }
2089  assert(span->heap->full_span_count);
2090  --span->heap->full_span_count;
2091 #if RPMALLOC_FIRST_CLASS_HEAPS
2092  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2093 #endif
2094 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2095  //Decrease counter
2096  size_t idx = span->span_count - 1;
2097  atomic_decr32(&span->heap->span_use[idx].current);
2098 #endif
2099  heap_t *heap = get_thread_heap();
2100  assert(heap);
2101  span->heap = heap;
2102  if ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved) {
2103  heap->span_reserve = span;
2104  heap->spans_reserved = span->span_count;
2105  if (span->flags & SPAN_FLAG_MASTER) {
2106  heap->span_reserve_master = span;
2107  } else { //SPAN_FLAG_SUBSPAN
2108  span_t *master = (span_t *)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
2109  heap->span_reserve_master = master;
2110  assert(master->flags & SPAN_FLAG_MASTER);
2111  assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count);
2112  }
2113  _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
2114  } else {
2115  //Insert into cache list
2116  _rpmalloc_heap_cache_insert(heap, span);
2117  }
2118 }
2119 
2121 static void
2122 _rpmalloc_deallocate_huge(span_t *span) {
2123  assert(span->heap);
2124  if ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize) {
2125  _rpmalloc_deallocate_defer_free_span(span->heap, span);
2126  return;
2127  }
2128  assert(span->heap->full_span_count);
2129  --span->heap->full_span_count;
2130 #if RPMALLOC_FIRST_CLASS_HEAPS
2131  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2132 #endif
2133 
2134  //Oversized allocation, page count is stored in span_count
2135  size_t num_pages = span->span_count;
2136  _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size);
2137  _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
2138 }
2139 
2141 static void
2142 _rpmalloc_deallocate(void *p) {
2143  //Grab the span (always at start of span, using span alignment)
2144  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2145 
2146  if (UNEXPECTED(!span))
2147  return;
2148  if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
2149  _rpmalloc_deallocate_small_or_medium(span, p);
2150  else if (span->size_class == SIZE_CLASS_LARGE)
2151  _rpmalloc_deallocate_large(span);
2152  else
2153  _rpmalloc_deallocate_huge(span);
2154 }
2155 
2156 
2162 
2163 static size_t
2164 _rpmalloc_usable_size(void *p);
2165 
2167 static void *
2168 _rpmalloc_reallocate(heap_t *heap, void *p, size_t size, size_t oldsize, unsigned int flags) {
2169  if (p) {
2170  //Grab the span using guaranteed span alignment
2171  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2172  if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
2173  //Small/medium sized block
2174  assert(span->span_count == 1);
2175  void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2176  uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2177  uint32_t block_idx = block_offset / span->block_size;
2178  void *block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
2179  if (!oldsize)
2180  oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
2181  if ((size_t)span->block_size >= size) {
2182  //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2183  if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2184  memmove(block, p, oldsize);
2185  return block;
2186  }
2187  } else if (span->size_class == SIZE_CLASS_LARGE) {
2188  //Large block
2189  size_t total_size = size + SPAN_HEADER_SIZE;
2190  size_t num_spans = total_size >> _memory_span_size_shift;
2191  if (total_size & (_memory_span_mask - 1))
2192  ++num_spans;
2193  size_t current_spans = span->span_count;
2194  void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2195  if (!oldsize)
2196  oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2197  if ((current_spans >= num_spans) && (num_spans >= (current_spans / 2))) {
2198  //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2199  if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2200  memmove(block, p, oldsize);
2201  return block;
2202  }
2203  } else {
2204  //Oversized block
2205  size_t total_size = size + SPAN_HEADER_SIZE;
2206  size_t num_pages = total_size >> _memory_page_size_shift;
2207  if (total_size & (_memory_page_size - 1))
2208  ++num_pages;
2209  //Page count is stored in span_count
2210  size_t current_pages = span->span_count;
2211  void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2212  if (!oldsize)
2213  oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2214  if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
2215  //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2216  if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2217  memmove(block, p, oldsize);
2218  return block;
2219  }
2220  }
2221  } else {
2222  oldsize = 0;
2223  }
2224 
2225  if (!!(flags & RPMALLOC_GROW_OR_FAIL))
2226  return 0;
2227 
2228  //Size is greater than block size, need to allocate a new block and deallocate the old
2229  //Avoid hysteresis by overallocating if increase is small (below 37%)
2230  size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
2231  size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
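 // Worked example (editor's note): oldsize = 1024 gives lower_bound = 1024 + 256 + 128 = 1408,
 // i.e. roughly 1.37x the old size. A request to grow to 1200 bytes is therefore rounded up
 // to 1408 to avoid reallocation churn, a request for 4096 bytes is allocated as-is,
 // and a shrink request simply keeps the requested size.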
2232  void *block = _rpmalloc_allocate(heap, new_size);
2233  if (p && block) {
2234  if (!(flags & RPMALLOC_NO_PRESERVE))
2235  memcpy(block, p, oldsize < new_size ? oldsize : new_size);
2236  _rpmalloc_deallocate(p);
2237  }
2238 
2239  return block;
2240 }
2241 
2242 static void *
2243 _rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, size_t alignment, size_t size, size_t oldsize,
2244  unsigned int flags) {
2245  if (alignment <= SMALL_GRANULARITY)
2246  return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
2247 
2248  int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
2249  size_t usablesize = _rpmalloc_usable_size(ptr);
2250  if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
2251  if (no_alloc || (size >= (usablesize / 2)))
2252  return ptr;
2253  }
2254  // Aligned alloc marks span as having aligned blocks
2255  void *block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
2256  if (EXPECTED(block != 0)) {
2257  if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
2258  if (!oldsize)
2259  oldsize = usablesize;
2260  memcpy(block, ptr, oldsize < size ? oldsize : size);
2261  }
2262  _rpmalloc_deallocate(ptr);
2263  }
2264  return block;
2265 }
2266 
2267 
2273 
2275 static size_t
2276 _rpmalloc_usable_size(void *p) {
2277  //Grab the span using guaranteed span alignment
2278  span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2279  if (span->size_class < SIZE_CLASS_COUNT) {
2280  //Small/medium block
2281  void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2282  fprintf(stderr, "RPV %p, %p %p %d\n", span, blocks_start, p, span->block_size);
2283  return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size);
2284  }
2285  if (span->size_class == SIZE_CLASS_LARGE) {
2286  //Large block
2287  size_t current_spans = span->span_count;
2288  return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
2289  }
2290  //Oversized block, page count is stored in span_count
2291  size_t current_pages = span->span_count;
2292  return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
2293 }
2294 
2296 static void
2297 _rpmalloc_adjust_size_class(size_t iclass) {
2298  size_t block_size = _memory_size_class[iclass].block_size;
2299  size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
2300 
2301  _memory_size_class[iclass].block_count = (uint16_t)block_count;
2302  _memory_size_class[iclass].class_idx = (uint16_t)iclass;
2303 
2304  //Check if previous size classes can be merged
2305  size_t prevclass = iclass;
2306  while (prevclass > 0) {
2307  --prevclass;
2308  //Classes can be merged if they yield the same number of blocks per span
2309  if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count)
2310  memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass]));
2311  else
2312  break;
2313  }
2314 }
2315 
2317 extern inline int
2318 rpmalloc_initialize(void) {
2319  if (_rpmalloc_initialized) {
2320  rpmalloc_thread_initialize();
2321  return 0;
2322  }
2323  return rpmalloc_initialize_config(0);
2324 }
2325 
2326 int
2327 rpmalloc_initialize_config(const rpmalloc_config_t *config) {
2328  if (_rpmalloc_initialized) {
2329  rpmalloc_thread_initialize();
2330  return 0;
2331  }
2332  _rpmalloc_initialized = 1;
2333 
2334  if (config)
2335  memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
2336  else
2337  memset(&_memory_config, 0, sizeof(rpmalloc_config_t));
2338 
2339  if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
2340  _memory_config.memory_map = _rpmalloc_mmap_os;
2341  _memory_config.memory_unmap = _rpmalloc_unmap_os;
2342  }
2343 
2344 #if RPMALLOC_CONFIGURABLE
2345  _memory_page_size = _memory_config.page_size;
2346 #else
2347  _memory_page_size = 0;
2348 #endif
2349  _memory_huge_pages = 0;
2350  _memory_map_granularity = _memory_page_size;
2351  if (!_memory_page_size) {
2352 #if PLATFORM_WINDOWS
2353  SYSTEM_INFO system_info;
2354  memset(&system_info, 0, sizeof(system_info));
2355  GetSystemInfo(&system_info);
2356  _memory_page_size = system_info.dwPageSize;
2357  _memory_map_granularity = system_info.dwAllocationGranularity;
2358 #else
2359  _memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
2360  _memory_map_granularity = _memory_page_size;
2361  if (_memory_config.enable_huge_pages) {
2362 #if defined(__linux__)
2363  size_t huge_page_size = 0;
2364  FILE *meminfo = fopen("/proc/meminfo", "r");
2365  if (meminfo) {
2366  char line[128];
2367  while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
2368  line[sizeof(line) - 1] = 0;
2369  if (strstr(line, "Hugepagesize:"))
2370  huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
2371  }
2372  fclose(meminfo);
2373  }
2374  if (huge_page_size) {
2375  _memory_huge_pages = 1;
2376  _memory_page_size = huge_page_size;
2377  _memory_map_granularity = huge_page_size;
2378  }
2379 #elif defined(__FreeBSD__)
2380  int rc;
2381  size_t sz = sizeof(rc);
2382 
2383  if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
2384  _memory_huge_pages = 1;
2385  _memory_page_size = 2 * 1024 * 1024;
2386  _memory_map_granularity = _memory_page_size;
2387  }
2388 #elif defined(__APPLE__)
2389  _memory_huge_pages = 1;
2390  _memory_page_size = 2 * 1024 * 1024;
2391  _memory_map_granularity = _memory_page_size;
2392 #endif
2393  }
2394 #endif
2395  } else {
2396  if (_memory_config.enable_huge_pages)
2397  _memory_huge_pages = 1;
2398  }
2399 
2400 #if PLATFORM_WINDOWS
2401  if (_memory_config.enable_huge_pages) {
2402  HANDLE token = 0;
2403  size_t large_page_minimum = GetLargePageMinimum();
2404  if (large_page_minimum)
2405  OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
2406  if (token) {
2407  LUID luid;
2408  if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
2409  TOKEN_PRIVILEGES token_privileges;
2410  memset(&token_privileges, 0, sizeof(token_privileges));
2411  token_privileges.PrivilegeCount = 1;
2412  token_privileges.Privileges[0].Luid = luid;
2413  token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
2414  if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
2415  DWORD err = GetLastError();
2416  if (err == ERROR_SUCCESS) {
2417  _memory_huge_pages = 1;
2418  if (large_page_minimum > _memory_page_size)
2419  _memory_page_size = large_page_minimum;
2420  if (large_page_minimum > _memory_map_granularity)
2421  _memory_map_granularity = large_page_minimum;
2422  }
2423  }
2424  }
2425  CloseHandle(token);
2426  }
2427  }
2428 #endif
2429 
2430  //The ABA counter in heap orphan list is tied to using HEAP_ORPHAN_ABA_SIZE
2431  size_t min_span_size = HEAP_ORPHAN_ABA_SIZE;
2432  size_t max_page_size;
2433 #if UINTPTR_MAX > 0xFFFFFFFF
2434  max_page_size = 4096ULL * 1024ULL * 1024ULL;
2435 #else
2436  max_page_size = 4 * 1024 * 1024;
2437 #endif
2438  if (_memory_page_size < min_span_size)
2439  _memory_page_size = min_span_size;
2440  if (_memory_page_size > max_page_size)
2441  _memory_page_size = max_page_size;
2442  _memory_page_size_shift = 0;
2443  size_t page_size_bit = _memory_page_size;
2444  while (page_size_bit != 1) {
2445  ++_memory_page_size_shift;
2446  page_size_bit >>= 1;
2447  }
2448  _memory_page_size = ((size_t)1 << _memory_page_size_shift);
2449 
2450 #if RPMALLOC_CONFIGURABLE
2451  size_t span_size = _memory_config.span_size;
2452  if (!span_size)
2453  span_size = (64 * 1024);
2454  if (span_size > (256 * 1024))
2455  span_size = (256 * 1024);
2456  _memory_span_size = 4096;
2457  _memory_span_size_shift = 12;
2458  while (_memory_span_size < span_size) {
2459  _memory_span_size <<= 1;
2460  ++_memory_span_size_shift;
2461  }
2462  _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
2463 #endif
2464 
2465  _memory_span_map_count = (_memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT);
2466  if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
2467  _memory_span_map_count = (_memory_page_size / _memory_span_size);
2468  if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
2469  _memory_span_map_count = (_memory_page_size / _memory_span_size);
2470 
2471  _memory_config.page_size = _memory_page_size;
2472  _memory_config.span_size = _memory_span_size;
2473  _memory_config.span_map_count = _memory_span_map_count;
2474  _memory_config.enable_huge_pages = _memory_huge_pages;
2475 
2476  _memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4);
2477  _memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2);
2478 
2479 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2480  if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw))
2481  return -1;
2482 #endif
2483 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2484  fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
2485 #endif
2486 
2487  //Setup all small and medium size classes
2488  size_t iclass = 0;
2489  _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
2490  _rpmalloc_adjust_size_class(iclass);
2491  for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
2492  size_t size = iclass * SMALL_GRANULARITY;
2493  _memory_size_class[iclass].block_size = (uint32_t)size;
2494  _rpmalloc_adjust_size_class(iclass);
2495  }
2496  //At least two blocks per span, then fall back to large allocations
2497  _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
2498  if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
2499  _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
2500  for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
2501  size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
2502  if (size > _memory_medium_size_limit)
2503  break;
2504  _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
2505  _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
2506  }
2507 
2508  atomic_store_ptr(&_memory_orphan_heaps, 0);
2509 #if RPMALLOC_FIRST_CLASS_HEAPS
2510  atomic_store_ptr(&_memory_first_class_orphan_heaps, 0);
2511 #endif
2512  for (size_t ilist = 0, lsize = (sizeof(_memory_heaps) / sizeof(_memory_heaps[0])); ilist < lsize; ++ilist)
2513  atomic_store_ptr(&_memory_heaps[ilist], 0);
2514 
2515  //Initialize this thread
2516  rpmalloc_thread_initialize();
2517  return 0;
2518 }
2519 
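 // Editor's note: minimal configuration sketch with hypothetical values, not part of the
 // original source. Unset fields fall back to the defaults computed above; span_size is
 // only honored when the library is built with RPMALLOC_CONFIGURABLE.
 //
 //   rpmalloc_config_t config;
 //   memset(&config, 0, sizeof(config));
 //   config.enable_huge_pages = 1;    // request huge/large pages if the OS supports them
 //   config.span_map_count = 128;     // map 128 spans per virtual memory request
 //   if (rpmalloc_initialize_config(&config))
 //       /* initialization failed */;
 //   const rpmalloc_config_t *active = rpmalloc_config();  // inspect the effective values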
2521 void
2522 rpmalloc_finalize(void) {
2523  rpmalloc_thread_finalize();
2524  //rpmalloc_dump_statistics(stderr);
2525 
2526  //Free all thread caches and fully free spans
2527  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
2528  heap_t *heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
2529  while (heap) {
2530  heap_t *next_heap = heap->next_heap;
2531  heap->finalize = 1;
2532  _rpmalloc_heap_global_finalize(heap);
2533  heap = next_heap;
2534  }
2535  }
2536 
2537 #if ENABLE_GLOBAL_CACHE
2538  //Free global caches
2539  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
2540  _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
2541 #endif
2542 
2543 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2544  pthread_key_delete(_memory_thread_heap);
2545 #endif
2546 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2547  FlsFree(fls_key);
2548  fls_key = 0;
2549 #endif
2550 #if ENABLE_STATISTICS
2551  //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code
2552  assert(!atomic_load32(&_mapped_pages));
2553  assert(!atomic_load32(&_reserved_spans));
2554  assert(!atomic_load32(&_mapped_pages_os));
2555 #endif
2556 
2557  _rpmalloc_initialized = 0;
2558 }
2559 
2561 extern inline void
2562 rpmalloc_thread_initialize(void) {
2563  if (!get_thread_heap_raw()) {
2564  heap_t *heap = _rpmalloc_heap_allocate(0);
2565  if (heap) {
2566  _rpmalloc_stat_inc(&_memory_active_heaps);
2567  set_thread_heap(heap);
2568 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2569  FlsSetValue(fls_key, heap);
2570 #endif
2571  }
2572  }
2573 }
2574 
2576 void
2577 rpmalloc_thread_finalize(void) {
2578  heap_t *heap = get_thread_heap_raw();
2579  if (heap)
2580  _rpmalloc_heap_release_raw(heap);
2581  set_thread_heap(0);
2582 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2583  FlsSetValue(fls_key, 0);
2584 #endif
2585 }
2586 
2587 int
2588 rpmalloc_is_thread_initialized(void) {
2589  return (get_thread_heap_raw() != 0) ? 1 : 0;
2590 }
2591 
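 // Editor's note: per-thread lifecycle sketch (illustrative, not part of the original
 // source). Each thread that allocates should attach a heap and orphan it again on exit
 // so another thread can adopt the cached spans.
 //
 //   static void *worker(void *arg) {
 //       rpmalloc_thread_initialize();   // attach a heap to this thread
 //       void *p = rpmalloc(256);
 //       rpfree(p);
 //       rpmalloc_thread_finalize();     // orphan the heap for reuse
 //       return arg;
 //   }
 //
 //   // main thread: rpmalloc_initialize(); ... spawn workers ... rpmalloc_finalize();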
2592 const rpmalloc_config_t *
2593 rpmalloc_config(void) {
2594  return &_memory_config;
2595 }
2596 
2597 // Extern interface
2598 
2599 extern inline RPMALLOC_ALLOCATOR void *
2600 rpmalloc(size_t size) {
2601 #if ENABLE_VALIDATE_ARGS
2602  if (size >= MAX_ALLOC_SIZE) {
2603  errno = EINVAL;
2604  return 0;
2605  }
2606 #endif
2607  heap_t *heap = get_thread_heap();
2608  return _rpmalloc_allocate(heap, size);
2609 }
2610 
2611 extern inline void
2612 rpfree(void *ptr) {
2613  _rpmalloc_deallocate(ptr);
2614 }
2615 
2616 extern inline RPMALLOC_ALLOCATOR void *
2617 rpcalloc(size_t num, size_t size) {
2618  size_t total;
2619 #if ENABLE_VALIDATE_ARGS
2620 #if PLATFORM_WINDOWS
2621  int err = SizeTMult(num, size, &total);
2622  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2623  errno = EINVAL;
2624  return 0;
2625  }
2626 #else
2627  int err = __builtin_umull_overflow(num, size, &total);
2628  if (err || (total >= MAX_ALLOC_SIZE)) {
2629  errno = EINVAL;
2630  return 0;
2631  }
2632 #endif
2633 #else
2634  total = num * size;
2635 #endif
2636  heap_t *heap = get_thread_heap();
2637  void *block = _rpmalloc_allocate(heap, total);
2638  if (block)
2639  memset(block, 0, total);
2640  return block;
2641 }
2642 
2643 extern inline RPMALLOC_ALLOCATOR void *
2644 rprealloc(void *ptr, size_t size) {
2645 #if ENABLE_VALIDATE_ARGS
2646  if (size >= MAX_ALLOC_SIZE) {
2647  errno = EINVAL;
2648  return ptr;
2649  }
2650 #endif
2651  heap_t *heap = get_thread_heap();
2652  return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
2653 }
2654 
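 // Editor's note: the extern entry points mirror the libc malloc family; a minimal
 // usage sketch (illustrative only):
 //
 //   int *v = (int *)rpcalloc(16, sizeof(int));    // zero-initialized array
 //   v = (int *)rprealloc(v, 64 * sizeof(int));    // grow, preserving contents
 //   rpfree(v);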
2655 extern RPMALLOC_ALLOCATOR void *
2656 rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
2657  unsigned int flags) {
2658 #if ENABLE_VALIDATE_ARGS
2659  if ((size + alignment < size) || (alignment > _memory_page_size)) {
2660  errno = EINVAL;
2661  return 0;
2662  }
2663 #endif
2664  heap_t *heap = get_thread_heap();
2665  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
2666 }
2667 
2668 extern RPMALLOC_ALLOCATOR void *
2669 rpaligned_alloc(size_t alignment, size_t size) {
2670  heap_t *heap = get_thread_heap();
2671  return _rpmalloc_aligned_allocate(heap, alignment, size);
2672 }
2673 
2674 extern inline RPMALLOC_ALLOCATOR void *
2675 rpaligned_calloc(size_t alignment, size_t num, size_t size) {
2676  size_t total;
2677 #if ENABLE_VALIDATE_ARGS
2678 #if PLATFORM_WINDOWS
2679  int err = SizeTMult(num, size, &total);
2680  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2681  errno = EINVAL;
2682  return 0;
2683  }
2684 #else
2685  int err = __builtin_umull_overflow(num, size, &total);
2686  if (err || (total >= MAX_ALLOC_SIZE)) {
2687  errno = EINVAL;
2688  return 0;
2689  }
2690 #endif
2691 #else
2692  total = num * size;
2693 #endif
2694  void *block = rpaligned_alloc(alignment, total);
2695  if (block)
2696  memset(block, 0, total);
2697  return block;
2698 }
2699 
2700 extern inline RPMALLOC_ALLOCATOR void *
2701 rpmemalign(size_t alignment, size_t size) {
2702  return rpaligned_alloc(alignment, size);
2703 }
2704 
2705 extern inline int
2706 rpposix_memalign(void **memptr, size_t alignment, size_t size) {
2707  if (memptr)
2708  *memptr = rpaligned_alloc(alignment, size);
2709  else
2710  return EINVAL;
2711  return *memptr ? 0 : ENOMEM;
2712 }
2713 
2714 extern inline size_t
2715 rpmalloc_usable_size(void *ptr) {
2716  return (ptr ? _rpmalloc_usable_size(ptr) : 0);
2717 }
2718 
2719 extern inline void
2720 rpmalloc_thread_collect(void) {
2721 }
2722 
2723 void
2724 rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
2725  memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
2726  heap_t *heap = get_thread_heap_raw();
2727  if (!heap)
2728  return;
2729 
2730  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2731  size_class_t *size_class = _memory_size_class + iclass;
2732  span_t *span = heap->partial_span[iclass];
2733  while (span) {
2734  size_t free_count = span->list_size;
2735  size_t block_count = size_class->block_count;
2736  if (span->free_list_limit < block_count)
2737  block_count = span->free_list_limit;
2738  free_count += (block_count - span->used_count);
2739  stats->sizecache = free_count * size_class->block_size;
2740  span = span->next;
2741  }
2742  }
2743 
2744 #if ENABLE_THREAD_CACHE
2745  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2746  if (heap->span_cache[iclass])
2747  stats->spancache = (size_t)heap->span_cache[iclass]->list_size * (iclass + 1) * _memory_span_size;
2748  }
2749 #endif
2750 
2751  span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
2752  while (deferred) {
2753  if (deferred->size_class != SIZE_CLASS_HUGE)
2754  stats->spancache = (size_t)deferred->span_count * _memory_span_size;
2755  deferred = (span_t *)deferred->free_list;
2756  }
2757 
2758 #if ENABLE_STATISTICS
2759  stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
2760  stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
2761 
2762  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2763  stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
2764  stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
2765  stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
2766  stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
2767  stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
2768  stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
2769  stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
2770  stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
2771  stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
2772  }
2773  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2774  stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
2775  stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
2776  stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
2777  stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
2778  stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
2779  stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
2780  stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
2781  stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
2782  }
2783 #endif
2784 }
2785 
2786 void
2787 rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
2788  memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
2789 #if ENABLE_STATISTICS
2790  stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
2791  stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
2792  stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
2793  stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
2794  stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
2795  stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
2796 #endif
2797 #if ENABLE_GLOBAL_CACHE
2798  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2799  stats->cached += (size_t)atomic_load32(&_memory_span_cache[iclass].size) * (iclass + 1) * _memory_span_size;
2800  }
2801 #endif
2802 }
2803 
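 // Editor's note: statistics usage sketch (illustrative only). Most fields are populated
 // only when the library is built with ENABLE_STATISTICS=1; the cached field additionally
 // requires ENABLE_GLOBAL_CACHE.
 //
 //   rpmalloc_global_statistics_t gstats;
 //   rpmalloc_global_statistics(&gstats);
 //   printf("mapped %zu bytes (peak %zu), global cache %zu bytes\n",
 //          gstats.mapped, gstats.mapped_peak, gstats.cached);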
2804 #if ENABLE_STATISTICS
2805 
2806 static void
2807 _memory_heap_dump_statistics(heap_t *heap, void *file) {
2808  fprintf(file, "Heap %d stats:\n", heap->id);
2809  fprintf(file,
2810  "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
2811  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2812  if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
2813  continue;
2814  fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
2815  atomic_load32(&heap->size_class_use[iclass].alloc_current),
2816  heap->size_class_use[iclass].alloc_peak,
2817  atomic_load32(&heap->size_class_use[iclass].alloc_total),
2818  atomic_load32(&heap->size_class_use[iclass].free_total),
2819  _memory_size_class[iclass].block_size,
2820  _memory_size_class[iclass].block_count,
2821  atomic_load32(&heap->size_class_use[iclass].spans_current),
2822  heap->size_class_use[iclass].spans_peak,
2823  ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
2824  ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
2825  ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
2826  ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
2827  atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
2828  }
2829  fprintf(file,
2830  "Spans Current Peak PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n");
2831  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2832  if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
2833  continue;
2834  fprintf(file, "%4u: %8d %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
2835  atomic_load32(&heap->span_use[iclass].current),
2836  atomic_load32(&heap->span_use[iclass].high),
2837  ((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
2838 #if ENABLE_THREAD_CACHE
2839  heap->span_cache[iclass] ? heap->span_cache[iclass]->list_size : 0,
2840  ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
2841  ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
2842 #else
2843  0, 0ULL, 0ULL,
2844 #endif
2845  ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
2846  ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
2847  ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(
2848  1024 * 1024),
2849  ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(
2850  1024 * 1024),
2851  atomic_load32(&heap->span_use[iclass].spans_map_calls));
2852  }
2853  fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
2854  fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
2855  (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
2856 }
2857 
2858 #endif
2859 
2860 void
2861 rpmalloc_dump_statistics(void *file) {
2862 #if ENABLE_STATISTICS
2863  //If you hit this assert, you still have active threads or forgot to finalize some thread(s)
2864  assert(atomic_load32(&_memory_active_heaps) == 0);
2865  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
2866  heap_t *heap = atomic_load_ptr(&_memory_heaps[list_idx]);
2867  while (heap) {
2868  int need_dump = 0;
2869  for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
2870  if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
2871  assert(!atomic_load32(&heap->size_class_use[iclass].free_total));
2872  assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
2873  continue;
2874  }
2875  need_dump = 1;
2876  }
2877  for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
2878  if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
2879  continue;
2880  need_dump = 1;
2881  }
2882  if (need_dump)
2883  _memory_heap_dump_statistics(heap, file);
2884  heap = heap->next_heap;
2885  }
2886  }
2887  fprintf(file, "Global stats:\n");
2888  size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
2889  size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
2890  fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
2891  fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));
2892 
2893  size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
2894  size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
2895  size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
2896  size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
2897  size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
2898  size_t reserved_total = (size_t)atomic_load32(&_reserved_spans) * _memory_span_size;
2899  fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB ReservedTotalMiB\n");
2900  fprintf(file, "%9zu %11zu %13zu %14zu %16zu %16zu\n",
2901  mapped / (size_t)(1024 * 1024),
2902  mapped_os / (size_t)(1024 * 1024),
2903  mapped_peak / (size_t)(1024 * 1024),
2904  mapped_total / (size_t)(1024 * 1024),
2905  unmapped_total / (size_t)(1024 * 1024),
2906  reserved_total / (size_t)(1024 * 1024));
2907 
2908  fprintf(file, "\n");
2909 #else
2910  (void)sizeof(file);
2911 #endif
2912 }
2913 
2914 #if RPMALLOC_FIRST_CLASS_HEAPS
2915 
2916 extern inline rpmalloc_heap_t *
2917 rpmalloc_heap_acquire(void) {
2918  // Must be a pristine heap from newly mapped memory pages, or else memory blocks
2919  // could already be allocated from the heap which would (wrongly) be released when
2920  // heap is cleared with rpmalloc_heap_free_all()
2921  heap_t *heap = _rpmalloc_heap_allocate(1);
2922  _rpmalloc_stat_inc(&_memory_active_heaps);
2923  return heap;
2924 }
2925 
2926 extern inline void
2927 rpmalloc_heap_release(rpmalloc_heap_t *heap) {
2928  if (heap)
2929  _rpmalloc_heap_release(heap, 1);
2930 }
2931 
2932 extern inline RPMALLOC_ALLOCATOR void *
2933 rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
2934 #if ENABLE_VALIDATE_ARGS
2935  if (size >= MAX_ALLOC_SIZE) {
2936  errno = EINVAL;
2937  return 0;
2938  }
2939 #endif
2940  return _rpmalloc_allocate(heap, size);
2941 }
2942 
2943 extern inline RPMALLOC_ALLOCATOR void *
2944 rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, size_t size) {
2945 #if ENABLE_VALIDATE_ARGS
2946  if (size >= MAX_ALLOC_SIZE) {
2947  errno = EINVAL;
2948  return 0;
2949  }
2950 #endif
2951  return _rpmalloc_aligned_allocate(heap, alignment, size);
2952 }
2953 
2954 extern inline RPMALLOC_ALLOCATOR void *
2955 rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
2956  return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
2957 }
2958 
2959 extern inline RPMALLOC_ALLOCATOR void *
2960 rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, size_t num, size_t size) {
2961  size_t total;
2962 #if ENABLE_VALIDATE_ARGS
2963 #if PLATFORM_WINDOWS
2964  int err = SizeTMult(num, size, &total);
2965  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2966  errno = EINVAL;
2967  return 0;
2968  }
2969 #else
2970  int err = __builtin_umull_overflow(num, size, &total);
2971  if (err || (total >= MAX_ALLOC_SIZE)) {
2972  errno = EINVAL;
2973  return 0;
2974  }
2975 #endif
2976 #else
2977  total = num * size;
2978 #endif
2979  void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
2980  if (block)
2981  memset(block, 0, total);
2982  return block;
2983 }
2984 
2985 extern inline RPMALLOC_ALLOCATOR void *
2986 rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, unsigned int flags) {
2987 #if ENABLE_VALIDATE_ARGS
2988  if (size >= MAX_ALLOC_SIZE) {
2989  errno = EINVAL;
2990  return ptr;
2991  }
2992 #endif
2993  return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
2994 }
2995 
2996 extern inline RPMALLOC_ALLOCATOR void *
2997 rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size, unsigned int flags) {
2998 #if ENABLE_VALIDATE_ARGS
2999  if ((size + alignment < size) || (alignment > _memory_page_size)) {
3000  errno = EINVAL;
3001  return 0;
3002  }
3003 #endif
3004  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
3005 }
3006 
3007 extern inline void
3008 rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
3009  (void)sizeof(heap);
3010  _rpmalloc_deallocate(ptr);
3011 }
3012 
3013 extern inline void
3014 rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
3015  span_t *span;
3016  span_t *next_span;
3017 
3018  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
3019 
3020  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3021  span = heap->partial_span[iclass];
3022  while (span) {
3023  next_span = span->next;
3024  _rpmalloc_heap_cache_insert(heap, span);
3025  span = next_span;
3026  }
3027  heap->partial_span[iclass] = 0;
3028  span = heap->full_span[iclass];
3029  while (span) {
3030  next_span = span->next;
3031  _rpmalloc_heap_cache_insert(heap, span);
3032  span = next_span;
3033  }
3034  }
3035  memset(heap->free_list, 0, sizeof(heap->free_list));
3036  memset(heap->partial_span, 0, sizeof(heap->partial_span));
3037  memset(heap->full_span, 0, sizeof(heap->full_span));
3038 
3039  span = heap->large_huge_span;
3040  while (span) {
3041  next_span = span->next;
3042  if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
3043  _rpmalloc_deallocate_huge(span);
3044  else
3045  _rpmalloc_heap_cache_insert(heap, span);
3046  span = next_span;
3047  }
3048  heap->large_huge_span = 0;
3049  heap->full_span_count = 0;
3050 
3051 #if ENABLE_THREAD_CACHE
3052  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3053  span = heap->span_cache[iclass];
3054 #if ENABLE_GLOBAL_CACHE
3055  while (span) {
3056  assert(span->span_count == (iclass + 1));
3057  size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
3058  next_span = _rpmalloc_span_list_split(span, (uint32_t)release_count);
3059  _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
3060  _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
3061  _rpmalloc_global_cache_insert_span_list(span);
3062  span = next_span;
3063  }
3064 #else
3065  if (span)
3066  _rpmalloc_span_list_unmap_all(span);
3067 #endif
3068  heap->span_cache[iclass] = 0;
3069  }
3070 #endif
3071 
3072 #if ENABLE_STATISTICS
3073  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3074  atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
3075  atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
3076  }
3077  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3078  atomic_store32(&heap->span_use[iclass].current, 0);
3079  }
3080 #endif
3081 }
3082 
3083 extern inline void
3084 rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
3085  heap_t *prev_heap = get_thread_heap_raw();
3086  if (prev_heap != heap) {
3087  set_thread_heap(heap);
3088  if (prev_heap)
3089  rpmalloc_heap_release(prev_heap);
3090  }
3091 }
3092 
3093 #endif
3094 
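 // Editor's note: first-class heap usage sketch (illustrative, requires the library to be
 // built with RPMALLOC_FIRST_CLASS_HEAPS). A heap owns everything allocated from it, so
 // rpmalloc_heap_free_all() can release all outstanding blocks in one call.
 //
 //   rpmalloc_heap_t *heap = rpmalloc_heap_acquire();
 //   void *a = rpmalloc_heap_alloc(heap, 128);
 //   void *b = rpmalloc_heap_aligned_alloc(heap, 64, 1024);
 //   rpmalloc_heap_free(heap, a);    // free a single block
 //   rpmalloc_heap_free_all(heap);   // release b and anything else still owned by the heap
 //   rpmalloc_heap_release(heap);    // return the heap itself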
3095 #if ENABLE_PRELOAD || ENABLE_OVERRIDE
3096 
3097 #include "malloc.c"
3098 
3099 #endif
LARGE_CLASS_COUNT
#define LARGE_CLASS_COUNT
Number of large block size classes.
Definition: rpmalloc.c:305
rpmalloc_thread_statistics_t
Definition: rpmalloc.h:85
rpmalloc_config_t::memory_map
void *(* memory_map)(size_t size, size_t *offset)
Map memory pages for the given number of bytes. The returned address MUST be.
Definition: rpmalloc.h:147
SMALL_GRANULARITY
#define SMALL_GRANULARITY
Preconfigured limits and sizes.
Definition: rpmalloc.c:289
TLS_MODEL
#define TLS_MODEL
Thread local heap and ID.
Definition: rpmalloc.c:630
rpmalloc_global_statistics
void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats)
Get global statistics.
Definition: rpmalloc.c:2787
rpmalloc_thread_statistics_t::spans_to_cache
size_t spans_to_cache
Number of spans transitioned to cache.
Definition: rpmalloc.h:126
rpcalloc
RPMALLOC_ALLOCATOR void * rpcalloc(size_t num, size_t size)
Definition: rpmalloc.c:2617
rprealloc
RPMALLOC_ALLOCATOR void * rprealloc(void *ptr, size_t size)
Reallocate the given block to at least the given size.
Definition: rpmalloc.c:2644
_rpmalloc_stat_inc_alloc
#define _rpmalloc_stat_inc_alloc(heap, class_idx)
Definition: rpmalloc.c:280
UNEXPECTED
#define UNEXPECTED(x)
Definition: rpmalloc.c:246
pointer_offset
#define pointer_offset(ptr, ofs)
Definition: rpmalloc.c:324
_memory_span_size
#define _memory_span_size
Hardwired span size (64KiB)
Definition: rpmalloc.c:561
MEDIUM_GRANULARITY
#define MEDIUM_GRANULARITY
Granularity of a medium allocation block.
Definition: rpmalloc.c:297
rpmalloc.h
_rpmalloc_stat_inc_free
#define _rpmalloc_stat_inc_free(heap, class_idx)
Definition: rpmalloc.c:281
rpmalloc_thread_statistics_t::free_total
size_t free_total
Total number of frees.
Definition: rpmalloc.h:124
EXPECTED
#define EXPECTED(x)
Definition: rpmalloc.c:245
RPMALLOC_GROW_OR_FAIL
#define RPMALLOC_GROW_OR_FAIL
Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place,...
Definition: rpmalloc.h:66
rpmalloc_global_statistics_t::mapped_peak
size_t mapped_peak
Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STAT...
Definition: rpmalloc.h:72
rpmalloc_thread_statistics_t::from_global
size_t from_global
Number of spans transitioned from global cache.
Definition: rpmalloc.h:103
span_active_t
struct span_active_t span_active_t
Span active data.
Definition: rpmalloc.c:345
rpmalloc_initialize_config
int rpmalloc_initialize_config(const rpmalloc_config_t *config)
Initialize allocator with given configuration.
Definition: rpmalloc.c:2327
_rpmalloc_stat_add
#define _rpmalloc_stat_add(counter, value)
Definition: rpmalloc.c:276
HEAP_ARRAY_SIZE
#define HEAP_ARRAY_SIZE
Build time configurable limits.
Definition: rpmalloc.c:22
rpmalloc_global_statistics_t::unmapped_total
size_t unmapped_total
Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
Definition: rpmalloc.h:82
rpmalloc_thread_statistics_t::to_reserved
size_t to_reserved
Number of spans transitioned to reserved state.
Definition: rpmalloc.h:109
rpmalloc
RPMALLOC_ALLOCATOR void * rpmalloc(size_t size)
Allocate a memory block of at least the given size.
Definition: rpmalloc.c:2600
rpposix_memalign
int rpposix_memalign(void **memptr, size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition: rpmalloc.c:2706
rpmalloc_thread_statistics_t::global_to_thread
size_t global_to_thread
Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
Definition: rpmalloc.h:93
SPAN_HEADER_SIZE
#define SPAN_HEADER_SIZE
Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
Definition: rpmalloc.c:313
rpmalloc_thread_statistics_t::span_use
struct rpmalloc_thread_statistics_t::@0 span_use[32]
Per span count statistics (only if ENABLE_STATISTICS=1)
INVALID_POINTER
#define INVALID_POINTER
Definition: rpmalloc.c:327
SPAN_FLAG_MASTER
#define SPAN_FLAG_MASTER
Flag indicating span is the first (master) span of a split superspan.
Definition: rpmalloc.c:352
rpmalloc_thread_statistics_t::current
size_t current
Currently used number of spans.
Definition: rpmalloc.h:97
SIZE_CLASS_COUNT
#define SIZE_CLASS_COUNT
Total number of small + medium size classes.
Definition: rpmalloc.c:303
rpmalloc_thread_statistics_t::map_calls
size_t map_calls
Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
Definition: rpmalloc.h:113
rpmalloc_config_t
Definition: rpmalloc.h:136
rpmalloc_thread_statistics_t::spans_from_reserved
size_t spans_from_reserved
Number of spans transitioned from reserved state.
Definition: rpmalloc.h:130
rpaligned_realloc
RPMALLOC_ALLOCATOR void * rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags)
Reallocate the given block to at least the given size and alignment,.
Definition: rpmalloc.c:2656
SMALL_GRANULARITY_SHIFT
#define SMALL_GRANULARITY_SHIFT
Small granularity shift count.
Definition: rpmalloc.c:291
rpmalloc_config_t::page_size
size_t page_size
Size of memory pages. The page size MUST be a power of two. All memory mapping.
Definition: rpmalloc.h:159
rpmalloc_thread_statistics_t::sizecache
size_t sizecache
Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
Definition: rpmalloc.h:87
rpmemalign
RPMALLOC_ALLOCATOR void * rpmemalign(size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition: rpmalloc.c:2701
span_list_t
struct span_list_t span_list_t
Span list.
Definition: rpmalloc.c:343
MEDIUM_CLASS_COUNT
#define MEDIUM_CLASS_COUNT
Number of medium block size classes.
Definition: rpmalloc.c:301
THREAD_CACHE_MULTIPLIER
#define THREAD_CACHE_MULTIPLIER
Multiplier for thread cache (cache limit will be span release count multiplied by this value)
Definition: rpmalloc.c:73
SIZE_CLASS_HUGE
#define SIZE_CLASS_HUGE
Definition: rpmalloc.c:330
SMALL_CLASS_COUNT
#define SMALL_CLASS_COUNT
Number of small block size classes.
Definition: rpmalloc.c:293
rpmalloc_thread_statistics
void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats)
Get per-thread statistics.
Definition: rpmalloc.c:2724
rpmalloc_global_statistics_t::huge_alloc
size_t huge_alloc
Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2Mi...
Definition: rpmalloc.h:76
rpaligned_calloc
RPMALLOC_ALLOCATOR void * rpaligned_calloc(size_t alignment, size_t num, size_t size)
Allocate a memory block of at least the given size and alignment, and zero initialize it.
Definition: rpmalloc.c:2675
rpmalloc_thread_statistics_t::spancache
size_t spancache
Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
Definition: rpmalloc.h:89
rpmalloc_dump_statistics
void rpmalloc_dump_statistics(void *file)
Dump all statistics in human-readable format to the given file (should be a FILE*)
Definition: rpmalloc.c:2861
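A one-line sketch wrapped in an illustrative helper: the argument is treated as a FILE*, so any open stream works; the detailed counters only appear when the library is built with ENABLE_STATISTICS=1.
#include <stdio.h>
#include "rpmalloc.h"

/* Illustrative helper: write the allocator's statistics report to stderr. */
static void dump_allocator_stats(void) {
	rpmalloc_dump_statistics(stderr);
}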
rpmalloc_config_t::enable_huge_pages
int enable_huge_pages
Enable use of large/huge pages. If this flag is set to non-zero and page size is zero, the allocator will try to enable huge pages and auto-detect the configuration.
Definition: rpmalloc.h:177
rpmalloc_thread_statistics_t::alloc_peak
size_t alloc_peak
Peak number of allocations.
Definition: rpmalloc.h:120
_memory_span_mask
#define _memory_span_mask
Definition: rpmalloc.c:563
rpaligned_alloc
RPMALLOC_ALLOCATOR void * rpaligned_alloc(size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition: rpmalloc.c:2669
rpfree
void rpfree(void *ptr)
Free the given memory block.
Definition: rpmalloc.c:2612
rpmalloc_global_statistics_t::huge_alloc_peak
size_t huge_alloc_peak
Peak amount of memory allocated in huge allocations, i.e. larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
Definition: rpmalloc.h:78
rpmalloc_config
const rpmalloc_config_t * rpmalloc_config(void)
Get allocator configuration.
Definition: rpmalloc.c:2593
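A sketch of inspecting the configuration the allocator is actually running with; the members used here (page_size, span_size, span_map_count) are the ones documented in this index, and the helper name is illustrative only.
#include <stdio.h>
#include "rpmalloc.h"

/* Illustrative helper: print the active allocator configuration. */
static void print_allocator_config(void) {
	const rpmalloc_config_t *cfg = rpmalloc_config();
	printf("page size: %zu, span size: %zu, spans per map call: %zu\n",
	       cfg->page_size, cfg->span_size, cfg->span_map_count);
}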
rpmalloc_thread_statistics_t::spans_from_cache
size_t spans_from_cache
Number of spans transitioned from cache.
Definition: rpmalloc.h:128
ENABLE_THREAD_CACHE
#define ENABLE_THREAD_CACHE
Enable per-thread cache.
Definition: rpmalloc.c:26
LARGE_SIZE_LIMIT
#define LARGE_SIZE_LIMIT
Maximum size of a large block.
Definition: rpmalloc.c:309
FORCEINLINE
#define FORCEINLINE
Platform and arch specifics.
Definition: rpmalloc.c:125
rpmalloc_thread_finalize
void rpmalloc_thread_finalize(void)
Finalize thread, orphan heap.
Definition: rpmalloc.c:2577
rpmalloc_finalize
void rpmalloc_finalize(void)
Finalize the allocator.
Definition: rpmalloc.c:2522
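A minimal lifecycle sketch, using POSIX threads purely for illustration: the process initializes the allocator once, each additional thread brackets its work with rpmalloc_thread_initialize/rpmalloc_thread_finalize, and rpmalloc_finalize tears down global state at exit.
#include <pthread.h>
#include "rpmalloc.h"

static void *worker(void *arg) {
	rpmalloc_thread_initialize();      /* give this thread its own heap */
	void *p = rpmalloc(256);
	rpfree(p);
	rpmalloc_thread_finalize();        /* orphan the thread heap before exit */
	return arg;
}

int main(void) {
	if (rpmalloc_initialize() != 0)
		return 1;
	pthread_t t;
	if (pthread_create(&t, NULL, worker, NULL) == 0)
		pthread_join(t, NULL);
	rpmalloc_finalize();               /* tear down global allocator state */
	return 0;
}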
HEAP_ORPHAN_ABA_SIZE
#define HEAP_ORPHAN_ABA_SIZE
ABA protection size in orphan heap list (also becomes the limit of the smallest page size)
Definition: rpmalloc.c:311
rpmalloc_global_statistics_t
Definition: rpmalloc.h:68
rpmalloc_global_statistics_t::mapped_total
size_t mapped_total
Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
Definition: rpmalloc.h:80
RPMALLOC_NO_PRESERVE
#define RPMALLOC_NO_PRESERVE
Flag to rpaligned_realloc to not preserve content in reallocation.
Definition: rpmalloc.h:62
MAP_UNINITIALIZED
#define MAP_UNINITIALIZED
Definition: rpmalloc.c:173
rpmalloc_thread_statistics_t::from_cache
size_t from_cache
Number of spans transitioned from thread cache.
Definition: rpmalloc.h:107
rpmalloc_config_t::span_map_count
size_t span_map_count
Number of spans to map at each request to map new virtual memory blocks. This can be used to minimize system call overhead at the cost of virtual memory address space.
Definition: rpmalloc.h:169
rpmalloc_usable_size
size_t rpmalloc_usable_size(void *ptr)
Query the usable size of the given memory block (from given pointer to the end of block)
Definition: rpmalloc.c:2715
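A sketch showing that the usable size can exceed the requested size because allocations are rounded up to a size class; the helper name is illustrative only and an initialized allocator is assumed.
#include <stdio.h>
#include "rpmalloc.h"

/* Illustrative helper: compare requested size against usable size. */
static void show_usable_size(void) {
	void *p = rpmalloc(100);
	if (p) {
		printf("requested 100 bytes, usable size %zu bytes\n", rpmalloc_usable_size(p));
		rpfree(p);
	}
}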
_Static_assert
_Static_assert((SMALL_GRANULARITY &(SMALL_GRANULARITY - 1))==0, "Small granularity must be power of two")
RPMALLOC_ALLOCATOR
#define RPMALLOC_ALLOCATOR
Definition: rpmalloc.h:42
rpmalloc_thread_statistics_t::thread_to_global
size_t thread_to_global
Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
Definition: rpmalloc.h:91
rpmalloc_config_t::memory_unmap
void(* memory_unmap)(void *address, size_t size, size_t offset, size_t release)
Unmap the memory pages starting at address and spanning the given number of bytes.
Definition: rpmalloc.h:155
MEDIUM_GRANULARITY_SHIFT
#define MEDIUM_GRANULARITY_SHIFT
Medium granularity shift count.
Definition: rpmalloc.c:299
SMALL_SIZE_LIMIT
#define SMALL_SIZE_LIMIT
Maximum size of a small block.
Definition: rpmalloc.c:295
rpmalloc_thread_statistics_t::from_reserved
size_t from_reserved
Number of spans transitioned from reserved state.
Definition: rpmalloc.h:111
rpmalloc_thread_statistics_t::size_use
struct rpmalloc_thread_statistics_t::@1 size_use[128]
Per size class statistics (only if ENABLE_STATISTICS=1)
SIZE_CLASS_LARGE
#define SIZE_CLASS_LARGE
Definition: rpmalloc.c:329
rpmalloc_thread_statistics_t::to_cache
size_t to_cache
Number of spans transitioned to thread cache.
Definition: rpmalloc.h:105
_rpmalloc_stat_add_peak
#define _rpmalloc_stat_add_peak(counter, value, peak)
Definition: rpmalloc.c:278
SPAN_FLAG_SUBSPAN
#define SPAN_FLAG_SUBSPAN
Flag indicating span is a secondary (sub) span of a split superspan.
Definition: rpmalloc.c:354
_memory_span_size_shift
#define _memory_span_size_shift
Definition: rpmalloc.c:562
assert
#define assert(x)
Definition: rpmalloc.c:186
rpmalloc_global_statistics_t::mapped
size_t mapped
Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
Definition: rpmalloc.h:70
_rpmalloc_stat_inc
#define _rpmalloc_stat_inc(counter)
Statistics-related functions (evaluate to nothing when statistics are not enabled)
Definition: rpmalloc.c:274
rpmalloc_thread_statistics_t::alloc_total
size_t alloc_total
Total number of allocations.
Definition: rpmalloc.h:122
_Atomic
volatile _Atomic(int32_t)
Atomic access abstraction (since MSVC does not yet support C11 atomics)
Definition: rpmalloc.c:226
rpmalloc_is_thread_initialized
int rpmalloc_is_thread_initialized(void)
Query if allocator is initialized for calling thread.
Definition: rpmalloc.c:2588
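A sketch of lazily attaching the calling thread to the allocator before its first allocation, e.g. in a callback that may run on threads not created by the application; the callback name is illustrative only.
#include "rpmalloc.h"

/* Illustrative callback: attach the calling thread to the allocator on demand. */
static void on_foreign_thread_event(void) {
	if (!rpmalloc_is_thread_initialized())
		rpmalloc_thread_initialize();
	void *p = rpmalloc(64);
	/* ... use p ... */
	rpfree(p);
}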
GLOBAL_CACHE_MULTIPLIER
#define GLOBAL_CACHE_MULTIPLIER
Multiplier for global cache (cache limit will be span release count multiplied by this value)
Definition: rpmalloc.c:93
rpmalloc_thread_statistics_t::to_global
size_t to_global
Number of spans transitioned to global cache.
Definition: rpmalloc.h:101
rpmalloc_thread_collect
void rpmalloc_thread_collect(void)
Perform deferred deallocations pending for the calling thread heap.
Definition: rpmalloc.c:2720
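A sketch of a long-lived worker that periodically performs deferred deallocations (blocks released by other threads) for its own heap; have_work() and process_item() are hypothetical placeholders.
#include <stdbool.h>
#include "rpmalloc.h"

extern bool have_work(void);      /* hypothetical */
extern void process_item(void);   /* hypothetical */

static void worker_loop(void) {
	while (have_work()) {
		process_item();
		rpmalloc_thread_collect();  /* drain deferred frees into this thread's heap */
	}
}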
rpmalloc_config_t::span_size
size_t span_size
Size of a span of memory blocks. MUST be a power of two, and in [4096,262144].
Definition: rpmalloc.h:163
rpmalloc_thread_statistics_t::peak
size_t peak
High water mark of spans used.
Definition: rpmalloc.h:99
_rpmalloc_stat_dec
#define _rpmalloc_stat_dec(counter)
Definition: rpmalloc.c:275
rpmalloc_initialize
int rpmalloc_initialize(void)
Initialize the allocator and setup global data.
Definition: rpmalloc.c:2318
_rpmalloc_stat_add64
#define _rpmalloc_stat_add64(counter, value)
Definition: rpmalloc.c:277
FALSE
#define FALSE
Definition: videoplugin.h:60
rpmalloc_global_statistics_t::cached
size_t cached
Current amount of memory in global caches for small and medium sizes (<32KiB)
Definition: rpmalloc.h:74
_rpmalloc_stat_sub
#define _rpmalloc_stat_sub(counter, value)
Definition: rpmalloc.c:279
DEFAULT_SPAN_MAP_COUNT
#define DEFAULT_SPAN_MAP_COUNT
Default number of spans to map per call when mapping more virtual memory (the default values yield 4MiB here)
Definition: rpmalloc.c:58
pointer_diff
#define pointer_diff(first, second)
Definition: rpmalloc.c:325
SPAN_FLAG_ALIGNED_BLOCKS
#define SPAN_FLAG_ALIGNED_BLOCKS
Flag indicating span has blocks with increased alignment.
Definition: rpmalloc.c:356
rpmalloc_thread_statistics_t::alloc_current
size_t alloc_current
Current number of allocations.
Definition: rpmalloc.h:118
rpmalloc_thread_initialize
void rpmalloc_thread_initialize(void)
Initialize thread, assign heap.
Definition: rpmalloc.c:2562
MEDIUM_SIZE_LIMIT
#define MEDIUM_SIZE_LIMIT
Maximum size of a medium block.
Definition: rpmalloc.c:307