xtool/contrib/mORMot/SynFPCx64MM.pas

2864 lines
101 KiB
ObjectPascal

/// Fast Memory Manager for FPC x86_64
// - this unit is a part of the freeware Synopse mORMot framework
// licensed under a MPL/GPL/LGPL three license - see LICENSE.md
unit SynFPCx64MM;
{
*****************************************************************************
A Multi-thread Friendly Memory Manager for FPC written in x86_64 assembly
- targetting Linux (and Windows) multi-threaded Services
- only for FPC on the x86_64 target - use the RTL MM on Delphi or ARM
- based on FastMM4 proven algorithms by Pierre le Riche
- code has been reduced to the only necessary featureset for production
- deep asm refactoring for cross-platform, compactness and efficiency
- can report detailed statistics (with threads contention and memory leaks)
- mremap() makes large block ReallocMem a breeze on Linux :)
- inlined SSE2 movaps loop is more efficient that subfunction(s)
- lockless round-robin of tiny blocks (<=128/256 bytes) for better scaling
- optional lockless bin list to avoid freemem() thread contention
- three app modes: default mono-thread friendly, FPCMM_SERVER or FPCMM_BOOST
Usage: include this unit as the very first in your FPC project uses clause
Why another Memory Manager on FPC?
- The built-in heap.inc is well written and cross-platform and cross-CPU,
but its threadvar arena for small blocks tends to consume a lot of memory
on multi-threaded servers, and has suboptimal allocation performance
- C memory managers (glibc, Intel TBB, jemalloc) have a very high RAM
consumption (especially Intel TBB) and do panic/SIGKILL on any GPF
- Pascal alternatives (FastMM4,ScaleMM2,BrainMM) are Windows+Delphi specific
- Our lockess round-robin of tiny blocks is a unique algorithm in MM AFAIK
- It was so fun diving into SSE2 x86_64 assembly and Pierre's insight
- Resulting code is still easy to understand and maintain
IMPORTANT NOTICE: seems stable on Linux and Win64 but feedback is welcome!
*****************************************************************************
This file is part of Synopse framework.
Synopse framework. Copyright (C) 2020 Arnaud Bouchez
Synopse Informatique - https://synopse.info
*** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the License.
The Original Code is Synopse mORMot framework.
The Initial Developer of the Original Code is Arnaud Bouchez.
Portions created by the Initial Developer are Copyright (C) 2020
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
}
{ ---- Ready-To-Use Scenarios for Memory Manager Tuning }
// by default, we target LCL/console mono-threaded apps to replace the RTL MM
// - you may define FPCMM_SERVER or even FPCMM_BOOST for a service/daemon
// if defined, set FPCMM_DEBUG and FPCMM_ASSUMEMULTITHREAD
// - those flags target well a multi-threaded service
// - consider FPCMM_BOOST to try more aggressive settings
{.$define FPCMM_SERVER}
// if defined, tiny blocks <= 256 bytes will have a bigger round-robin cycle
// - try to enable it if unexpected SmallGetmemSleepCount/SmallFreememSleepCount
// and SleepCount/SleepCycles contentions are reported by CurrentHeapStatus
// - will also use 2x (FPCMM_BOOST) or 4x (FPCMM_BOOSTER) more tiny blocks
// arenas to share among the threads - so process will consume slightly more RAM
// - warning: depending on the workload and hardware, it may actually be slower;
// consider FPCMM_SERVER as a fair alternative
{.$define FPCMM_BOOST}
{.$define FPCMM_BOOSTER}
{ ---- Fine Grained Memory Manager Tuning }
// includes more detailed information to WriteHeapStatus()
{.$define FPCMM_DEBUG}
// checks leaks and write them to the console at process shutdown
// - only basic information will be included: more debugging information (e.g.
// call stack) may be gathered using heaptrc or valgrid
{.$define FPCMM_REPORTMEMORYLEAKS}
// won't check the IsMultiThread global, but assume it is true
// - multi-threaded apps (e.g. a Server Daemon instance) will be faster with it
// - mono-threaded (console/LCL) apps are faster without this conditional
{.$define FPCMM_ASSUMEMULTITHREAD}
// let Freemem multi-thread contention use a lockless algorithm
// - on contention, Freemem won't yield the thread using an OS call, but fill
// an internal Bin list which will be released when the lock becomes available
// - from our tests on high thread contention, this may be slower on Linux, but
// sometimes slightly faster on Win64 (in a VM at least)
{.$define FPCMM_LOCKLESSFREE}
// won't use mremap but a regular getmem/move/freemem pattern
// - depending on the actual system (e.g. on a VM), mremap may be slower
{.$define FPCMM_NOMREMAP}
// on contention problem, execute "pause" opcode and spin retrying the lock
// - you may try to define this if you have more than one core, to follow Intel
// recommendation from https://software.intel.com/en-us/comment/1134767
// - on SkylakeX (Intel 7th gen), "pause" opcode went from 10-20 to 140 cycles,
// so we use rdtsc and a given number of cycles - see http://tiny.cc/toeaqz
// - from our tests on high thread contention, spinning is slower on both
// Linux and Windows, whatever Intel is advising
{.$define FPCMM_PAUSE}
// will export libc-like functions, and not replace the FPC MM
// - e.g. to use this unit as a stand-alone C memory allocator
{.$define FPCMM_STANDALONE}
interface
{$ifdef FPC}
// cut-down version of Synopse.inc to make this unit standalone
{$mode Delphi}
{$asmmode Intel}
{$inline on}
{$R-} // disable Range checking
{$S-} // disable Stack checking
{$W-} // disable stack frame generation
{$Q-} // disable overflow checking
{$B-} // expect short circuit boolean
{$ifdef CPUX64}
{$define FPC_CPUX64} // this unit is for FPC + x86_64 only
{$endif CPUX64}
{$ifdef FPCMM_BOOSTER}
{$define FPCMM_BOOST}
{$undef FPCMM_DEBUG} // when performance matters more than stats
{$endif FPCMM_BOOSTER}
{$ifdef FPCMM_BOOST}
{$undef FPCMM_SERVER}
{$define FPCMM_ASSUMEMULTITHREAD}
{$endif FPCMM_BOOST}
{$ifdef FPCMM_SERVER}
{$define FPCMM_DEBUG}
{$define FPCMM_ASSUMEMULTITHREAD}
{$endif FPCMM_SERVER}
{$endif FPC}
{$ifdef FPC_CPUX64}
// this unit is available only for FPC + X86_64 CPU
type
/// Arena (middle/large) heap information as returned by CurrentHeapStatus
TMMStatusArena = record
/// how many bytes are currently reserved (mmap) to the Operating System
CurrentBytes: PtrUInt;
/// how many bytes have been reserved (mmap) to the Operating System
CumulativeBytes: PtrUInt;
{$ifdef FPCMM_DEBUG}
/// maximum bytes count reserved (mmap) to the Operating System
PeakBytes: PtrUInt;
/// how many VirtualAlloc/mmap calls to the Operating System did occur
CumulativeAlloc: PtrUInt;
/// how many VirtualFree/munmap calls to the Operating System did occur
CumulativeFree: PtrUInt;
{$endif FPCMM_DEBUG}
/// how many times this Arena did wait from been unlocked by another thread
SleepCount: PtrUInt;
end;
/// heap information as returned by CurrentHeapStatus
TMMStatus = record
/// how many tiny/small memory blocks (<=2600) are currently allocated
SmallBlocks: PtrUInt;
/// how many bytes of tiny/small memory blocks are currently allocated
// - this size is part of the Medium.CurrentBytes arena
SmallBlocksSize: PtrUInt;
/// contain blocks up to 256KB (small and medium blocks)
Medium: TMMStatusArena;
/// large blocks > 256KB which are directly handled by the Operating System
Large: TMMStatusArena;
{$ifdef FPCMM_DEBUG}
/// how much rdtsc cycles were spent within SwitchToThread/NanoSleep API
// - we rdtsc since it is an indicative but very fast way of timing
SleepCycles: PtrUInt;
{$ifdef FPCMM_LOCKLESSFREE}
/// how many types Freemem() did spin to acquire its lock-less bin list
SmallFreememLockLessSpin: PtrUInt;
{$endif FPCMM_LOCKLESSFREE}
{$endif FPCMM_DEBUG}
/// how many times the Operating System Sleep/NanoSleep API was called
// - in a perfect world, should be as small as possible
SleepCount: PtrUInt;
/// how many times Getmem() did block and wait for a small block
// - see also GetSmallBlockContention()
SmallGetmemSleepCount: PtrUInt;
/// how many times Freemem() did block and wait for a small block
// - see also GetSmallBlockContention()
SmallFreememSleepCount: PtrUInt;
end;
PMMStatus = ^TMMStatus;
/// allocate a new memory buffer
// - as FPC default heap, _Getmem(0) returns _Getmem(1)
function _GetMem(size: PtrUInt): pointer;
/// allocate a new zeroed memory buffer
function _AllocMem(Size: PtrUInt): pointer;
/// release a memory buffer
// - returns the allocated size of the supplied pointer (as FPC default heap)
function _FreeMem(P: pointer): PtrUInt;
/// change the size of a memory buffer
// - won't move any data if in-place reallocation is possible
// - as FPC default heap, _ReallocMem(P=nil,Size) maps P := _getmem(Size) and
// _ReallocMem(P,0) maps _Freemem(P)
function _ReallocMem(var P: pointer; Size: PtrUInt): pointer;
/// retrieve the maximum size (i.e. the allocated size) of a memory buffer
function _MemSize(P: pointer): PtrUInt; inline;
/// retrieve high-level statistics about the current memory manager state
// - see also GetSmallBlockContention for detailed small blocks information
function CurrentHeapStatus: TMMStatus;
{$ifdef FPCMM_STANDALONE}
/// should be called before using any memory function
procedure InitializeMemoryManager;
/// should be called to finalize this memory manager process and release all RAM
procedure FreeAllMemory;
{$undef FPCMM_DEBUG} // excluded FPC-specific debugging
/// IsMultiThread global variable is not correct outside of the FPC RTL
{$define FPCMM_ASSUMEMULTITHREAD}
/// not supported to reduce dependencies and console writing
{$undef FPCMM_REPORTMEMORYLEAKS}
{$else}
type
/// one GetSmallBlockContention info about unexpected multi-thread waiting
// - a single GetmemBlockSize or FreememBlockSize non 0 field is set
TSmallBlockContention = packed record
/// how many times a small block getmem/freemem has been waiting for unlock
SleepCount: cardinal;
/// the small block size on which Getmem() has been blocked - or 0
GetmemBlockSize: cardinal;
/// the small block size on which Freemem() has been blocked - or 0
FreememBlockSize: cardinal;
end;
/// small blocks detailed information as returned GetSmallBlockContention
TSmallBlockContentionDynArray = array of TSmallBlockContention;
/// one GetSmallBlockStatus information
TSmallBlockStatus = packed record
/// how many times a memory block of this size has been allocated
Total: cardinal;
/// how many memory blocks of this size are currently allocated
Current: cardinal;
/// the standard size of the small memory block
BlockSize: cardinal;
end;
/// small blocks detailed information as returned GetSmallBlockStatus
TSmallBlockStatusDynArray = array of TSmallBlockStatus;
/// sort order of detailed information as returned GetSmallBlockStatus
TSmallBlockOrderBy = (obTotal, obCurrent, obBlockSize);
/// retrieve the use counts of allocated small blocks
// - returns maxcount biggest results, sorted by "orderby" field occurence
function GetSmallBlockStatus(maxcount: integer = 10;
orderby: TSmallBlockOrderBy = obTotal; count: PPtrUInt = nil;
bytes: PPtrUInt = nil): TSmallBlockStatusDynArray;
/// retrieve all small blocks which suffered from blocking during multi-thread
// - returns maxcount biggest results, sorted by SleepCount occurence
function GetSmallBlockContention(maxcount: integer = 10): TSmallBlockContentionDynArray;
/// convenient debugging function into the console
// - if smallblockcontentioncount > 0, includes GetSmallBlockContention() info
// up to the smallblockcontentioncount biggest occurences
procedure WriteHeapStatus(const context: shortstring = '';
smallblockstatuscount: integer = 8; smallblockcontentioncount: integer = 8;
compilationflags: boolean = false);
{$endif FPCMM_STANDALONE}
{$endif FPC_CPUX64}
implementation
{
High-level Algorithms Description
-----------------------------------
The allocator handles the following families of memory blocks:
- TINY <= 128 B (or <= 256 B for FPCMM_BOOST) - not existing in FastMM4
Round-robin distribution into several arenas, fed from medium blocks
(fair scaling from multi-threaded calls, with no threadvar nor GC involved)
- SMALL <= 2600 B
Single arena per block size, fed from medium blocks
- MEDIUM <= 256 KB
Pool of bitmap-marked chunks, fed from 1MB of OS mmap/virtualalloc
- LARGE > 256 KB
Directly fed from OS mmap/virtualalloc with mremap when growing
About locking:
- Tiny and Small blocks have their own per-size lock, in every arena
- Medium and Large blocks have one giant lock each (seldom used)
- SwitchToThread/FpNanoSleep OS call is done after initial spinning
- FPCMM_LOCKLESSFREE reduces OS calls on Freemem() thread contention
- FPCMM_DEBUG / WriteHeapStatus allows to identify the lock contention
}
{$ifdef FPC_CPUX64}
// this unit is available only for FPC + X86_64 CPU
{ ********* Operating System Specific API Calls }
{$ifdef MSWINDOWS}
var
HeapStatus: TMMStatus;
const
kernel32 = 'kernel32.dll';
MEM_COMMIT = $1000;
MEM_RESERVE = $2000;
MEM_RELEASE = $8000;
MEM_FREE = $10000;
MEM_TOP_DOWN = $100000;
PAGE_READWRITE = 4;
function VirtualAlloc(lpAddress: pointer;
dwSize: PtrUInt; flAllocationType, flProtect: Cardinal): pointer; stdcall;
external kernel32 name 'VirtualAlloc';
function VirtualFree(lpAddress: pointer; dwSize: PtrUInt;
dwFreeType: Cardinal): LongBool; stdcall;
external kernel32 name 'VirtualFree';
procedure SwitchToThread; stdcall;
external kernel32 name 'SwitchToThread';
function AllocMedium(Size: PtrInt): pointer; inline;
begin
// bottom-up allocation to reduce fragmentation
result := VirtualAlloc(nil, Size, MEM_COMMIT, PAGE_READWRITE);
end;
function AllocLarge(Size: PtrInt): pointer; inline;
begin
// top-down allocation to reduce fragmentation
result := VirtualAlloc(nil, Size, MEM_COMMIT or MEM_TOP_DOWN, PAGE_READWRITE);
end;
procedure Free(ptr: pointer; Size: PtrInt); inline;
begin
VirtualFree(ptr, 0, MEM_RELEASE);
end;
{$define FPCMM_NOMREMAP}
{$else}
uses
{$ifndef DARWIN}
syscall,
{$endif DARWIN}
BaseUnix;
var
HeapStatus: TMMStatus;
// we directly call the Kernel, so this unit doesn't require any libc
function AllocMedium(Size: PtrInt): pointer; inline;
begin
result := fpmmap(nil, Size, PROT_READ or PROT_WRITE,
MAP_PRIVATE or MAP_ANONYMOUS, -1, 0);
end;
function AllocLarge(Size: PtrInt): pointer; inline;
begin
result := fpmmap(nil, Size, PROT_READ or PROT_WRITE,
MAP_PRIVATE or MAP_ANONYMOUS, -1, 0);
end;
procedure Free(ptr: pointer; Size: PtrInt); inline;
begin
Size := fpmunmap(ptr, Size);
// assert(Size = 0);
end;
{$ifdef LINUX}
{$ifndef FPCMM_NOMREMAP}
const
syscall_nr_mremap = 25; // valid on x86_64 Linux and Android
MREMAP_MAYMOVE = 1;
function fpmremap(addr: pointer; old_len, new_len: size_t; may_move: longint): pointer; inline;
begin
result := pointer(do_syscall(syscall_nr_mremap, TSysParam(addr),
TSysParam(old_len), TSysParam(new_len), TSysParam(may_move)));
end;
{$endif FPCMM_NOMREMAP}
{$else BSD}
{$define FPCMM_NOMREMAP} // mremap is a Linux-specific syscall
{$endif LINUX}
procedure SwitchToThread; inline;
var
t: Ttimespec;
begin
// note: nanosleep() adds a few dozen of microsecs for context switching
t.tv_sec := 0;
t.tv_nsec := 10; // empirically identified on a recent Linux Kernel
fpnanosleep(@t, nil);
end;
{$endif MSWINDOWS}
{$ifdef FPCMM_DEBUG}
procedure ReleaseCore; nostackframe; assembler;
asm
rdtsc
shl rdx, 32
or rax, rdx
push rax
call SwitchToThread
pop rcx
rdtsc
shl rdx, 32
or rax, rdx
lea rdx, [rip + HeapStatus]
sub rax, rcx
lock xadd qword ptr [rdx + TMMStatus.SleepCycles], rax
lock inc qword ptr [rdx + TMMStatus.SleepCount]
end;
{$else}
procedure ReleaseCore;
begin
SwitchToThread;
inc(HeapStatus.SleepCount); // indicative counter
end;
{$endif FPCMM_DEBUG}
{ ********* Some Assembly Helpers }
procedure NotifyAlloc(var Arena: TMMStatusArena; Size: PtrUInt);
nostackframe; assembler;
asm
mov rax, Size
lock xadd qword ptr [Arena].TMMStatusArena.CurrentBytes, rax
lock xadd qword ptr [Arena].TMMStatusArena.CumulativeBytes, Size
{$ifdef FPCMM_DEBUG}
lock inc qword ptr [Arena].TMMStatusArena.CumulativeAlloc
mov rax, qword ptr [Arena].TMMStatusArena.CurrentBytes
cmp rax, qword ptr [Arena].TMMStatusArena.PeakBytes
jbe @s
mov qword ptr [Arena].TMMStatusArena.PeakBytes, rax
@s: {$endif FPCMM_DEBUG}
end;
procedure NotifyFree(var Arena: TMMStatusArena; Size: PtrUInt);
nostackframe; assembler;
asm
neg Size
lock xadd qword ptr [Arena].TMMStatusArena.CurrentBytes, Size
{$ifdef FPCMM_DEBUG}
lock inc qword ptr [Arena].TMMStatusArena.CumulativeFree
{$endif FPCMM_DEBUG}
end;
// faster than Move() as called from ReallocateLargeBlock
procedure MoveLarge(src, dst: pointer; cnt: PtrInt); nostackframe; assembler;
asm
sub cnt, 8
add src, cnt
add dst, cnt
neg cnt
jns @z
align 16
@s: movaps xmm0, oword ptr [src + cnt] // AVX move is not really faster
movntdq oword ptr [dst + cnt], xmm0 // non-temporal loop
add cnt, 16
js @s
sfence
@z: mov rax, qword ptr [src + cnt]
mov qword ptr [dst + cnt], rax
end;
{ ********* Constants and Data Structures Definitions }
const
{$ifdef FPCMM_BOOST} // someimtes the more arenas, the better multi-threadable
{$ifdef FPCMM_BOOSTER}
NumTinyBlockTypesPO2 = 4;
NumTinyBlockArenasPO2 = 5; // will probably end up with Medium lock contention
{$else}
NumTinyBlockTypesPO2 = 4; // tiny are <= 256 bytes
NumTinyBlockArenasPO2 = 4; // 16 + 1 arenas
{$endif FPCMM_BOOSTER}
{$else}
NumTinyBlockTypesPO2 = 3; // multiple arenas for tiny blocks <= 128 bytes
NumTinyBlockArenasPO2 = 3; // 8 round-robin arenas + 1 main by default
{$endif FPCMM_BOOST}
NumSmallBlockTypes = 46;
MaximumSmallBlockSize = 2608;
SmallBlockSizes: array[0..NumSmallBlockTypes - 1] of word = (
16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256,
272, 288, 304, 320, 352, 384, 416, 448, 480, 528, 576, 624, 672, 736, 800,
880, 960, 1056, 1152, 1264, 1376, 1504, 1648, 1808, 1984, 2176, 2384,
MaximumSmallBlockSize, MaximumSmallBlockSize, MaximumSmallBlockSize);
NumTinyBlockTypes = 1 shl NumTinyBlockTypesPO2;
NumTinyBlockArenas = 1 shl NumTinyBlockArenasPO2;
NumSmallInfoBlock = NumSmallBlockTypes + NumTinyBlockArenas * NumTinyBlockTypes;
SmallBlockGranularity = 16;
TargetSmallBlocksPerPool = 48;
MinimumSmallBlocksPerPool = 12;
SmallBlockDownsizeCheckAdder = 64;
SmallBlockUpsizeAdder = 32;
{$ifdef FPCMM_LOCKLESSFREE}
SmallBlockTypePO2 = 8; // SizeOf(TSmallBlockType)=256
SmallBlockBinCount = (((1 shl SmallBlockTypePO2) - 64) div 8) - 1;
{$else}
SmallBlockTypePO2 = 6;
{$endif FPCMM_LOCKLESSFREE}
MediumBlockPoolSizeMem = 20 * 64 * 1024;
MediumBlockPoolSize = MediumBlockPoolSizeMem - 16;
MediumBlockSizeOffset = 48;
MinimumMediumBlockSize = 11 * 256 + MediumBlockSizeOffset;
MediumBlockBinsPerGroup = 32;
MediumBlockBinGroupCount = 32;
MediumBlockBinCount = MediumBlockBinGroupCount * MediumBlockBinsPerGroup;
MediumBlockGranularity = 256;
MaximumMediumBlockSize =
MinimumMediumBlockSize + (MediumBlockBinCount - 1) * MediumBlockGranularity;
OptimalSmallBlockPoolSizeLowerLimit =
29 * 1024 - MediumBlockGranularity + MediumBlockSizeOffset;
OptimalSmallBlockPoolSizeUpperLimit =
64 * 1024 - MediumBlockGranularity + MediumBlockSizeOffset;
MaximumSmallBlockPoolSize =
OptimalSmallBlockPoolSizeUpperLimit + MinimumMediumBlockSize;
LargeBlockGranularity = 65536;
MediumInPlaceDownsizeLimit = MinimumMediumBlockSize div 4;
IsFreeBlockFlag = 1;
IsMediumBlockFlag = 2;
IsSmallBlockPoolInUseFlag = 4;
IsLargeBlockFlag = 4;
PreviousMediumBlockIsFreeFlag = 8;
LargeBlockIsSegmented = 8;
DropSmallFlagsMask = -8;
ExtractSmallFlagsMask = 7;
DropMediumAndLargeFlagsMask = -16;
ExtractMediumAndLargeFlagsMask = 15;
// use pause before ReleaseCore API call when spinning locks
// pause is 140 cycles since SkylakeX - see http://tiny.cc/010ioz -> use rdtsc
// which has 30 cycles latency; ring3 to ring 0 transition is 1000 cycles
{$ifdef FPCMM_PAUSE}
SpinSmallGetmemLockTSC = 1000;
SpinSmallFreememLockTSC = 1000; // _freemem has more collisions
{$ifdef FPCMM_LOCKLESSFREE}
SpinSmallFreememBinTSC = 2000;
{$endif FPCMM_LOCKLESSFREE}
SpinMediumLockTSC = 2000;
SpinLargeLockTSC = 2000;
{$else}
SpinMediumLockTSC = 1000; // minimum spinning
SpinLargeLockTSC = 1000;
{$endif FPCMM_PAUSE}
type
PSmallBlockPoolHeader = ^TSmallBlockPoolHeader;
// information for each small block size - 64/256 bytes long >= CPU cache line
TSmallBlockType = record
BlockTypeLocked: boolean;
AllowedGroupsForBlockPoolBitmap: Byte;
BlockSize: Word;
MinimumBlockPoolSize: Word;
OptimalBlockPoolSize: Word;
NextPartiallyFreePool: PSmallBlockPoolHeader;
PreviousPartiallyFreePool: PSmallBlockPoolHeader;
NextSequentialFeedBlockAddress: pointer;
MaxSequentialFeedBlockAddress: pointer;
CurrentSequentialFeedPool: PSmallBlockPoolHeader;
GetmemCount: cardinal;
FreememCount: cardinal;
GetmemSleepCount: cardinal;
FreememSleepCount: cardinal;
{$ifdef FPCMM_LOCKLESSFREE} // 192 optional bytes for FreeMem Bin
BinLocked: boolean;
BinCount: byte;
BinSpinCount: cardinal;
BinInstance: array[0.. SmallBlockBinCount - 1] of pointer;
{$endif FPCMM_LOCKLESSFREE}
end;
PSmallBlockType = ^TSmallBlockType;
TSmallBlockTypes = array[0..NumSmallBlockTypes - 1] of TSmallBlockType;
TTinyBlockTypes = array[0..NumTinyBlockTypes - 1] of TSmallBlockType;
TSmallBlockInfo = record
Small: TSmallBlockTypes;
Tiny: array[0..NumTinyBlockArenas - 1] of TTinyBlockTypes;
GetmemLookup: array[0..
(MaximumSmallBlockSize - 1) div SmallBlockGranularity] of byte;
{$ifndef FPCMM_ASSUMEMULTITHREAD}
IsMultiThreadPtr: PBoolean; // safe access to IsMultiThread global variable
{$endif FPCMM_ASSUMEMULTITHREAD}
TinyCurrentArena: integer;
end;
TSmallBlockPoolHeader = record
BlockType: PSmallBlockType;
{$ifdef CPU32}
Padding32Bits: cardinal;
{$endif}
NextPartiallyFreePool: PSmallBlockPoolHeader;
PreviousPartiallyFreePool: PSmallBlockPoolHeader;
FirstFreeBlock: pointer;
BlocksInUse: Cardinal;
SmallBlockPoolSignature: Cardinal;
FirstBlockPoolPointerAndFlags: PtrUInt;
end;
PMediumBlockPoolHeader = ^TMediumBlockPoolHeader;
TMediumBlockPoolHeader = record
PreviousMediumBlockPoolHeader: PMediumBlockPoolHeader;
NextMediumBlockPoolHeader: PMediumBlockPoolHeader;
Reserved1: PtrUInt;
FirstMediumBlockSizeAndFlags: PtrUInt;
end;
PMediumFreeBlock = ^TMediumFreeBlock;
TMediumFreeBlock = record
PreviousFreeBlock: PMediumFreeBlock;
NextFreeBlock: PMediumFreeBlock;
end;
TMediumBlockInfo = record
Locked: boolean;
PoolsCircularList: TMediumBlockPoolHeader;
LastSequentiallyFed: pointer;
SequentialFeedBytesLeft: Cardinal;
BinGroupBitmap: Cardinal;
{$ifndef FPCMM_ASSUMEMULTITHREAD}
IsMultiThreadPtr: PBoolean; // safe access to IsMultiThread global variable
{$endif FPCMM_ASSUMEMULTITHREAD}
BinBitmaps: array[0..MediumBlockBinGroupCount - 1] of Cardinal;
Bins: array[0..MediumBlockBinCount - 1] of TMediumFreeBlock;
end;
PLargeBlockHeader = ^TLargeBlockHeader;
TLargeBlockHeader = record
PreviousLargeBlockHeader: PLargeBlockHeader;
NextLargeBlockHeader: PLargeBlockHeader;
Reserved1: PtrUInt;
BlockSizeAndFlags: PtrUInt;
end;
const
BlockHeaderSize = SizeOf(pointer);
SmallBlockPoolHeaderSize = SizeOf(TSmallBlockPoolHeader);
MediumBlockPoolHeaderSize = SizeOf(TMediumBlockPoolHeader);
LargeBlockHeaderSize = SizeOf(TLargeBlockHeader);
var
SmallBlockInfo: TSmallBlockInfo;
MediumBlockInfo: TMediumBlockInfo;
LargeBlocksLocked: boolean;
LargeBlocksCircularList: TLargeBlockHeader;
{ ********* Shared Routines }
procedure LockMediumBlocks; nostackframe; assembler;
asm
// on input/output: r10=MediumBlockInfo
@s: rdtsc // tsc in edx:eax
shl rdx, 32
lea r9, [rax + rdx + SpinMediumLockTSC] // r9 = endtsc
@sp: pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @rc // timeout
mov rcx, r10
mov eax, $100
cmp byte ptr [r10].TMediumBlockInfo.Locked, true
je @sp
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @ok
jmp @sp
@rc: push rsi // preserve POSIX ABI registers
push rdi
push r10
push r11
call ReleaseCore
pop r11
pop r10
pop rdi
pop rsi
lea rax, [rip + HeapStatus]
lock inc qword ptr [rax].TMMStatus.Medium.SleepCount
jmp @s
@ok:
end;
procedure InsertMediumBlockIntoBin; nostackframe; assembler;
asm
// rcx=MediumFreeBlock edx=MediumBlockSize r10=MediumBlockInfo - even on POSIX
mov rax, rcx
// Get the bin number for this block size
sub edx, MinimumMediumBlockSize
shr edx, 8
// Validate the bin number
sub edx, MediumBlockBinCount - 1
sbb ecx, ecx
and edx, ecx
add edx, MediumBlockBinCount - 1
mov r9, rdx
// Get the bin address in rcx
shl edx, 4
lea rcx, [r10 + rdx + TMediumBlockInfo.Bins]
// Bins are LIFO, se we insert this block as the first free block in the bin
mov rdx, TMediumFreeBlock[rcx].NextFreeBlock
mov TMediumFreeBlock[rax].PreviousFreeBlock, rcx
mov TMediumFreeBlock[rax].NextFreeBlock, rdx
mov TMediumFreeBlock[rdx].PreviousFreeBlock, rax
mov TMediumFreeBlock[rcx].NextFreeBlock, rax
// Was this bin empty?
cmp rdx, rcx
jne @Done
// Get ecx=bin number, edx=group number
mov rcx, r9
mov rdx, r9
shr edx, 5
// Flag this bin as not empty
mov eax, 1
shl eax, cl
or dword ptr [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax
// Flag the group as not empty
mov eax, 1
mov ecx, edx
shl eax, cl
or [r10 + TMediumBlockInfo.BinGroupBitmap], eax
@Done:
end;
procedure RemoveMediumFreeBlock; nostackframe; assembler;
asm
// rcx=MediumFreeBlock r10=MediumBlockInfo - even on POSIX
// Get the current previous and next blocks
mov rdx, TMediumFreeBlock[rcx].PreviousFreeBlock
mov rcx, TMediumFreeBlock[rcx].NextFreeBlock
// Remove this block from the linked list
mov TMediumFreeBlock[rcx].PreviousFreeBlock, rdx
mov TMediumFreeBlock[rdx].NextFreeBlock, rcx
// Is this bin now empty? If the previous and next free block pointers are
// equal, they must point to the bin
cmp rcx, rdx
jne @Done
// Get ecx=bin number, edx=group number
lea r8, [r10 + TMediumBlockInfo.Bins]
sub rcx, r8
mov edx, ecx
shr ecx, 4
shr edx, 9
// Flag this bin as empty
mov eax, -2
rol eax, cl
and dword ptr [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax
jnz @Done
// Flag this group as empty
mov eax, -2
mov ecx, edx
rol eax, cl
and [r10 + TMediumBlockInfo.BinGroupBitmap], eax
@Done:
end;
procedure BinMediumSequentialFeedRemainder; nostackframe; assembler;
asm
// r10=MediumBlockInfo - even on POSIX
mov eax, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
test eax, eax
jz @Done
// Is the last fed sequentially block free?
mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed]
test byte ptr [rax - BlockHeaderSize], IsFreeBlockFlag
jnz @LastBlockFedIsFree
// Set the "previous block is free" flag in the last block fed
or qword ptr [rax - BlockHeaderSize], PreviousMediumBlockIsFreeFlag
// Get edx=remainder size, rax=remainder start
mov edx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
sub rax, rdx
@BinTheRemainder:
// Store the size of the block as well as the flags
lea rcx, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [rax - BlockHeaderSize], rcx
// Store the trailing size marker
mov [rax + rdx - 16], rdx
// Bin this medium block
cmp edx, MinimumMediumBlockSize
jb @Done
mov rcx, rax
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
@Done: ret
@LastBlockFedIsFree:
// Drop the flags
mov rdx, DropMediumAndLargeFlagsMask
and rdx, [rax - BlockHeaderSize]
// Free the last block fed
cmp edx, MinimumMediumBlockSize
jb @DontRemoveLastFed
// Last fed block is free - remove it from its size bin
mov rcx, rax
call RemoveMediumFreeBlock // rcx = APMediumFreeBlock
// Re-read rax and rdx
mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed]
mov rdx, DropMediumAndLargeFlagsMask
and rdx, [rax - BlockHeaderSize]
@DontRemoveLastFed:
// Get the number of bytes left in ecx
mov ecx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
// rax = remainder start, rdx = remainder size
sub rax, rcx
add edx, ecx
jmp @BinTheRemainder
end;
procedure FreeMedium(ptr: PMediumBlockPoolHeader);
begin
Free(ptr, MediumBlockPoolSizeMem);
NotifyFree(HeapStatus.Medium, MediumBlockPoolSizeMem);
end;
function AllocNewSequentialFeedMediumPool(blocksize: Cardinal): pointer;
var
old: PMediumBlockPoolHeader;
new: pointer;
begin
BinMediumSequentialFeedRemainder;
new := AllocMedium(MediumBlockPoolSizeMem);
with MediumblockInfo do
if new <> nil then
begin
old := PoolsCircularList.NextMediumBlockPoolHeader;
PMediumBlockPoolHeader(new).PreviousMediumBlockPoolHeader := @PoolsCircularList;
PoolsCircularList.NextMediumBlockPoolHeader := new;
PMediumBlockPoolHeader(new).NextMediumBlockPoolHeader := old;
old.PreviousMediumBlockPoolHeader := new;
PPtrUInt(PByte(new) + MediumBlockPoolSize - BlockHeaderSize)^ := IsMediumBlockFlag;
SequentialFeedBytesLeft :=
(MediumBlockPoolSize - MediumBlockPoolHeaderSize) - blocksize;
result := pointer(PByte(new) + MediumBlockPoolSize - blocksize);
LastSequentiallyFed := result;
PPtrUInt(PByte(result) - BlockHeaderSize)^ := blocksize or IsMediumBlockFlag;
NotifyAlloc(HeapStatus.Medium, MediumBlockPoolSizeMem);
end
else
begin
SequentialFeedBytesLeft := 0;
result := nil;
end;
end;
procedure LockLargeBlocks; nostackframe; assembler;
asm
@s: mov eax, $100
lea rcx, [rip + LargeBlocksLocked]
lock cmpxchg byte ptr [rcx], ah
je @ok
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinLargeLockTSC] // r9 = endtsc
@sp: pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @rc // timeout
mov eax, $100
cmp byte ptr [rcx], ah // don't flush the CPU cache if Locked still true
je @sp
lock cmpxchg byte ptr [rcx], ah
je @ok
jmp @sp
@rc: call ReleaseCore
lea rax, [rip + HeapStatus]
lock inc qword ptr [rax].TMMStatus.Large.SleepCount
jmp @s
@ok:
end;
function AllocateLargeBlockFrom(size: PtrUInt;
existing: pointer; oldsize: PtrUInt): pointer;
var
blocksize: PtrUInt;
header, old: PLargeBlockHeader;
begin
blocksize := (size + LargeBlockHeaderSize +
LargeBlockGranularity - 1 + BlockHeaderSize) and -LargeBlockGranularity;
if existing = nil then
header := AllocLarge(blocksize)
else
{$ifdef FPCMM_NOMREMAP}
header := nil; // paranoid
{$else}
header := fpmremap(existing, oldsize, blocksize, MREMAP_MAYMOVE);
{$endif FPCMM_NOMREMAP}
if header <> nil then
begin
NotifyAlloc(HeapStatus.Large, blocksize);
if existing <> nil then
NotifyFree(HeapStatus.Large, oldsize);
header.BlockSizeAndFlags := blocksize or IsLargeBlockFlag;
LockLargeBlocks;
old := LargeBlocksCircularList.NextLargeBlockHeader;
header.PreviousLargeBlockHeader := @LargeBlocksCircularList;
LargeBlocksCircularList.NextLargeBlockHeader := header;
header.NextLargeBlockHeader := old;
old.PreviousLargeBlockHeader := header;
LargeBlocksLocked := False;
inc(header);
end;
result := header;
end;
function AllocateLargeBlock(size: PtrUInt): pointer;
begin
result := AllocateLargeBlockFrom(size, nil, 0);
end;
procedure FreeLarge(ptr: PLargeBlockHeader; size: PtrUInt);
begin
NotifyFree(HeapStatus.Large, size);
Free(ptr, size);
end;
function FreeLargeBlock(p: pointer): PtrInt;
var
header, prev, next: PLargeBlockHeader;
begin
header := pointer(PByte(p) - LargeBlockHeaderSize);
if header.BlockSizeAndFlags and IsFreeBlockFlag <> 0 then
begin // try to duplicate the same pointer twice
result := 0;
exit;
end;
LockLargeBlocks;
prev := header.PreviousLargeBlockHeader;
next := header.NextLargeBlockHeader;
next.PreviousLargeBlockHeader := prev;
prev.NextLargeBlockHeader := next;
LargeBlocksLocked := False;
result := DropMediumAndLargeFlagsMask and header.BlockSizeAndFlags;
FreeLarge(header, result);
end;
function ReallocateLargeBlock(p: pointer; size: PtrUInt): pointer;
var
oldavail, minup, new: PtrUInt;
{$ifndef FPCMM_NOMREMAP} prev, next, {$endif} header: PLargeBlockHeader;
begin
header := pointer(PByte(p) - LargeBlockHeaderSize);
oldavail := (DropMediumAndLargeFlagsMask and header^.BlockSizeAndFlags) -
(LargeBlockHeaderSize + BlockHeaderSize);
new := size;
if size > oldavail then
begin
// size-up with 1/8 or 1/4 overhead for any future growing realloc
if oldavail > 128 shl 20 then
minup := oldavail + oldavail shr 3
else
minup := oldavail + oldavail shr 2;
if size < minup then
new := minup;
end
else
begin
result := p;
oldavail := oldavail shr 1;
if size >= oldavail then
// small size-up within current buffer -> no reallocate
exit
else
// size-down and move just the trailing data
oldavail := size;
end;
{$ifdef FPCMM_NOMREMAP}
// no mremap(): reallocate a new block, copy the existing data, free old
result := _GetMem(new);
if result <> nil then
MoveLarge(p, result, oldavail);
_FreeMem(p);
{$else}
// remove from current chain list
LockLargeBlocks;
prev := header^.PreviousLargeBlockHeader;
next := header^.NextLargeBlockHeader;
next.PreviousLargeBlockHeader := prev;
prev.NextLargeBlockHeader := next;
LargeBlocksLocked := False;
// let the Linux Kernel mremap() the memory using its TLB magic
size := DropMediumAndLargeFlagsMask and header^.BlockSizeAndFlags;
result := AllocateLargeBlockFrom(new, header, size);
{$endif FPCMM_NOMREMAP}
end;
{ ********* Main Memory Manager Functions }
procedure LockGetMem; nostackframe; assembler;
asm
// Can use one of the several arenas reserved for tiny blocks?
cmp ecx, SizeOf(TTinyBlockTypes)
jae @NotTinyBlockType
{ ---------- TINY (size<=128B) block lock ---------- }
@LockTinyBlockTypeLoop:
// Round-Robin attempt to lock of SmallBlockInfo.Tiny[]
// -> fair distribution among calls to reduce thread contention
mov edx, NumTinyBlockArenas
@TinyBlockArenaLoop:
mov eax, SizeOf(TTinyBlockTypes)
lock xadd dword ptr [r8 + TSmallBlockInfo.TinyCurrentArena], eax
and eax, (NumTinyBlockArenas * Sizeof(TTinyBlockTypes)) - 1
add rax, rcx
lea rbx, [r8 + rax].TSmallBlockInfo.Tiny
mov eax, $100
cmp [rbx].TSmallBlockType.BlockTypeLocked, ah
je @NextTinyBlockArena
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
jne @NextTinyBlockArena
@GotLockOnTinyBlockType:
ret
@NextTinyBlockArena:
dec edx
jnz @TinyBlockArenaLoop
// Also try the default SmallBlockInfo.Small[]
lea rbx, [r8 + rcx]
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
je @GotLockOnTinyBlockType
// Thread Contention (occurs much less than during _Freemem)
lock inc dword ptr [rbx].TSmallBlockType.GetmemSleepCount
push r8
push rcx
call ReleaseCore
pop rcx
pop r8
jmp @LockTinyBlockTypeLoop
{ ---------- SMALL (size<2600) block lock ---------- }
@NotTinyBlockType:
lea rbx, [r8 + rcx].TSmallBlockInfo.Small
@LockBlockTypeLoopRetry:
{$ifdef FPCMM_PAUSE}
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinSmallGetmemLockTSC] // r9 = endtsc
{$endif FPCMM_PAUSE}
@LockBlockTypeLoop:
// Grab the default block type
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
jne @LockNextSmallBlockType
@GotLockOnSmallBlockType:
ret
@LockNextSmallBlockType:
// Try up to two next sizes
add rbx, SizeOf(TSmallBlockType)
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
je @GotLockOnSmallBlockType
pause
add rbx, SizeOf(TSmallBlockType)
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
je @GotLockOnSmallBlockType
sub rbx, 2 * SizeOf(TSmallBlockType)
{$ifdef FPCMM_PAUSE}
pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
jb @LockBlockTypeLoop // no timeout yet
{$endif FPCMM_PAUSE}
// Block type and two sizes larger are all locked - give up and sleep
lock inc dword ptr [rbx].TSmallBlockType.GetmemSleepCount
call ReleaseCore
jmp @LockBlockTypeLoopRetry
end;
function _GetMem(size: PtrUInt): pointer; nostackframe; assembler;
asm
{$ifndef MSWINDOWS}
mov rcx, size
{$else}
push rsi
push rdi
{$endif MSWINDOWS}
push rbx
// Since most allocations are for small blocks, determine small block type
lea rbx, [rip + SmallBlockInfo]
@VoidSizeToSomething:
lea rdx, [rcx + BlockHeaderSize - 1]
shr rdx, 4 // div SmallBlockGranularity
// Is it a tiny/small block?
cmp rcx, (MaximumSmallBlockSize - BlockHeaderSize)
ja @NotTinySmallBlock
test rcx, rcx
jz @VoidSize
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, qword ptr [rbx].TSmallBlockInfo.IsMultiThreadPtr
{$endif FPCMM_ASSUMEMULTITHREAD}
// Get the tiny/small TSmallBlockType[] offset in rcx
movzx ecx, byte ptr [rbx + rdx].TSmallBlockInfo.GetmemLookup
mov r8, rbx
shl ecx, SmallBlockTypePO2
// Acquire block type lock
{$ifdef FPCMM_ASSUMEMULTITHREAD}
call LockGetMem
{$else}
cmp byte ptr [rax], 0
jne @CheckTinySmallLock
add rbx, rcx
{$endif FPCMM_ASSUMEMULTITHREAD}
{ ---------- TINY/SMALL block registration ---------- }
@GotLockOnSmallBlockType:
// set rdx=NextPartiallyFreePool rax=FirstFreeBlock rcx=DropSmallFlagsMask
mov rdx, [rbx].TSmallBlockType.NextPartiallyFreePool
inc [rbx].TSmallBlockType.GetmemCount
mov rax, [rdx].TSmallBlockPoolHeader.FirstFreeBlock
mov rcx, DropSmallFlagsMask
// Is there a pool with free blocks?
cmp rdx, rbx
je @TrySmallSequentialFeed
inc [rdx].TSmallBlockPoolHeader.BlocksInUse
// Set the new first free block and the block header
and rcx, [rax - BlockHeaderSize]
mov [rdx].TSmallBlockPoolHeader.FirstFreeBlock, rcx
mov [rax - BlockHeaderSize], rdx
// Is the chunk now full?
jz @RemoveSmallPool
// Unlock the block type and leave
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
@Done: pop rbx
{$ifdef MSWINDOWS}
pop rdi
pop rsi
{$endif MSWINDOWS}
ret
@VoidSize:
inc ecx // "we always need to allocate something" (see RTL heap.inc)
jmp @VoidSizeToSomething
{$ifndef FPCMM_ASSUMEMULTITHREAD}
@CheckTinySmallLock:
call LockGetMem
jmp @GotLockOnSmallBlockType
{$endif FPCMM_ASSUMEMULTITHREAD}
@TrySmallSequentialFeed:
// Feed a small block sequentially
movzx ecx, [rbx].TSmallBlockType.BlockSize
mov rdx, [rbx].TSmallBlockType.CurrentSequentialFeedPool
add rcx, rax
// Can another block fit?
cmp rax, [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress
ja @AllocateSmallBlockPool
// Adjust number of used blocks and sequential feed pool
mov [rbx].TSmallBlockType.NextSequentialFeedBlockAddress, rcx
inc [rdx].TSmallBlockPoolHeader.BlocksInUse
// Unlock the block type, set the block header and leave
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
mov [rax - BlockHeaderSize], rdx
pop rbx
{$ifdef MSWINDOWS}
pop rdi
pop rsi
{$endif MSWINDOWS}
ret
@RemoveSmallPool:
// Pool is full - remove it from the partially free list
mov rcx, [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool
mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rbx
mov [rbx].TSmallBlockType.NextPartiallyFreePool, rcx
// Unlock the block type and leave
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
pop rbx
{$ifdef MSWINDOWS}
pop rdi
pop rsi
{$endif MSWINDOWS}
ret
@AllocateSmallBlockPool:
// Access shared information about Medium blocks storage
lea rcx, [rip + MediumBlockInfo]
mov r10, rcx
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [rcx + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumLocked1
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumLocked1
call LockMediumBlocks
@MediumLocked1:
// Are there any available blocks of a suitable size?
movsx esi, [rbx].TSmallBlockType.AllowedGroupsForBlockPoolBitmap
and esi, [r10 + TMediumBlockInfo.BinGroupBitmap]
jz @NoSuitableMediumBlocks
// Compute rax = bin group number with free blocks, rcx = bin number
bsf eax, esi
lea r9, [rax * 4]
mov ecx, [r10 + TMediumBlockInfo.BinBitmaps + r9]
bsf ecx, ecx
lea rcx, [rcx + r9 * 8]
// Set rdi = @bin, rsi = free block
lea rsi, [rcx * 8] // SizeOf(TMediumBlockBin) = 16
lea rdi, [r10 + TMediumBlockInfo.Bins + rsi * 2]
mov rsi, TMediumFreeBlock[rdi].NextFreeBlock
// Remove the first block from the linked list (LIFO)
mov rdx, TMediumFreeBlock[rsi].NextFreeBlock
mov TMediumFreeBlock[rdi].NextFreeBlock, rdx
mov TMediumFreeBlock[rdx].PreviousFreeBlock, rdi
// Is this bin now empty?
cmp rdi, rdx
jne @MediumBinNotEmpty
// rbx = block type, rax = bin group number,
// r9 = bin group number * 4, rcx = bin number, rdi = @bin, rsi = free block
// Flag this bin (and the group if needed) as empty
mov edx, - 2
mov r11d, [r10 + TMediumBlockInfo.BinGroupBitmap]
rol edx, cl
btr r11d, eax // btr reg,reg is faster than btr [mem],reg
and [r10 + TMediumBlockInfo.BinBitmaps + r9], edx
jnz @MediumBinNotEmpty
mov [r10 + TMediumBlockInfo.BinGroupBitmap], r11d
@MediumBinNotEmpty:
// rsi = free block, rbx = block type
// Get the size of the available medium block in edi
mov rdi, DropMediumAndLargeFlagsMask
and rdi, [rsi - BlockHeaderSize]
cmp edi, MaximumSmallBlockPoolSize
jb @UseWholeBlock
// Split the block: new block size is the optimal size
mov edx, edi
movzx edi, [rbx].TSmallBlockType.OptimalBlockPoolSize
sub edx, edi
lea rcx, [rsi + rdi]
lea rax, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [rcx - BlockHeaderSize], rax
// Store the size of the second split as the second last pointer
mov [rcx + rdx - 16], rdx
// Put the remainder in a bin (it will be big enough)
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
jmp @GotMediumBlock
@NoSuitableMediumBlocks:
// Check the sequential feed medium block pool for space
movzx ecx, [rbx].TSmallBlockType.MinimumBlockPoolSize
mov edi, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
cmp edi, ecx
jb @AllocateNewSequentialFeed
// Get the address of the last block that was fed
mov rsi, [r10 + TMediumBlockInfo.LastSequentiallyFed]
// Enough sequential feed space: Will the remainder be usable?
movzx ecx, [rbx].TSmallBlockType.OptimalBlockPoolSize
lea rdx, [rcx + MinimumMediumBlockSize]
cmp edi, edx
cmovae edi, ecx
sub rsi, rdi
// Update the sequential feed parameters
sub [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], edi
mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rsi
jmp @GotMediumBlock
@AllocateNewSequentialFeed:
// Use the optimal size for allocating this small block pool
movzx size, word ptr [rbx].TSmallBlockType.OptimalBlockPoolSize
push size // use "size" variable = first argument in current ABI call
call AllocNewSequentialFeedMediumPool
pop rdi // restore edi=blocksize and r10=MediumBlockInfo
lea r10, [rip + MediumBlockInfo]
mov rsi, rax
test rax, rax
jnz @GotMediumBlock // rsi=freeblock rbx=blocktype edi=blocksize
mov [r10 + TMediumBlockInfo.Locked], al
mov [rbx].TSmallBlockType.BlockTypeLocked, al
{$ifdef MSWINDOWS}
jmp @Done
{$else}
pop rbx
ret
{$endif MSWINDOWS}
@UseWholeBlock:
// rsi = free block, rbx = block type, edi = block size
// Mark this block as used in the block following it
and byte ptr [rsi + rdi - BlockHeaderSize], NOT PreviousMediumBlockIsFreeFlag
@GotMediumBlock:
// rsi = free block, rbx = block type, edi = block size
// Set the size and flags for this block
lea rcx, [rdi + IsMediumBlockFlag + IsSmallBlockPoolInUseFlag]
mov [rsi - BlockHeaderSize], rcx
// Unlock medium blocks and setup the block pool
xor eax, eax
mov [r10 + TMediumBlockInfo.Locked], al
mov TSmallBlockPoolHeader[rsi].BlockType, rbx
mov TSmallBlockPoolHeader[rsi].FirstFreeBlock, rax
mov TSmallBlockPoolHeader[rsi].BlocksInUse, 1
mov [rbx].TSmallBlockType.CurrentSequentialFeedPool, rsi
// Return the pointer to the first block, compute next/last block addresses
lea rax, [rsi + SmallBlockPoolHeaderSize]
movzx ecx, [rbx].TSmallBlockType.BlockSize
lea rdx, [rax + rcx]
mov [rbx].TSmallBlockType.NextSequentialFeedBlockAddress, rdx
add rdi, rsi
sub rdi, rcx
mov [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress, rdi
// Unlock the small block type, set header and leave
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
mov [rax - BlockHeaderSize], rsi
pop rbx
{$ifdef MSWINDOWS}
pop rdi
pop rsi
{$endif MSWINDOWS}
ret
{ ---------- MEDIUM block allocation ---------- }
@NotTinySmallBlock:
// Do we need a Large block?
lea r10, [rip + MediumBlockInfo]
cmp rcx, MaximumMediumBlockSize - BlockHeaderSize
ja @IsALargeBlockRequest
// Get the bin size for this block size (rounded up to the next bin size)
lea rbx, [rcx + MediumBlockGranularity - 1 + BlockHeaderSize - MediumBlockSizeOffset]
mov rcx, r10
and ebx, - MediumBlockGranularity
add ebx, MediumBlockSizeOffset
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumLocked2
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumLocked2
call LockMediumBlocks
@MediumLocked2:
// Compute ecx = bin number in ecx and edx = group number
lea rdx, [rbx - MinimumMediumBlockSize]
mov ecx, edx
shr edx, 8 + 5
shr ecx, 8
mov eax, - 1
shl eax, cl
and eax, [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4]
jz @GroupIsEmpty
and ecx, - 32
bsf eax, eax
or ecx, eax
jmp @GotBinAndGroup
@GroupIsEmpty:
// Try all groups greater than this group
mov eax, - 2
mov ecx, edx
shl eax, cl
and eax, [r10 + TMediumBlockInfo.BinGroupBitmap]
jz @TrySequentialFeedMedium
// There is a suitable group with enough space
bsf edx, eax
mov eax, [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4]
bsf ecx, eax
mov eax, edx
shl eax, 5
or ecx, eax
jmp @GotBinAndGroup
@TrySequentialFeedMedium:
mov ecx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
// Can block be fed sequentially?
sub ecx, ebx
jc @AllocateNewSequentialFeedForMedium
// Get the block address, store remaining bytes, set the flags and unlock
mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed]
sub rax, rbx
mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rax
mov [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], ecx
or rbx, IsMediumBlockFlag
mov [rax - BlockHeaderSize], rbx
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
{$ifdef MSWINDOWS}
jmp @Done
{$else}
pop rbx
ret
{$endif MSWINDOWS}
@AllocateNewSequentialFeedForMedium:
mov size, rbx // 'size' variable is the first argument register in ABI call
call AllocNewSequentialFeedMediumPool
mov byte ptr [rip + MediumBlockInfo.Locked], false // r10 has been overwritten
{$ifdef MSWINDOWS}
jmp @Done
{$else}
pop rbx
ret
{$endif MSWINDOWS}
@GotBinAndGroup:
// ebx = block size, ecx = bin number, edx = group number
// Compute rdi = @bin, rsi = free block
lea rax, [rcx + rcx]
lea rdi, [r10 + TMediumBlockInfo.Bins + rax * 8]
mov rsi, TMediumFreeBlock[rdi].NextFreeBlock
// Remove the first block from the linked list (LIFO)
mov rax, TMediumFreeBlock[rsi].NextFreeBlock
mov TMediumFreeBlock[rdi].NextFreeBlock, rax
mov TMediumFreeBlock[rax].PreviousFreeBlock, rdi
// Is this bin now empty?
cmp rdi, rax
jne @MediumBinNotEmptyForMedium
// edx = bin group number, ecx = bin number, rdi = @bin, rsi = free block, ebx = block size
// Flag this bin and group as empty
mov eax, - 2
mov r11d, [r10 + TMediumBlockInfo.BinGroupBitmap]
rol eax, cl
btr r11d, edx // btr reg,reg is faster than btr [mem],reg
and [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax
jnz @MediumBinNotEmptyForMedium
mov [r10 + TMediumBlockInfo.BinGroupBitmap], r11d
@MediumBinNotEmptyForMedium:
// rsi = free block, ebx = block size
// Get rdi = size of the available medium block, rdx = second split size
mov rdi, DropMediumAndLargeFlagsMask
and rdi, [rsi - BlockHeaderSize]
mov edx, edi
sub edx, ebx
jz @UseWholeBlockForMedium
// Split the block in two
lea rcx, [rsi + rbx]
lea rax, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [rcx - BlockHeaderSize], rax
// Store the size of the second split as the second last pointer
mov [rcx + rdx - 16], rdx
// Put the remainder in a bin
cmp edx, MinimumMediumBlockSize
jb @GotMediumBlockForMedium
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
jmp @GotMediumBlockForMedium
@UseWholeBlockForMedium:
// Mark this block as used in the block following it
and byte ptr [rsi + rdi - BlockHeaderSize], NOT PreviousMediumBlockIsFreeFlag
@GotMediumBlockForMedium:
// Set the size and flags for this block
lea rcx, [rbx + IsMediumBlockFlag]
mov [rsi - BlockHeaderSize], rcx
// Unlock medium blocks and leave
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov rax, rsi
{$ifdef MSWINDOWS}
jmp @Done
{$else}
pop rbx
ret
{$endif MSWINDOWS}
{ ---------- LARGE block allocation ---------- }
@IsALargeBlockRequest:
xor rax, rax
test rcx, rcx
js @DoneLarge
// Note: size is still in the rcx/rdi first param register
call AllocateLargeBlock
@DoneLarge:
{$ifdef MSWINDOWS}
jmp @Done
{$else}
pop rbx
{$endif MSWINDOWS}
end;
function FreeMediumBlock(arg1: pointer): PtrUInt; nostackframe; assembler;
// rcx=P rdx=[P-BlockHeaderSize]
asm
// Drop the flags, set r10=MediumBlockInfo r11=P rbx=blocksize
lea r10, [rip + MediumBlockInfo]
and rdx, DropMediumAndLargeFlagsMask
push rbx
push rdx // save blocksize
mov rbx, rdx
mov r11, rcx
// Lock the Medium blocks
mov rcx, r10
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumBlocksLocked
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumBlocksLocked
call LockMediumBlocks
@MediumBlocksLocked:
// Get rcx = next block size and flags
mov rcx, [r11 + rbx - BlockHeaderSize]
// Can we combine this block with the next free block?
test qword ptr [r11 + rbx - BlockHeaderSize], IsFreeBlockFlag
jnz @NextBlockIsFree
// Set the "PreviousIsFree" flag in the next block
or rcx, PreviousMediumBlockIsFreeFlag
mov [r11 + rbx - BlockHeaderSize], rcx
@NextBlockChecked:
// Re-read the flags and try to combine with previous free block
test byte ptr [r11 - BlockHeaderSize], PreviousMediumBlockIsFreeFlag
jnz @PreviousBlockIsFree
@PreviousBlockChecked:
// Check if entire medium block pool is free
cmp ebx, (MediumBlockPoolSize - MediumBlockPoolHeaderSize)
je @EntireMediumPoolFree
@BinFreeMediumBlock:
// Store size of the block, flags and trailing size marker and insert into bin
lea rax, [rbx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [r11 - BlockHeaderSize], rax
mov [r11 + rbx - 16], rbx
mov rcx, r11
mov rdx, rbx
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
// Unlock medium blocks and leave
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
pop rax // medium block size
pop rbx
ret
@NextBlockIsFree:
// Get rax = next block address, rbx = end of the block
lea rax, [r11 + rbx]
and rcx, DropMediumAndLargeFlagsMask
add rbx, rcx
// Was the block binned?
cmp rcx, MinimumMediumBlockSize
jb @NextBlockChecked
mov rcx, rax
call RemoveMediumFreeBlock // rcx = APMediumFreeBlock
jmp @NextBlockChecked
@PreviousBlockIsFree:
// Get rcx = size/point of the previous free block, rbx = new block end
mov rcx, [r11 - 16]
sub r11, rcx
add rbx, rcx
// Remove the previous block from the linked list
cmp ecx, MinimumMediumBlockSize
jb @PreviousBlockChecked
mov rcx, r11
call RemoveMediumFreeBlock // rcx = APMediumFreeBlock
jmp @PreviousBlockChecked
@EntireMediumPoolFree:
// Ensure current sequential feed pool is free
cmp dword ptr [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], MediumBlockPoolSize - MediumBlockPoolHeaderSize
jne @MakeEmptyMediumPoolSequentialFeed
// Remove this medium block pool from the linked list stored in its header
sub r11, MediumBlockPoolHeaderSize
mov rax, TMediumBlockPoolHeader[r11].PreviousMediumBlockPoolHeader
mov rdx, TMediumBlockPoolHeader[r11].NextMediumBlockPoolHeader
mov TMediumBlockPoolHeader[rax].NextMediumBlockPoolHeader, rdx
mov TMediumBlockPoolHeader[rdx].PreviousMediumBlockPoolHeader, rax
// Unlock medium blocks and free the block pool
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov arg1, r11
call FreeMedium
pop rax // medium block size
pop rbx
ret
@MakeEmptyMediumPoolSequentialFeed:
// Get rbx = end-marker block, and recycle the current sequential feed pool
lea rbx, [r11 + MediumBlockPoolSize - MediumBlockPoolHeaderSize]
call BinMediumSequentialFeedRemainder
// Set this medium pool up as the new sequential feed pool, unlock and leave
mov qword ptr [rbx - BlockHeaderSize], IsMediumBlockFlag
mov dword ptr [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], MediumBlockPoolSize - MediumBlockPoolHeaderSize
mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rbx
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
pop rax
pop rbx
end;
procedure FreeSmallLocked; nostackframe; assembler;
// rbx=TSmallBlockType rcx=P rdx=TSmallBlockPoolHeader
asm
// Adjust number of blocks in use, set rax = old first free block
inc [rbx].TSmallBlockType.FreememCount
mov rax, [rdx].TSmallBlockPoolHeader.FirstFreeBlock
sub [rdx].TSmallBlockPoolHeader.BlocksInUse, 1
jz @PoolIsNowEmpty
// Store this as the new first free block
mov [rdx].TSmallBlockPoolHeader.FirstFreeBlock, rcx
// Store the previous first free block as the block header
lea r9, [rax + IsFreeBlockFlag]
mov [rcx - BlockHeaderSize], r9
// Was the pool full?
test rax, rax
jnz @SmallPoolWasNotFull
// Insert the pool back into the linked list if it was full
mov rcx, [rbx].TSmallBlockType.NextPartiallyFreePool
mov [rdx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rbx
mov [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool, rcx
mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rdx
mov [rbx].TSmallBlockType.NextPartiallyFreePool, rdx
@SmallPoolWasNotFull:
{$ifdef FPCMM_LOCKLESSFREE}
// Try to release all pending bin from this block while we have the lock
cmp byte ptr [rbx].TSmallBlockType.BinCount, 0
jne @ProcessPendingBin
{$endif FPCMM_LOCKLESSFREE}
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
ret
@PoolIsNowEmpty:
// FirstFreeBlock=nil means it is the sequential feed pool with a single block
test rax, rax
jz @IsSequentialFeedPool
// Pool is now empty: Remove it from the linked list and free it
mov rax, [rdx].TSmallBlockPoolHeader.PreviousPartiallyFreePool
mov rcx, [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool
mov TSmallBlockPoolHeader[rax].NextPartiallyFreePool, rcx
mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rax
// Is this the sequential feed pool? If so, stop sequential feeding
xor eax, eax
cmp [rbx].TSmallBlockType.CurrentSequentialFeedPool, rdx
jne @NotSequentialFeedPool
@IsSequentialFeedPool:
mov [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress, rax
@NotSequentialFeedPool:
// Unlock blocktype and release this pool
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
mov rcx, rdx
mov rdx, qword ptr [rdx - BlockHeaderSize]
jmp FreeMediumBlock // no call nor BinLocked to avoid race condition
{$ifdef FPCMM_LOCKLESSFREE}
@ProcessPendingBin:
// Try twice to acquire BinLocked (spinning now may induce race condition)
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
je @BinLocked
pause
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
jne @BinAlreadyLocked
@BinLocked:
movzx eax, byte ptr [rbx].TSmallBlockType.BinCount
test al, al
jz @NoBin
// free last pointer in TSmallBlockType.BinInstance[]
mov rcx, qword ptr [rbx + TSmallBlockType.BinInstance - 8 + rax * 8]
dec byte ptr [rbx].TSmallBlockType.BinCount
mov byte ptr [rbx].TSmallBlockType.BinLocked, false
mov rdx, [rcx - BlockHeaderSize]
jmp FreeSmallLocked
@NoBin: mov byte ptr [rbx].TSmallBlockType.BinLocked, false
@BinAlreadyLocked:
mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false
{$endif FPCMM_LOCKLESSFREE}
end;
function _FreeMem(P: pointer): PtrUInt; nostackframe; assembler;
asm
xor eax, eax
{$ifndef MSWINDOWS}
mov rcx, P
{$endif MSWINDOWS}
test P, P
jz @VoidPointer
{$ifdef FPCMM_REPORTMEMORYLEAKS}
mov qword ptr [P], rax // reset TObject VMT or string/dynarray header
{$endif FPCMM_REPORTMEMORYLEAKS}
mov rdx, qword ptr [P - BlockHeaderSize]
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, qword ptr [rip + SmallBlockInfo].TSmallBlockInfo.IsMultiThreadPtr
{$endif FPCMM_ASSUMEMULTITHREAD}
// Is it a small block in use?
test dl, IsFreeBlockFlag + IsMediumBlockFlag + IsLargeBlockFlag
jnz @NotSmallBlockInUse
// Get the small block type in rbx and try to grab it
push rbx
mov rbx, [rdx].TSmallBlockPoolHeader.BlockType
{$ifdef FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
jne @CheckTinySmallLock
{$else}
cmp byte ptr [rax], 0
jne @TinySmallLockLoop
{$endif FPCMM_ASSUMEMULTITHREAD}
@FreeAndUnlock:
call FreeSmallLocked
movzx eax, word ptr [rbx].TSmallBlockType.BlockSize
pop rbx
@VoidPointer:
ret
@NotSmallBlockInUse:
test dl, IsFreeBlockFlag + IsLargeBlockFlag
jz FreeMediumBlock
jmp FreeLargeBlock // P is still in rcx/rdi first param register
@TinySmallLockLoop:
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
je @FreeAndUnlock
@CheckTinySmallLock:
{$ifdef FPCMM_LOCKLESSFREE}
// Try to put rcx=P in TSmallBlockType.BinInstance[]
cmp byte ptr [rbx].TSmallBlockType.BinCount, SmallBlockBinCount
je @LockBlockTypeSleep
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
je @BinLocked
{$ifdef FPCMM_PAUSE}
push rdx
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinSmallFreememBinTSC] // r9 = endtsc
@SpinBinLock:
pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @SpinTimeout
cmp byte ptr [rbx].TSmallBlockType.BinLocked, true
je @SpinBinLock
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
jne @SpinBinLock
pop rdx
jmp @BinLocked
@SpinTimeout:
pop rdx
{$endif FPCMM_PAUSE}
{$ifdef FPCMM_DEBUG}
inc dword ptr [rbx].TSmallBlockType.BinSpinCount // no lock (informative only)
{$endif FPCMM_DEBUG}
jmp @LockBlockTypeSleep
@BinLocked:
movzx eax, byte ptr [rbx].TSmallBlockType.BinCount
cmp al, SmallBlockBinCount
je @LockBlockType
inc byte ptr [rbx].TSmallBlockType.BinCount
mov [rbx + TSmallBlockType.BinInstance + rax * 8], rcx
mov byte ptr [rbx].TSmallBlockType.BinLocked, false
movzx eax, word ptr [rbx].TSmallBlockType.BlockSize
pop rbx
ret
@LockBlockType:
// Fallback to main block lock if TSmallBlockType.BinInstance[] is full
mov byte ptr [rbx].TSmallBlockType.BinLocked, false
{$endif FPCMM_LOCKLESSFREE}
@LockBlockTypeSleep:
{$ifdef FPCMM_PAUSE}
// Spin to grab the block type (don't try too long due to contention)
push rdx
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinSmallFreememLockTSC] // r9 = endtsc
@SpinLockBlockType:
pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @LockBlockTypeReleaseCore
cmp byte ptr [rbx].TSmallBlockType.BlockTypeLocked, 1
je @SpinLockBlockType
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah
jne @SpinLockBlockType
pop rdx
jmp @FreeAndUnlock
@LockBlockTypeReleaseCore:
pop rdx
{$endif FPCMM_PAUSE}
// Couldn't grab the block type - sleep and try again
lock inc dword ptr [rbx].TSmallBlockType.FreeMemSleepCount
push rdx
push rcx
call ReleaseCore
pop rcx
pop rdx
jmp @TinySmallLockLoop
end;
// warning: FPC signature is not the same than Delphi: requires "var P"
function _ReallocMem(var P: pointer; Size: PtrUInt): pointer; nostackframe; assembler;
asm
{$ifdef MSWINDOWS}
push rdi
push rsi
{$else}
mov rdx, Size
{$endif MSWINDOWS}
push rbx
push r14
push P // for assignement in @Done
mov r14, qword ptr [P]
test rdx, rdx
jz @VoidSize // ReallocMem(P,0)=FreeMem(P)
test r14, r14
jz @GetMemMoveFreeMem // ReallocMem(nil,Size)=GetMem(Size)
mov rcx, [r14 - BlockHeaderSize]
test cl, IsFreeBlockFlag + IsMediumBlockFlag + IsLargeBlockFlag
jnz @NotASmallBlock
{ -------------- TINY/SMALL block ------------- }
// Get rbx=blocktype, rcx=available size, rax=inplaceresize
mov rbx, [rcx].TSmallBlockPoolHeader.BlockType
lea rax, [rdx * 4 + SmallBlockDownsizeCheckAdder]
movzx ecx, [rbx].TSmallBlockType.BlockSize
sub ecx, BlockHeaderSize
cmp rcx, rdx
jb @SmallUpsize
// Downsize or small growup with enough space: reallocate only if need
cmp eax, ecx
jb @GetMemMoveFreeMem // r14=P rdx=size
@NoResize:
mov rax, r14 // keep original pointer
pop rcx
pop r14
pop rbx
{$ifdef MSWINDOWS}
pop rsi
pop rdi
{$endif MSWINDOWS}
ret
@VoidSize:
push rdx // to set P=nil
jmp @DoFree // ReallocMem(P,0)=FreeMem(P)
@SmallUpsize:
// State: r14=pointer, rdx=NewSize, rcx=CurrentBlockSize, rbx=CurrentBlockType
// Small blocks always grow with at least 100% + SmallBlockUpsizeAdder bytes
lea P, qword ptr [rcx * 2 + SmallBlockUpsizeAdder]
movzx ebx, [rbx].TSmallBlockType.BlockSize
sub ebx, BlockHeaderSize + 8
// r14=pointer, P=NextUpBlockSize, rdx=NewSize, rbx=OldSize-8
@AdjustGetMemMoveFreeMem:
// New allocated size is max(requestedsize, minimumupsize)
cmp rdx, P
cmova P, rdx
push rdx
call _GetMem
pop rdx
test rax, rax
jz @Done
jmp @MoveFreeMem // rax=New r14=P rbx=size-8
@GetMemMoveFreeMem:
// reallocate copy and free: r14=P rdx=size
mov rbx, rdx
mov P, rdx // P is the proper first argument register
call _GetMem
test rax, rax
jz @Done
test r14, r14 // ReallocMem(nil,Size)=GetMem(Size)
jz @Done
sub rbx, 8
@MoveFreeMem:
// copy and free: rax=New r14=P rbx=size-8
push rax
lea rcx, [r14 + rbx]
lea rdx, [rax + rbx]
neg rbx
jns @Last8
align 16
@MoveBy16:
movaps xmm0, oword ptr [rcx + rbx]
movaps oword ptr [rdx + rbx], xmm0
add rbx, 16
js @MoveBy16
@Last8: mov rax, qword ptr [rcx + rbx]
mov qword ptr [rdx + rbx], rax
@DoFree:
mov P, r14
call _FreeMem
pop rax
@Done: pop rcx
pop r14
pop rbx
{$ifdef MSWINDOWS}
pop rsi
pop rdi
{$endif MSWINDOWS}
mov qword ptr [rcx], rax // store new pointer in var P
ret
@NotASmallBlock:
// Is this a medium block or a large block?
test cl, IsFreeBlockFlag + IsLargeBlockFlag
jnz @PossibleLargeBlock
{ -------------- MEDIUM block ------------- }
// rcx = Current Size + Flags, r14 = P, rdx = Requested Size, r10 = MediumBlockInfo
lea rsi, [rdx + rdx]
lea r10, [rip + MediumBlockInfo]
mov rbx, rcx
and ecx, DropMediumAndLargeFlagsMask
lea rdi, [r14 + rcx]
sub ecx, BlockHeaderSize
and ebx, ExtractMediumAndLargeFlagsMask
// Is it an upsize or a downsize?
cmp rdx, rcx
ja @MediumBlockUpsize
// rcx = Current Block Size - BlockHeaderSize, rbx = Current Block Flags,
// rdi = @Next Block, r14 = P, rdx = Requested Size
// Downsize reallocate and move data only if less than half the current size
cmp rsi, rcx
jae @NoResize
// In-place downsize? Ensure not smaller than MinimumMediumBlockSize
cmp edx, MinimumMediumBlockSize - BlockHeaderSize
jae @MediumBlockInPlaceDownsize
// Need to move to another Medium block pool, or into a Small block?
cmp edx, MediumInPlaceDownsizeLimit
jb @GetMemMoveFreeMem
// No need to realloc: resize in-place (if not already at the minimum size)
mov edx, MinimumMediumBlockSize - BlockHeaderSize
cmp ecx, MinimumMediumBlockSize - BlockHeaderSize
jna @NoResize
@MediumBlockInPlaceDownsize:
// Round up to the next medium block size
lea rsi, [rdx + BlockHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset]
and rsi, - MediumBlockGranularity
add rsi, MediumBlockSizeOffset
// Get the size of the second split
add ecx, BlockHeaderSize
sub ecx, esi
mov ebx, ecx
// Lock the medium blocks
mov rcx, r10
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumBlocksLocked1
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumBlocksLocked1
call LockMediumBlocks
@MediumBlocksLocked1:
mov ecx, ebx
// Reread the flags - may have changed before medium blocks could be locked
mov rbx, ExtractMediumAndLargeFlagsMask
and rbx, [r14 - BlockHeaderSize]
@DoMediumInPlaceDownsize:
// Set the new size in header, and get rbx = second split size
or rbx, rsi
mov [r14 - BlockHeaderSize], rbx
mov ebx, ecx
// If the next block is used, flag its previous block as free
mov rdx, [rdi - BlockHeaderSize]
test dl, IsFreeBlockFlag
jnz @MediumDownsizeNextBlockFree
or rdx, PreviousMediumBlockIsFreeFlag
mov [rdi - BlockHeaderSize], rdx
jmp @MediumDownsizeDoSplit
@MediumDownsizeNextBlockFree:
// If the next block is free, combine both
mov rcx, rdi
and rdx, DropMediumAndLargeFlagsMask
add rbx, rdx
add rdi, rdx
cmp edx, MinimumMediumBlockSize
jb @MediumDownsizeDoSplit
call RemoveMediumFreeBlock // rcx=APMediumFreeBlock
@MediumDownsizeDoSplit:
// Store the trailing size field and free part header
mov [rdi - 16], rbx
lea rcx, [rbx + IsMediumBlockFlag + IsFreeBlockFlag];
mov [r14 + rsi - BlockHeaderSize], rcx
// Bin this free block (if worth it)
cmp rbx, MinimumMediumBlockSize
jb @MediumBlockDownsizeDone
lea rcx, [r14 + rsi]
mov rdx, rbx
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
@MediumBlockDownsizeDone:
// Unlock the medium blocks, and leave with the new pointer
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov rax, r14
jmp @Done
@MediumBlockUpsize:
// ecx = Current Block Size - BlockHeaderSize, bl = Current Block Flags,
// rdi = @Next Block, r14 = P, rdx = Requested Size
// Try to make in-place upsize
mov rax, [rdi - BlockHeaderSize]
test al, IsFreeBlockFlag
jz @CannotUpsizeMediumBlockInPlace
// Get rax = available size, rsi = available size with the next block
and rax, DropMediumAndLargeFlagsMask
lea rsi, [rax + rcx]
cmp rdx, rsi
ja @CannotUpsizeMediumBlockInPlace
// Grow into the next block
mov rbx, rcx
mov rcx, r10
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumBlocksLocked2
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumBlocksLocked2
mov rsi, rdx
call LockMediumBlocks
mov rdx, rsi
@MediumBlocksLocked2:
// Re-read info once locked, and ensure next block is still free
mov rcx, rbx
mov rbx, ExtractMediumAndLargeFlagsMask
and rbx, [r14 - BlockHeaderSize]
mov rax, [rdi - BlockHeaderSize]
test al, IsFreeBlockFlag
jz @NextMediumBlockChanged
and eax, DropMediumAndLargeFlagsMask
lea rsi, [rax + rcx]
cmp rdx, rsi
ja @NextMediumBlockChanged
@DoMediumInPlaceUpsize:
// Bin next free block (if worth it)
cmp eax, MinimumMediumBlockSize
jb @MediumInPlaceNoNextRemove
push rcx
push rdx
mov rcx, rdi
call RemoveMediumFreeBlock // rcx=APMediumFreeBlock
pop rdx
pop rcx
@MediumInPlaceNoNextRemove:
// Medium blocks grow a minimum of 25% in in-place upsizes
mov eax, ecx
shr eax, 2
add eax, ecx
// Get the maximum of the requested size and the minimum growth size
xor edi, edi
sub eax, edx
adc edi, - 1
and eax, edi
// Round up to the nearest block size granularity
lea rax, [rax + rdx + BlockHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset]
and eax, - MediumBlockGranularity
add eax, MediumBlockSizeOffset
// Calculate the size of the second split and check if it fits
lea rdx, [rsi + BlockHeaderSize]
sub edx, eax
ja @MediumInPlaceUpsizeSplit
// Grab the whole block: Mark it as used in the next block, and adjust size
and qword ptr [r14 + rsi], NOT PreviousMediumBlockIsFreeFlag
add rsi, BlockHeaderSize
jmp @MediumUpsizeInPlaceDone
@MediumInPlaceUpsizeSplit:
// Store the size of the second split as the second last pointer
mov [r14 + rsi - BlockHeaderSize], rdx
// Set the second split header
lea rdi, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [r14 + rax - BlockHeaderSize], rdi
mov rsi, rax
cmp edx, MinimumMediumBlockSize
jb @MediumUpsizeInPlaceDone
lea rcx, [r14 + rax]
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
@MediumUpsizeInPlaceDone:
// No need to move data at upsize: set the size and flags for this block
or rsi, rbx
mov [r14 - BlockHeaderSize], rsi
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov rax, r14
jmp @Done
@NextMediumBlockChanged:
// The next block changed during lock: reallocate and move data
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
@CannotUpsizeMediumBlockInPlace:
// rcx=OldSize-8, rdx=NewSize
mov rbx, rcx
mov eax, ecx
shr eax, 2
lea P, qword ptr [rcx + rax] // NextUpBlockSize = OldSize+25%
jmp @AdjustGetMemMoveFreeMem // P=BlockSize, rdx=NewSize, rbx=OldSize-8
@PossibleLargeBlock:
{ -------------- LARGE block ------------- }
test cl, IsFreeBlockFlag + IsMediumBlockFlag
jnz @Error
{$ifdef MSWINDOWS}
mov rcx, r14
{$else}
mov rdi, r14
mov rsi, rdx
{$endif MSWINDOWS}
call ReallocateLargeBlock // with restored proper registers
jmp @Done
@Error: xor eax, eax
jmp @Done
end;
function _AllocMem(Size: PtrUInt): pointer; nostackframe; assembler;
asm
push rbx
// Get rbx = size rounded down to the previous multiple of SizeOf(pointer)
lea rbx, [Size - 1]
and rbx, - 8
call _GetMem
// Could a block be allocated? rcx = 0 if yes, -1 if no
cmp rax, 1
sbb rcx, rcx
// Point rdx to the last pointer
lea rdx, [rax + rbx]
// Compute Size (1..8 doesn't need to enter the SSE2 loop)
or rbx, rcx
jz @LastQ
// Large blocks from mmap/VirtualAlloc are already zero filled
cmp rbx, MaximumMediumBlockSize - BlockHeaderSize
jae @Done
neg rbx
pxor xmm0, xmm0
align 16
@FillLoop: // non-temporal movntdq not needed when size <256KB (small/medium)
movaps oword ptr [rdx + rbx], xmm0
add rbx, 16
js @FillLoop
@LastQ: xor rcx, rcx
mov qword ptr [rdx], rcx
@Done: pop rbx
end;
function _MemSize(P: pointer): PtrUInt;
begin
// AFAIK used only by fpc_AnsiStr_SetLength() in RTL
P := PPointer(PByte(P) - BlockHeaderSize)^;
if (PtrUInt(P) and (IsMediumBlockFlag or IsLargeBlockFlag)) = 0 then
result := PSmallBlockPoolHeader(PtrUInt(P) and DropSmallFlagsMask).
BlockType.BlockSize - BlockHeaderSize
else
begin
result := (PtrUInt(P) and DropMediumAndLargeFlagsMask) - BlockHeaderSize;
if (PtrUInt(P) and IsMediumBlockFlag) = 0 then
dec(result, LargeBlockHeaderSize);
end;
end;
function _FreeMemSize(P: pointer; size: PtrUInt): PtrInt;
begin
// should return the chunk size - only used by heaptrc AFAIK
if (P <> nil) and (size <> 0) then
result := _FreeMem(P)
else
result := 0;
end;
{ ********* Information Gathering }
{$ifdef FPCMM_STANDALONE}
procedure Assert(flag: boolean);
begin
end;
{$else}
function _GetHeapStatus: THeapStatus;
begin
FillChar(result, sizeof(result), 0);
end;
function _GetFPCHeapStatus: TFPCHeapStatus;
begin
FillChar(result, sizeof(result), 0);
end;
type
// match both TSmallBlockStatus and TSmallBlockContention
TRes = array[0..2] of cardinal;
// details are allocated on the stack, not the heap
TResArray = array[0..(NumSmallInfoBlock * 2) - 1] of TRes;
procedure QuickSortRes(var Res: TResArray; L, R, Level: PtrInt);
var
I, J, P: PtrInt;
pivot: cardinal;
tmp: TRes;
begin
if L < R then
repeat
I := L;
J := R;
P := (L + R) shr 1;
repeat
pivot := Res[P, Level];
while Res[I, Level] > pivot do
inc(I);
while Res[J, Level] < pivot do
dec(J);
if I <= J then
begin
tmp := Res[J];
Res[J] := Res[I];
Res[I] := tmp;
if P = I then
P := J
else if P = J then
P := I;
inc(I);
dec(J);
end;
until I > J;
if J - L < R - I then
begin // use recursion only for smaller range
if L < J then
QuickSortRes(Res, L, J, Level);
L := I;
end
else
begin
if I < R then
QuickSortRes(Res, I, R, Level);
R := J;
end;
until L >= R;
end;
procedure SetSmallBlockStatus(var res: TResArray);
var
i, a: integer;
p: PSmallBlockType;
d: ^TSmallBlockStatus;
begin
d := @res;
p := @SmallBlockInfo;
for i := 1 to NumSmallBlockTypes do
begin
d^.Total := p^.GetmemCount;
d^.Current := p^.GetmemCount - p^.FreememCount;
d^.BlockSize := p^.BlockSize;
inc(d);
inc(p);
end;
for a := 1 to NumTinyBlockArenas do
begin
d := @res; // aggregate counters
for i := 1 to NumTinyBlockTypes do
begin
inc(d^.Total, p^.GetmemCount);
inc(d^.Current, p^.GetmemCount - p^.FreememCount);
inc(d);
inc(p);
end;
end;
assert(p = @SmallBlockInfo.GetmemLookup);
end;
function SortSmallBlockStatus(var res: TResArray; maxcount, orderby: PtrInt;
count, bytes: PPtrUInt): PtrInt;
var
i: PtrInt;
begin
QuickSortRes(res, 0, NumSmallBlockTypes - 1, orderby);
if count <> nil then
begin
count^ := 0;
for i := 0 to NumSmallBlockTypes - 1 do
inc(count^, res[i, orderby]);
end;
if bytes <> nil then
begin
bytes^ := 0;
for i := 0 to NumSmallBlockTypes - 1 do
inc(bytes^, res[i, orderby] * res[i, ord(obBlockSize)]);
end;
result := maxcount;
if result > NumSmallBlockTypes then
result := NumSmallBlockTypes;
while (result > 0) and (res[result - 1, orderby] = 0) do
dec(result);
end;
function SetSmallBlockContention(var res: TResArray; maxcount: integer): integer;
var
i: integer;
p: PSmallBlockType;
d: ^TSmallBlockContention;
begin
result := 0;
d := @res;
p := @SmallBlockInfo;
for i := 1 to NumSmallInfoBlock do
begin
if p^.GetmemSleepCount <> 0 then
begin
d^.SleepCount := p^.GetmemSleepCount;
d^.GetmemBlockSize := p^.BlockSize;
d^.FreememBlockSize := 0;
inc(d);
inc(result);
end;
if p^.FreememSleepCount <> 0 then
begin
d^.SleepCount := p^.FreememSleepCount;
d^.GetmemBlockSize := 0;
d^.FreememBlockSize := p^.BlockSize;
inc(d);
inc(result);
end;
inc(p);
end;
if result = 0 then
exit;
QuickSortRes(res, 0, result - 1, 0);
if result > maxcount then
result := maxcount;
end;
function K(i: PtrUInt): shortstring;
var
tmp: string[1];
begin
if i >= 1 shl 50 then
begin
i := i shr 50;
tmp := 'Z';
end
else
if i >= 1 shl 40 then
begin
i := i shr 40;
tmp := 'T';
end
else
if i >= 1 shl 30 then
begin
i := i shr 30;
tmp := 'G';
end
else
if i >= 1 shl 20 then
begin
i := i shr 20;
tmp := 'M';
end
else
if i >= 1 shl 10 then
begin
i := i shr 10;
tmp := 'K';
end
else
tmp := '';
str(i, result);
result := result + tmp;
end;
{$I-}
procedure WriteHeapStatusDetail(const arena: TMMStatusArena;
const name: shortstring);
begin
write(name, K(arena.CurrentBytes):4,
'B/', K(arena.CumulativeBytes), 'B ');
{$ifdef FPCMM_DEBUG}
write(' peak=', K(arena.PeakBytes),
'B current=', K(arena.CumulativeAlloc - arena.CumulativeFree),
' alloc=', K(arena.CumulativeAlloc), ' free=', K(arena.CumulativeFree));
{$endif FPCMM_DEBUG}
writeln(' sleep=', K(arena.SleepCount));
end;
procedure WriteHeapStatus(const context: shortstring; smallblockstatuscount,
smallblockcontentioncount: integer; compilationflags: boolean);
var
res: TResArray; // no heap allocation involved
i, n, smallcount: PtrInt;
t, b: PtrUInt;
begin
if context[0] <> #0 then
writeln(context);
if compilationflags then
writeln(' Flags: ' {$ifdef FPCMM_BOOSTER} + 'BOOSTER ' {$else}
{$ifdef FPCMM_BOOST} + 'BOOST ' {$endif}{$endif}
{$ifdef FPCMM_SERVER} + 'SERVER ' {$endif}
{$ifdef FPCMM_ASSUMEMULTITHREAD} + ' assumulthrd' {$endif}
{$ifdef FPCMM_LOCKLESSFREE} + ' lockless' {$endif}
{$ifdef FPCMM_PAUSE} + ' pause' {$endif}
{$ifdef FPCMM_NOMREMAP} + ' nomremap' {$endif}
{$ifdef FPCMM_DEBUG} + ' debug' {$endif}
{$ifdef FPCMM_REPORTMEMORYLEAKS} + ' repmemleak' {$endif});
with CurrentHeapStatus do
begin
writeln(' Small: blocks=', K(SmallBlocks), ' size=', K(SmallBlocksSize),
'B (part of Medium arena)');
WriteHeapStatusDetail(Medium, ' Medium: ');
WriteHeapStatusDetail(Large, ' Large: ');
if SleepCount <> 0 then
writeln(' Total Sleep: count=', K(SleepCount)
{$ifdef FPCMM_DEBUG} , ' rdtsc=', K(SleepCycles) {$endif});
smallcount := SmallGetmemSleepCount + SmallFreememSleepCount;
if smallcount <> 0 then
writeln(' Small Sleep: getmem=', K(SmallGetmemSleepCount),
' freemem=', K(SmallFreememSleepCount)
{$ifdef FPCMM_LOCKLESSFREE} {$ifdef FPCMM_DEBUG} ,
' locklessspin=', K(SmallFreememLockLessSpin) {$endif} {$endif} );
end;
if (smallblockcontentioncount > 0) and (smallcount <> 0) then
begin
n := SetSmallBlockContention(res, smallblockcontentioncount);
for i := 0 to n - 1 do
with TSmallBlockContention(res[i]) do
begin
if GetmemBlockSize <> 0 then
write(' getmem(', GetmemBlockSize)
else
write(' freemem(', FreememBlockSize);
write(')=' , K(SleepCount));
if (i and 3 = 3) or (i = n - 1) then
writeln;
end;
end;
if smallblockstatuscount > 0 then
begin
SetSmallBlockStatus(res);
n := SortSmallBlockStatus(res, smallblockstatuscount, ord(obTotal), @t, @b) - 1;
writeln(' Small Blocks since beginning: ', K(t), '/', K(b), 'B');
for i := 0 to n do
with TSmallBlockStatus(res[i]) do
begin
write(' ', BlockSize, '=', K(Total));
if (i and 7 = 7) or (i = n) then
writeln;
end;
n := SortSmallBlockStatus(res, smallblockstatuscount, ord(obCurrent), @t, @b) - 1;
writeln(' Small Blocks current: ', K(t), '/', K(b), 'B');
for i := 0 to n do
with TSmallBlockStatus(res[i]) do
begin
write(' ', BlockSize, '=', K(Current));
if (i and 7 = 7) or (i = n) then
writeln;
end;
end;
end;
{$I+}
function GetSmallBlockStatus(maxcount: integer;
orderby: TSmallBlockOrderBy; count, bytes: PPtrUInt): TSmallBlockStatusDynArray;
var
res: TResArray;
begin
assert(SizeOf(TRes) = SizeOf(TSmallBlockStatus));
result := nil;
if maxcount <= 0 then
exit;
SetSmallBlockStatus(res);
maxcount := SortSmallBlockStatus(res, maxcount, ord(orderby), count, bytes);
if maxcount = 0 then
exit;
SetLength(result, maxcount);
Move(res[0], result[0], maxcount * SizeOf(res[0]));
end;
function GetSmallBlockContention(maxcount: integer): TSmallBlockContentionDynArray;
var
n: integer;
res: TResArray;
begin
result := nil;
if maxcount <= 0 then
exit;
n := SetSmallBlockContention(res, maxcount);
if n = 0 then
exit;
SetLength(result, n);
Move(res[0], result[0], n * SizeOf(res[0]));
end;
{$endif FPCMM_STANDALONE}
function CurrentHeapStatus: TMMStatus;
var
i: integer;
small: PtrUInt;
p: PSmallBlockType;
begin
result := HeapStatus;
p := @SmallBlockInfo;
for i := 1 to NumSmallInfoBlock do
begin
inc(result.SmallGetmemSleepCount, p^.GetmemSleepCount);
inc(result.SmallFreememSleepCount, p^.FreememSleepCount);
small := p^.GetmemCount - p^.FreememCount;
if small <> 0 then
begin
inc(result.SmallBlocks, small);
inc(result.SmallBlocksSize, small * p^.BlockSize);
end;
{$ifdef FPCMM_LOCKLESSFREE}
{$ifdef FPCMM_DEBUG}
inc(result.SmallFreememLockLessSpin, p^.BinSpinCount);
{$endif FPCMM_LOCKLESSFREE}
{$endif FPCMM_DEBUG}
inc(p);
end;
end;
{ ********* Initialization and Finalization }
procedure InitializeMemoryManager;
var
small: PSmallBlockType;
a, i, min, poolsize, num, perpool, size, start, next: PtrInt;
medium: PMediumFreeBlock;
begin
small := @SmallBlockInfo;
assert(SizeOf(small^) = 1 shl SmallBlockTypePO2);
for a := 0 to NumTinyBlockArenas do
for i := 0 to NumSmallBlockTypes - 1 do
begin
if (i = NumTinyBlockTypes) and (a > 0) then
break;
size := SmallBlockSizes[i];
assert(size and 15 = 0);
small^.BlockSize := size;
small^.PreviousPartiallyFreePool := pointer(small);
small^.NextPartiallyFreePool := pointer(small);
small^.MaxSequentialFeedBlockAddress := pointer(0);
small^.NextSequentialFeedBlockAddress := pointer(1);
min := ((size * MinimumSmallBlocksPerPool +
(SmallBlockPoolHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset))
and -MediumBlockGranularity) + MediumBlockSizeOffset;
if min < MinimumMediumBlockSize then
min := MinimumMediumBlockSize;
num := (min + (- MinimumMediumBlockSize +
MediumBlockBinsPerGroup * MediumBlockGranularity div 2)) div
(MediumBlockBinsPerGroup * MediumBlockGranularity);
if num > 7 then
num := 7;
small^.AllowedGroupsForBlockPoolBitmap := Byte(Byte(-1) shl num);
small^.MinimumBlockPoolSize := MinimumMediumBlockSize +
num * (MediumBlockBinsPerGroup * MediumBlockGranularity);
poolsize := ((size * TargetSmallBlocksPerPool +
(SmallBlockPoolHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset))
and -MediumBlockGranularity) + MediumBlockSizeOffset;
if poolsize < OptimalSmallBlockPoolSizeLowerLimit then
poolsize := OptimalSmallBlockPoolSizeLowerLimit;
if poolsize > OptimalSmallBlockPoolSizeUpperLimit then
poolsize := OptimalSmallBlockPoolSizeUpperLimit;
perpool := (poolsize - SmallBlockPoolHeaderSize) div size;
small^.OptimalBlockPoolSize := ((perpool * size +
(SmallBlockPoolHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset))
and -MediumBlockGranularity) + MediumBlockSizeOffset;
inc(small);
end;
assert(small = @SmallBlockInfo.GetmemLookup);
{$ifndef FPCMM_ASSUMEMULTITHREAD}
SmallBlockInfo.IsMultiThreadPtr := @IsMultiThread;
MediumBlockInfo.IsMultiThreadPtr := @IsMultiThread;
{$endif FPCMM_ASSUMEMULTITHREAD}
start := 0;
with SmallBlockInfo do
for i := 0 to NumSmallBlockTypes - 1 do
begin
next := PtrUInt(SmallBlockSizes[i]) div SmallBlockGranularity;
while start < next do
begin
GetmemLookup[start] := i;
inc(start);
end;
end;
with MediumBlockInfo do
begin
PoolsCircularList.PreviousMediumBlockPoolHeader := @PoolsCircularList;
PoolsCircularList.NextMediumBlockPoolHeader := @PoolsCircularList;
for i := 0 to MediumBlockBinCount -1 do
begin
medium := @Bins[i];
medium.PreviousFreeBlock := medium;
medium.NextFreeBlock := medium;
end;
end;
LargeBlocksCircularList.PreviousLargeBlockHeader := @LargeBlocksCircularList;
LargeBlocksCircularList.NextLargeBlockHeader := @LargeBlocksCircularList;
end;
{$I-}
{$ifdef FPCMM_REPORTMEMORYLEAKS}
var
MemoryLeakReported: boolean;
procedure StartReport;
begin
if MemoryLeakReported then
exit;
writeln;
WriteHeapStatus('WARNING! THIS PROGRAM LEAKS MEMORY!'#13#10'Memory Status:');
writeln('Leaks Identified:');
MemoryLeakReported := true;
end;
{$ifdef LINUX}
// experimental detection of object class - use at your own risk
{$define FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
{$endif LINUX}
procedure MediumMemoryLeakReport(p: PMediumBlockPoolHeader);
var
block: PByte;
header, size: PtrUInt;
{$ifdef FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
first, last: PByte;
vmt: PAnsiChar;
small: PSmallBlockPoolHeader;
function SeemsRealPointer(p: pointer): boolean;
begin
result := (PtrUInt(p) > 65535)
{$ifndef MSWINDOWS}
// let the GPF happen silently in the kernel
and (fpaccess(p, F_OK) <> 0) and (fpgeterrno <> ESysEFAULT)
{$endif MSWINDOWS}
end;
{$endif FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
begin
with MediumBlockInfo do
if (SequentialFeedBytesLeft = 0) or (PtrUInt(LastSequentiallyFed) < PtrUInt(p)) or
(PtrUInt(LastSequentiallyFed) > PtrUInt(p) + MediumBlockPoolSize) then
block := Pointer(PByte(p) + MediumBlockPoolHeaderSize)
else if SequentialFeedBytesLeft <> MediumBlockPoolSize - MediumBlockPoolHeaderSize then
block := LastSequentiallyFed
else
exit;
repeat
header := PPtrUInt(block - BlockHeaderSize)^;
size := header and DropMediumAndLargeFlagsMask;
if size = 0 then
exit;
if header and IsFreeBlockFlag = 0 then
if header and IsSmallBlockPoolInUseFlag <> 0 then
begin
{$ifdef FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
if PSmallBlockPoolHeader(block).BlocksInUse > 0 then
begin
first := PByte(block) + SmallBlockPoolHeaderSize;
with PSmallBlockPoolHeader(block).BlockType^ do
if (CurrentSequentialFeedPool <> pointer(block)) or
(PtrUInt(NextSequentialFeedBlockAddress) >
PtrUInt(MaxSequentialFeedBlockAddress)) then
last := PByte(block) + (PPtrUInt(PByte(block) - BlockHeaderSize)^
and DropMediumAndLargeFlagsMask) - BlockSize
else
last := Pointer(PByte(NextSequentialFeedBlockAddress) - 1);
while first <= last do
begin
if ((PPtrUInt(first - BlockHeaderSize)^ and IsFreeBlockFlag) = 0) then
begin
vmt := PPointer(first)^; // _FreeMem() ensured vmt=nil
if (vmt <> nil) and SeemsRealPointer(vmt) then
try
// try to access the TObject VMT (seems to work on Linux)
if (PPtrInt(vmt + vmtInstanceSize)^ >= sizeof(vmt)) and
(PPtrInt(vmt + vmtInstanceSize)^ <=
PSmallBlockPoolHeader(block).BlockType.BlockSize) and
SeemsRealPointer(PPointer(vmt + vmtClassName)^) then
begin
StartReport;
writeln(' probable ', PShortString(PPointer(vmt + vmtClassName)^)^,
' leak (', PSmallBlockPoolHeader(block).BlockType.BlockSize, ' bytes)');
end;
except
// intercept any GPF
end;
end;
inc(first, PSmallBlockPoolHeader(block).BlockType.BlockSize);
end;
end;
{$endif FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
end
else
begin
StartReport;
writeln(' medium block leak of ', K(size), 'B');
end;
inc(block, size);
until false;
end;
{$endif FPCMM_REPORTMEMORYLEAKS}
procedure FreeAllMemory;
var
medium, nextmedium: PMediumBlockPoolHeader;
bin: PMediumFreeBlock;
large, nextlarge: PLargeBlockHeader;
p: PSmallBlockType;
i, size: PtrUInt;
{$ifdef FPCMM_LOCKLESSFREE}
j: PtrInt;
{$endif FPCMM_LOCKLESSFREE}
{$ifdef FPCMM_REPORTMEMORYLEAKS}
leak, leaks: PtrUInt;
{$endif FPCMM_REPORTMEMORYLEAKS}
begin
{$ifdef FPCMM_REPORTMEMORYLEAKS}
leaks := 0;
{$endif FPCMM_REPORTMEMORYLEAKS}
p := @SmallBlockInfo;
for i := 1 to NumSmallInfoBlock do
begin
{$ifdef FPCMM_LOCKLESSFREE}
for j := 0 to p^.BinCount - 1 do
if p^.BinInstance[i] <> nil then
_FreeMem(p^.BinInstance[i]); // release (unlikely) pending instances
{$endif FPCMM_LOCKLESSFREE}
p^.PreviousPartiallyFreePool := pointer(p);
p^.NextPartiallyFreePool := pointer(p);
p^.NextSequentialFeedBlockAddress := pointer(1);
p^.MaxSequentialFeedBlockAddress := nil;
{$ifdef FPCMM_REPORTMEMORYLEAKS}
leak := p^.GetmemCount - p^.FreememCount;
if leak <> 0 then
begin
StartReport;
inc(leaks, leak);
writeln(' small block leak x', leak, ' of size=', p^.BlockSize,
' (getmem=', p^.GetmemCount, ' freemem=', p^.FreememCount, ')');
end;
{$endif FPCMM_REPORTMEMORYLEAKS}
inc(p);
end;
{$ifdef FPCMM_REPORTMEMORYLEAKS}
if leaks <> 0 then
writeln(' Total small block leaks = ', leaks);
{$endif FPCMM_REPORTMEMORYLEAKS}
with MediumBlockInfo do
begin
medium := PoolsCircularList.NextMediumBlockPoolHeader;
while medium <> @PoolsCircularList do
begin
{$ifdef FPCMM_REPORTMEMORYLEAKS}
MediumMemoryLeakReport(medium);
{$endif FPCMM_REPORTMEMORYLEAKS}
nextmedium := medium.NextMediumBlockPoolHeader;
FreeMedium(medium);
medium := nextmedium;
end;
PoolsCircularList.PreviousMediumBlockPoolHeader := @PoolsCircularList;
PoolsCircularList.NextMediumBlockPoolHeader := @PoolsCircularList;
for i := 0 to MediumBlockBinCount - 1 do
begin
bin := @Bins[i];
bin.PreviousFreeBlock := bin;
bin.NextFreeBlock := bin;
end;
BinGroupBitmap := 0;
SequentialFeedBytesLeft := 0;
for i := 0 to MediumBlockBinGroupCount - 1 do
BinBitmaps[i] := 0;
end;
large := LargeBlocksCircularList.NextLargeBlockHeader;
while large <> @LargeBlocksCircularList do
begin
size := large.BlockSizeAndFlags and DropMediumAndLargeFlagsMask;
{$ifdef FPCMM_REPORTMEMORYLEAKS}
StartReport;
writeln(' large block leak of ', K(size), 'B');
{$endif FPCMM_REPORTMEMORYLEAKS}
nextlarge := large.NextLargeBlockHeader;
FreeLarge(large, size);
large := nextlarge;
end;
LargeBlocksCircularList.PreviousLargeBlockHeader := @LargeBlocksCircularList;
LargeBlocksCircularList.NextLargeBlockHeader := @LargeBlocksCircularList;
end;
{$I+}
{$ifndef FPCMM_STANDALONE}
const
NewMM: TMemoryManager = (
NeedLock: false;
GetMem: @_Getmem;
FreeMem: @_FreeMem;
FreememSize: @_FreememSize;
AllocMem: @_AllocMem;
ReallocMem: @_ReAllocMem;
MemSize: @_MemSize;
InitThread: nil;
DoneThread: nil;
RelocateHeap: nil;
GetHeapStatus: @_GetHeapStatus;
GetFPCHeapStatus: @_GetFPCHeapStatus);
var
OldMM: TMemoryManager;
initialization
InitializeMemoryManager;
GetMemoryManager(OldMM);
SetMemoryManager(NewMM);
finalization
SetMemoryManager(OldMM);
FreeAllMemory;
{$endif FPCMM_STANDALONE}
{$endif FPC_CPUX64}
end.