/// Fast Memory Manager for FPC x86_64 // - this unit is a part of the freeware Synopse mORMot framework // licensed under a MPL/GPL/LGPL three license - see LICENSE.md unit SynFPCx64MM; { ***************************************************************************** A Multi-thread Friendly Memory Manager for FPC written in x86_64 assembly - targetting Linux (and Windows) multi-threaded Services - only for FPC on the x86_64 target - use the RTL MM on Delphi or ARM - based on FastMM4 proven algorithms by Pierre le Riche - code has been reduced to the only necessary featureset for production - deep asm refactoring for cross-platform, compactness and efficiency - can report detailed statistics (with threads contention and memory leaks) - mremap() makes large block ReallocMem a breeze on Linux :) - inlined SSE2 movaps loop is more efficient that subfunction(s) - lockless round-robin of tiny blocks (<=128/256 bytes) for better scaling - optional lockless bin list to avoid freemem() thread contention - three app modes: default mono-thread friendly, FPCMM_SERVER or FPCMM_BOOST Usage: include this unit as the very first in your FPC project uses clause Why another Memory Manager on FPC? - The built-in heap.inc is well written and cross-platform and cross-CPU, but its threadvar arena for small blocks tends to consume a lot of memory on multi-threaded servers, and has suboptimal allocation performance - C memory managers (glibc, Intel TBB, jemalloc) have a very high RAM consumption (especially Intel TBB) and do panic/SIGKILL on any GPF - Pascal alternatives (FastMM4,ScaleMM2,BrainMM) are Windows+Delphi specific - Our lockess round-robin of tiny blocks is a unique algorithm in MM AFAIK - It was so fun diving into SSE2 x86_64 assembly and Pierre's insight - Resulting code is still easy to understand and maintain IMPORTANT NOTICE: seems stable on Linux and Win64 but feedback is welcome! ***************************************************************************** This file is part of Synopse framework. Synopse framework. Copyright (C) 2020 Arnaud Bouchez Synopse Informatique - https://synopse.info *** BEGIN LICENSE BLOCK ***** Version: MPL 1.1/GPL 2.0/LGPL 2.1 The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.mozilla.org/MPL Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. The Original Code is Synopse mORMot framework. The Initial Developer of the Original Code is Arnaud Bouchez. Portions created by the Initial Developer are Copyright (C) 2020 the Initial Developer. All Rights Reserved. Contributor(s): Alternatively, the contents of this file may be used under the terms of either the GNU General Public License Version 2 or later (the "GPL"), or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), in which case the provisions of the GPL or the LGPL are applicable instead of those above. If you wish to allow use of your version of this file only under the terms of either the GPL or the LGPL, and not to allow others to use your version of this file under the terms of the MPL, indicate your decision by deleting the provisions above and replace them with the notice and other provisions required by the GPL or the LGPL. If you do not delete the provisions above, a recipient may use your version of this file under the terms of any one of the MPL, the GPL or the LGPL. ***** END LICENSE BLOCK ***** } { ---- Ready-To-Use Scenarios for Memory Manager Tuning } // by default, we target LCL/console mono-threaded apps to replace the RTL MM // - you may define FPCMM_SERVER or even FPCMM_BOOST for a service/daemon // if defined, set FPCMM_DEBUG and FPCMM_ASSUMEMULTITHREAD // - those flags target well a multi-threaded service // - consider FPCMM_BOOST to try more aggressive settings {.$define FPCMM_SERVER} // if defined, tiny blocks <= 256 bytes will have a bigger round-robin cycle // - try to enable it if unexpected SmallGetmemSleepCount/SmallFreememSleepCount // and SleepCount/SleepCycles contentions are reported by CurrentHeapStatus // - will also use 2x (FPCMM_BOOST) or 4x (FPCMM_BOOSTER) more tiny blocks // arenas to share among the threads - so process will consume slightly more RAM // - warning: depending on the workload and hardware, it may actually be slower; // consider FPCMM_SERVER as a fair alternative {.$define FPCMM_BOOST} {.$define FPCMM_BOOSTER} { ---- Fine Grained Memory Manager Tuning } // includes more detailed information to WriteHeapStatus() {.$define FPCMM_DEBUG} // checks leaks and write them to the console at process shutdown // - only basic information will be included: more debugging information (e.g. // call stack) may be gathered using heaptrc or valgrid {.$define FPCMM_REPORTMEMORYLEAKS} // won't check the IsMultiThread global, but assume it is true // - multi-threaded apps (e.g. a Server Daemon instance) will be faster with it // - mono-threaded (console/LCL) apps are faster without this conditional {.$define FPCMM_ASSUMEMULTITHREAD} // let Freemem multi-thread contention use a lockless algorithm // - on contention, Freemem won't yield the thread using an OS call, but fill // an internal Bin list which will be released when the lock becomes available // - from our tests on high thread contention, this may be slower on Linux, but // sometimes slightly faster on Win64 (in a VM at least) {.$define FPCMM_LOCKLESSFREE} // won't use mremap but a regular getmem/move/freemem pattern // - depending on the actual system (e.g. on a VM), mremap may be slower {.$define FPCMM_NOMREMAP} // on contention problem, execute "pause" opcode and spin retrying the lock // - you may try to define this if you have more than one core, to follow Intel // recommendation from https://software.intel.com/en-us/comment/1134767 // - on SkylakeX (Intel 7th gen), "pause" opcode went from 10-20 to 140 cycles, // so we use rdtsc and a given number of cycles - see http://tiny.cc/toeaqz // - from our tests on high thread contention, spinning is slower on both // Linux and Windows, whatever Intel is advising {.$define FPCMM_PAUSE} // will export libc-like functions, and not replace the FPC MM // - e.g. to use this unit as a stand-alone C memory allocator {.$define FPCMM_STANDALONE} interface {$ifdef FPC} // cut-down version of Synopse.inc to make this unit standalone {$mode Delphi} {$asmmode Intel} {$inline on} {$R-} // disable Range checking {$S-} // disable Stack checking {$W-} // disable stack frame generation {$Q-} // disable overflow checking {$B-} // expect short circuit boolean {$ifdef CPUX64} {$define FPC_CPUX64} // this unit is for FPC + x86_64 only {$endif CPUX64} {$ifdef FPCMM_BOOSTER} {$define FPCMM_BOOST} {$undef FPCMM_DEBUG} // when performance matters more than stats {$endif FPCMM_BOOSTER} {$ifdef FPCMM_BOOST} {$undef FPCMM_SERVER} {$define FPCMM_ASSUMEMULTITHREAD} {$endif FPCMM_BOOST} {$ifdef FPCMM_SERVER} {$define FPCMM_DEBUG} {$define FPCMM_ASSUMEMULTITHREAD} {$endif FPCMM_SERVER} {$endif FPC} {$ifdef FPC_CPUX64} // this unit is available only for FPC + X86_64 CPU type /// Arena (middle/large) heap information as returned by CurrentHeapStatus TMMStatusArena = record /// how many bytes are currently reserved (mmap) to the Operating System CurrentBytes: PtrUInt; /// how many bytes have been reserved (mmap) to the Operating System CumulativeBytes: PtrUInt; {$ifdef FPCMM_DEBUG} /// maximum bytes count reserved (mmap) to the Operating System PeakBytes: PtrUInt; /// how many VirtualAlloc/mmap calls to the Operating System did occur CumulativeAlloc: PtrUInt; /// how many VirtualFree/munmap calls to the Operating System did occur CumulativeFree: PtrUInt; {$endif FPCMM_DEBUG} /// how many times this Arena did wait from been unlocked by another thread SleepCount: PtrUInt; end; /// heap information as returned by CurrentHeapStatus TMMStatus = record /// how many tiny/small memory blocks (<=2600) are currently allocated SmallBlocks: PtrUInt; /// how many bytes of tiny/small memory blocks are currently allocated // - this size is part of the Medium.CurrentBytes arena SmallBlocksSize: PtrUInt; /// contain blocks up to 256KB (small and medium blocks) Medium: TMMStatusArena; /// large blocks > 256KB which are directly handled by the Operating System Large: TMMStatusArena; {$ifdef FPCMM_DEBUG} /// how much rdtsc cycles were spent within SwitchToThread/NanoSleep API // - we rdtsc since it is an indicative but very fast way of timing SleepCycles: PtrUInt; {$ifdef FPCMM_LOCKLESSFREE} /// how many types Freemem() did spin to acquire its lock-less bin list SmallFreememLockLessSpin: PtrUInt; {$endif FPCMM_LOCKLESSFREE} {$endif FPCMM_DEBUG} /// how many times the Operating System Sleep/NanoSleep API was called // - in a perfect world, should be as small as possible SleepCount: PtrUInt; /// how many times Getmem() did block and wait for a small block // - see also GetSmallBlockContention() SmallGetmemSleepCount: PtrUInt; /// how many times Freemem() did block and wait for a small block // - see also GetSmallBlockContention() SmallFreememSleepCount: PtrUInt; end; PMMStatus = ^TMMStatus; /// allocate a new memory buffer // - as FPC default heap, _Getmem(0) returns _Getmem(1) function _GetMem(size: PtrUInt): pointer; /// allocate a new zeroed memory buffer function _AllocMem(Size: PtrUInt): pointer; /// release a memory buffer // - returns the allocated size of the supplied pointer (as FPC default heap) function _FreeMem(P: pointer): PtrUInt; /// change the size of a memory buffer // - won't move any data if in-place reallocation is possible // - as FPC default heap, _ReallocMem(P=nil,Size) maps P := _getmem(Size) and // _ReallocMem(P,0) maps _Freemem(P) function _ReallocMem(var P: pointer; Size: PtrUInt): pointer; /// retrieve the maximum size (i.e. the allocated size) of a memory buffer function _MemSize(P: pointer): PtrUInt; inline; /// retrieve high-level statistics about the current memory manager state // - see also GetSmallBlockContention for detailed small blocks information function CurrentHeapStatus: TMMStatus; {$ifdef FPCMM_STANDALONE} /// should be called before using any memory function procedure InitializeMemoryManager; /// should be called to finalize this memory manager process and release all RAM procedure FreeAllMemory; {$undef FPCMM_DEBUG} // excluded FPC-specific debugging /// IsMultiThread global variable is not correct outside of the FPC RTL {$define FPCMM_ASSUMEMULTITHREAD} /// not supported to reduce dependencies and console writing {$undef FPCMM_REPORTMEMORYLEAKS} {$else} type /// one GetSmallBlockContention info about unexpected multi-thread waiting // - a single GetmemBlockSize or FreememBlockSize non 0 field is set TSmallBlockContention = packed record /// how many times a small block getmem/freemem has been waiting for unlock SleepCount: cardinal; /// the small block size on which Getmem() has been blocked - or 0 GetmemBlockSize: cardinal; /// the small block size on which Freemem() has been blocked - or 0 FreememBlockSize: cardinal; end; /// small blocks detailed information as returned GetSmallBlockContention TSmallBlockContentionDynArray = array of TSmallBlockContention; /// one GetSmallBlockStatus information TSmallBlockStatus = packed record /// how many times a memory block of this size has been allocated Total: cardinal; /// how many memory blocks of this size are currently allocated Current: cardinal; /// the standard size of the small memory block BlockSize: cardinal; end; /// small blocks detailed information as returned GetSmallBlockStatus TSmallBlockStatusDynArray = array of TSmallBlockStatus; /// sort order of detailed information as returned GetSmallBlockStatus TSmallBlockOrderBy = (obTotal, obCurrent, obBlockSize); /// retrieve the use counts of allocated small blocks // - returns maxcount biggest results, sorted by "orderby" field occurence function GetSmallBlockStatus(maxcount: integer = 10; orderby: TSmallBlockOrderBy = obTotal; count: PPtrUInt = nil; bytes: PPtrUInt = nil): TSmallBlockStatusDynArray; /// retrieve all small blocks which suffered from blocking during multi-thread // - returns maxcount biggest results, sorted by SleepCount occurence function GetSmallBlockContention(maxcount: integer = 10): TSmallBlockContentionDynArray; /// convenient debugging function into the console // - if smallblockcontentioncount > 0, includes GetSmallBlockContention() info // up to the smallblockcontentioncount biggest occurences procedure WriteHeapStatus(const context: shortstring = ''; smallblockstatuscount: integer = 8; smallblockcontentioncount: integer = 8; compilationflags: boolean = false); {$endif FPCMM_STANDALONE} {$endif FPC_CPUX64} implementation { High-level Algorithms Description ----------------------------------- The allocator handles the following families of memory blocks: - TINY <= 128 B (or <= 256 B for FPCMM_BOOST) - not existing in FastMM4 Round-robin distribution into several arenas, fed from medium blocks (fair scaling from multi-threaded calls, with no threadvar nor GC involved) - SMALL <= 2600 B Single arena per block size, fed from medium blocks - MEDIUM <= 256 KB Pool of bitmap-marked chunks, fed from 1MB of OS mmap/virtualalloc - LARGE > 256 KB Directly fed from OS mmap/virtualalloc with mremap when growing About locking: - Tiny and Small blocks have their own per-size lock, in every arena - Medium and Large blocks have one giant lock each (seldom used) - SwitchToThread/FpNanoSleep OS call is done after initial spinning - FPCMM_LOCKLESSFREE reduces OS calls on Freemem() thread contention - FPCMM_DEBUG / WriteHeapStatus allows to identify the lock contention } {$ifdef FPC_CPUX64} // this unit is available only for FPC + X86_64 CPU { ********* Operating System Specific API Calls } {$ifdef MSWINDOWS} var HeapStatus: TMMStatus; const kernel32 = 'kernel32.dll'; MEM_COMMIT = $1000; MEM_RESERVE = $2000; MEM_RELEASE = $8000; MEM_FREE = $10000; MEM_TOP_DOWN = $100000; PAGE_READWRITE = 4; function VirtualAlloc(lpAddress: pointer; dwSize: PtrUInt; flAllocationType, flProtect: Cardinal): pointer; stdcall; external kernel32 name 'VirtualAlloc'; function VirtualFree(lpAddress: pointer; dwSize: PtrUInt; dwFreeType: Cardinal): LongBool; stdcall; external kernel32 name 'VirtualFree'; procedure SwitchToThread; stdcall; external kernel32 name 'SwitchToThread'; function AllocMedium(Size: PtrInt): pointer; inline; begin // bottom-up allocation to reduce fragmentation result := VirtualAlloc(nil, Size, MEM_COMMIT, PAGE_READWRITE); end; function AllocLarge(Size: PtrInt): pointer; inline; begin // top-down allocation to reduce fragmentation result := VirtualAlloc(nil, Size, MEM_COMMIT or MEM_TOP_DOWN, PAGE_READWRITE); end; procedure Free(ptr: pointer; Size: PtrInt); inline; begin VirtualFree(ptr, 0, MEM_RELEASE); end; {$define FPCMM_NOMREMAP} {$else} uses {$ifndef DARWIN} syscall, {$endif DARWIN} BaseUnix; var HeapStatus: TMMStatus; // we directly call the Kernel, so this unit doesn't require any libc function AllocMedium(Size: PtrInt): pointer; inline; begin result := fpmmap(nil, Size, PROT_READ or PROT_WRITE, MAP_PRIVATE or MAP_ANONYMOUS, -1, 0); end; function AllocLarge(Size: PtrInt): pointer; inline; begin result := fpmmap(nil, Size, PROT_READ or PROT_WRITE, MAP_PRIVATE or MAP_ANONYMOUS, -1, 0); end; procedure Free(ptr: pointer; Size: PtrInt); inline; begin Size := fpmunmap(ptr, Size); // assert(Size = 0); end; {$ifdef LINUX} {$ifndef FPCMM_NOMREMAP} const syscall_nr_mremap = 25; // valid on x86_64 Linux and Android MREMAP_MAYMOVE = 1; function fpmremap(addr: pointer; old_len, new_len: size_t; may_move: longint): pointer; inline; begin result := pointer(do_syscall(syscall_nr_mremap, TSysParam(addr), TSysParam(old_len), TSysParam(new_len), TSysParam(may_move))); end; {$endif FPCMM_NOMREMAP} {$else BSD} {$define FPCMM_NOMREMAP} // mremap is a Linux-specific syscall {$endif LINUX} procedure SwitchToThread; inline; var t: Ttimespec; begin // note: nanosleep() adds a few dozen of microsecs for context switching t.tv_sec := 0; t.tv_nsec := 10; // empirically identified on a recent Linux Kernel fpnanosleep(@t, nil); end; {$endif MSWINDOWS} {$ifdef FPCMM_DEBUG} procedure ReleaseCore; nostackframe; assembler; asm rdtsc shl rdx, 32 or rax, rdx push rax call SwitchToThread pop rcx rdtsc shl rdx, 32 or rax, rdx lea rdx, [rip + HeapStatus] sub rax, rcx lock xadd qword ptr [rdx + TMMStatus.SleepCycles], rax lock inc qword ptr [rdx + TMMStatus.SleepCount] end; {$else} procedure ReleaseCore; begin SwitchToThread; inc(HeapStatus.SleepCount); // indicative counter end; {$endif FPCMM_DEBUG} { ********* Some Assembly Helpers } procedure NotifyAlloc(var Arena: TMMStatusArena; Size: PtrUInt); nostackframe; assembler; asm mov rax, Size lock xadd qword ptr [Arena].TMMStatusArena.CurrentBytes, rax lock xadd qword ptr [Arena].TMMStatusArena.CumulativeBytes, Size {$ifdef FPCMM_DEBUG} lock inc qword ptr [Arena].TMMStatusArena.CumulativeAlloc mov rax, qword ptr [Arena].TMMStatusArena.CurrentBytes cmp rax, qword ptr [Arena].TMMStatusArena.PeakBytes jbe @s mov qword ptr [Arena].TMMStatusArena.PeakBytes, rax @s: {$endif FPCMM_DEBUG} end; procedure NotifyFree(var Arena: TMMStatusArena; Size: PtrUInt); nostackframe; assembler; asm neg Size lock xadd qword ptr [Arena].TMMStatusArena.CurrentBytes, Size {$ifdef FPCMM_DEBUG} lock inc qword ptr [Arena].TMMStatusArena.CumulativeFree {$endif FPCMM_DEBUG} end; // faster than Move() as called from ReallocateLargeBlock procedure MoveLarge(src, dst: pointer; cnt: PtrInt); nostackframe; assembler; asm sub cnt, 8 add src, cnt add dst, cnt neg cnt jns @z align 16 @s: movaps xmm0, oword ptr [src + cnt] // AVX move is not really faster movntdq oword ptr [dst + cnt], xmm0 // non-temporal loop add cnt, 16 js @s sfence @z: mov rax, qword ptr [src + cnt] mov qword ptr [dst + cnt], rax end; { ********* Constants and Data Structures Definitions } const {$ifdef FPCMM_BOOST} // someimtes the more arenas, the better multi-threadable {$ifdef FPCMM_BOOSTER} NumTinyBlockTypesPO2 = 4; NumTinyBlockArenasPO2 = 5; // will probably end up with Medium lock contention {$else} NumTinyBlockTypesPO2 = 4; // tiny are <= 256 bytes NumTinyBlockArenasPO2 = 4; // 16 + 1 arenas {$endif FPCMM_BOOSTER} {$else} NumTinyBlockTypesPO2 = 3; // multiple arenas for tiny blocks <= 128 bytes NumTinyBlockArenasPO2 = 3; // 8 round-robin arenas + 1 main by default {$endif FPCMM_BOOST} NumSmallBlockTypes = 46; MaximumSmallBlockSize = 2608; SmallBlockSizes: array[0..NumSmallBlockTypes - 1] of word = ( 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 352, 384, 416, 448, 480, 528, 576, 624, 672, 736, 800, 880, 960, 1056, 1152, 1264, 1376, 1504, 1648, 1808, 1984, 2176, 2384, MaximumSmallBlockSize, MaximumSmallBlockSize, MaximumSmallBlockSize); NumTinyBlockTypes = 1 shl NumTinyBlockTypesPO2; NumTinyBlockArenas = 1 shl NumTinyBlockArenasPO2; NumSmallInfoBlock = NumSmallBlockTypes + NumTinyBlockArenas * NumTinyBlockTypes; SmallBlockGranularity = 16; TargetSmallBlocksPerPool = 48; MinimumSmallBlocksPerPool = 12; SmallBlockDownsizeCheckAdder = 64; SmallBlockUpsizeAdder = 32; {$ifdef FPCMM_LOCKLESSFREE} SmallBlockTypePO2 = 8; // SizeOf(TSmallBlockType)=256 SmallBlockBinCount = (((1 shl SmallBlockTypePO2) - 64) div 8) - 1; {$else} SmallBlockTypePO2 = 6; {$endif FPCMM_LOCKLESSFREE} MediumBlockPoolSizeMem = 20 * 64 * 1024; MediumBlockPoolSize = MediumBlockPoolSizeMem - 16; MediumBlockSizeOffset = 48; MinimumMediumBlockSize = 11 * 256 + MediumBlockSizeOffset; MediumBlockBinsPerGroup = 32; MediumBlockBinGroupCount = 32; MediumBlockBinCount = MediumBlockBinGroupCount * MediumBlockBinsPerGroup; MediumBlockGranularity = 256; MaximumMediumBlockSize = MinimumMediumBlockSize + (MediumBlockBinCount - 1) * MediumBlockGranularity; OptimalSmallBlockPoolSizeLowerLimit = 29 * 1024 - MediumBlockGranularity + MediumBlockSizeOffset; OptimalSmallBlockPoolSizeUpperLimit = 64 * 1024 - MediumBlockGranularity + MediumBlockSizeOffset; MaximumSmallBlockPoolSize = OptimalSmallBlockPoolSizeUpperLimit + MinimumMediumBlockSize; LargeBlockGranularity = 65536; MediumInPlaceDownsizeLimit = MinimumMediumBlockSize div 4; IsFreeBlockFlag = 1; IsMediumBlockFlag = 2; IsSmallBlockPoolInUseFlag = 4; IsLargeBlockFlag = 4; PreviousMediumBlockIsFreeFlag = 8; LargeBlockIsSegmented = 8; DropSmallFlagsMask = -8; ExtractSmallFlagsMask = 7; DropMediumAndLargeFlagsMask = -16; ExtractMediumAndLargeFlagsMask = 15; // use pause before ReleaseCore API call when spinning locks // pause is 140 cycles since SkylakeX - see http://tiny.cc/010ioz -> use rdtsc // which has 30 cycles latency; ring3 to ring 0 transition is 1000 cycles {$ifdef FPCMM_PAUSE} SpinSmallGetmemLockTSC = 1000; SpinSmallFreememLockTSC = 1000; // _freemem has more collisions {$ifdef FPCMM_LOCKLESSFREE} SpinSmallFreememBinTSC = 2000; {$endif FPCMM_LOCKLESSFREE} SpinMediumLockTSC = 2000; SpinLargeLockTSC = 2000; {$else} SpinMediumLockTSC = 1000; // minimum spinning SpinLargeLockTSC = 1000; {$endif FPCMM_PAUSE} type PSmallBlockPoolHeader = ^TSmallBlockPoolHeader; // information for each small block size - 64/256 bytes long >= CPU cache line TSmallBlockType = record BlockTypeLocked: boolean; AllowedGroupsForBlockPoolBitmap: Byte; BlockSize: Word; MinimumBlockPoolSize: Word; OptimalBlockPoolSize: Word; NextPartiallyFreePool: PSmallBlockPoolHeader; PreviousPartiallyFreePool: PSmallBlockPoolHeader; NextSequentialFeedBlockAddress: pointer; MaxSequentialFeedBlockAddress: pointer; CurrentSequentialFeedPool: PSmallBlockPoolHeader; GetmemCount: cardinal; FreememCount: cardinal; GetmemSleepCount: cardinal; FreememSleepCount: cardinal; {$ifdef FPCMM_LOCKLESSFREE} // 192 optional bytes for FreeMem Bin BinLocked: boolean; BinCount: byte; BinSpinCount: cardinal; BinInstance: array[0.. SmallBlockBinCount - 1] of pointer; {$endif FPCMM_LOCKLESSFREE} end; PSmallBlockType = ^TSmallBlockType; TSmallBlockTypes = array[0..NumSmallBlockTypes - 1] of TSmallBlockType; TTinyBlockTypes = array[0..NumTinyBlockTypes - 1] of TSmallBlockType; TSmallBlockInfo = record Small: TSmallBlockTypes; Tiny: array[0..NumTinyBlockArenas - 1] of TTinyBlockTypes; GetmemLookup: array[0.. (MaximumSmallBlockSize - 1) div SmallBlockGranularity] of byte; {$ifndef FPCMM_ASSUMEMULTITHREAD} IsMultiThreadPtr: PBoolean; // safe access to IsMultiThread global variable {$endif FPCMM_ASSUMEMULTITHREAD} TinyCurrentArena: integer; end; TSmallBlockPoolHeader = record BlockType: PSmallBlockType; {$ifdef CPU32} Padding32Bits: cardinal; {$endif} NextPartiallyFreePool: PSmallBlockPoolHeader; PreviousPartiallyFreePool: PSmallBlockPoolHeader; FirstFreeBlock: pointer; BlocksInUse: Cardinal; SmallBlockPoolSignature: Cardinal; FirstBlockPoolPointerAndFlags: PtrUInt; end; PMediumBlockPoolHeader = ^TMediumBlockPoolHeader; TMediumBlockPoolHeader = record PreviousMediumBlockPoolHeader: PMediumBlockPoolHeader; NextMediumBlockPoolHeader: PMediumBlockPoolHeader; Reserved1: PtrUInt; FirstMediumBlockSizeAndFlags: PtrUInt; end; PMediumFreeBlock = ^TMediumFreeBlock; TMediumFreeBlock = record PreviousFreeBlock: PMediumFreeBlock; NextFreeBlock: PMediumFreeBlock; end; TMediumBlockInfo = record Locked: boolean; PoolsCircularList: TMediumBlockPoolHeader; LastSequentiallyFed: pointer; SequentialFeedBytesLeft: Cardinal; BinGroupBitmap: Cardinal; {$ifndef FPCMM_ASSUMEMULTITHREAD} IsMultiThreadPtr: PBoolean; // safe access to IsMultiThread global variable {$endif FPCMM_ASSUMEMULTITHREAD} BinBitmaps: array[0..MediumBlockBinGroupCount - 1] of Cardinal; Bins: array[0..MediumBlockBinCount - 1] of TMediumFreeBlock; end; PLargeBlockHeader = ^TLargeBlockHeader; TLargeBlockHeader = record PreviousLargeBlockHeader: PLargeBlockHeader; NextLargeBlockHeader: PLargeBlockHeader; Reserved1: PtrUInt; BlockSizeAndFlags: PtrUInt; end; const BlockHeaderSize = SizeOf(pointer); SmallBlockPoolHeaderSize = SizeOf(TSmallBlockPoolHeader); MediumBlockPoolHeaderSize = SizeOf(TMediumBlockPoolHeader); LargeBlockHeaderSize = SizeOf(TLargeBlockHeader); var SmallBlockInfo: TSmallBlockInfo; MediumBlockInfo: TMediumBlockInfo; LargeBlocksLocked: boolean; LargeBlocksCircularList: TLargeBlockHeader; { ********* Shared Routines } procedure LockMediumBlocks; nostackframe; assembler; asm // on input/output: r10=MediumBlockInfo @s: rdtsc // tsc in edx:eax shl rdx, 32 lea r9, [rax + rdx + SpinMediumLockTSC] // r9 = endtsc @sp: pause rdtsc shl rdx, 32 or rax, rdx cmp rax, r9 ja @rc // timeout mov rcx, r10 mov eax, $100 cmp byte ptr [r10].TMediumBlockInfo.Locked, true je @sp lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah je @ok jmp @sp @rc: push rsi // preserve POSIX ABI registers push rdi push r10 push r11 call ReleaseCore pop r11 pop r10 pop rdi pop rsi lea rax, [rip + HeapStatus] lock inc qword ptr [rax].TMMStatus.Medium.SleepCount jmp @s @ok: end; procedure InsertMediumBlockIntoBin; nostackframe; assembler; asm // rcx=MediumFreeBlock edx=MediumBlockSize r10=MediumBlockInfo - even on POSIX mov rax, rcx // Get the bin number for this block size sub edx, MinimumMediumBlockSize shr edx, 8 // Validate the bin number sub edx, MediumBlockBinCount - 1 sbb ecx, ecx and edx, ecx add edx, MediumBlockBinCount - 1 mov r9, rdx // Get the bin address in rcx shl edx, 4 lea rcx, [r10 + rdx + TMediumBlockInfo.Bins] // Bins are LIFO, se we insert this block as the first free block in the bin mov rdx, TMediumFreeBlock[rcx].NextFreeBlock mov TMediumFreeBlock[rax].PreviousFreeBlock, rcx mov TMediumFreeBlock[rax].NextFreeBlock, rdx mov TMediumFreeBlock[rdx].PreviousFreeBlock, rax mov TMediumFreeBlock[rcx].NextFreeBlock, rax // Was this bin empty? cmp rdx, rcx jne @Done // Get ecx=bin number, edx=group number mov rcx, r9 mov rdx, r9 shr edx, 5 // Flag this bin as not empty mov eax, 1 shl eax, cl or dword ptr [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax // Flag the group as not empty mov eax, 1 mov ecx, edx shl eax, cl or [r10 + TMediumBlockInfo.BinGroupBitmap], eax @Done: end; procedure RemoveMediumFreeBlock; nostackframe; assembler; asm // rcx=MediumFreeBlock r10=MediumBlockInfo - even on POSIX // Get the current previous and next blocks mov rdx, TMediumFreeBlock[rcx].PreviousFreeBlock mov rcx, TMediumFreeBlock[rcx].NextFreeBlock // Remove this block from the linked list mov TMediumFreeBlock[rcx].PreviousFreeBlock, rdx mov TMediumFreeBlock[rdx].NextFreeBlock, rcx // Is this bin now empty? If the previous and next free block pointers are // equal, they must point to the bin cmp rcx, rdx jne @Done // Get ecx=bin number, edx=group number lea r8, [r10 + TMediumBlockInfo.Bins] sub rcx, r8 mov edx, ecx shr ecx, 4 shr edx, 9 // Flag this bin as empty mov eax, -2 rol eax, cl and dword ptr [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax jnz @Done // Flag this group as empty mov eax, -2 mov ecx, edx rol eax, cl and [r10 + TMediumBlockInfo.BinGroupBitmap], eax @Done: end; procedure BinMediumSequentialFeedRemainder; nostackframe; assembler; asm // r10=MediumBlockInfo - even on POSIX mov eax, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft] test eax, eax jz @Done // Is the last fed sequentially block free? mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed] test byte ptr [rax - BlockHeaderSize], IsFreeBlockFlag jnz @LastBlockFedIsFree // Set the "previous block is free" flag in the last block fed or qword ptr [rax - BlockHeaderSize], PreviousMediumBlockIsFreeFlag // Get edx=remainder size, rax=remainder start mov edx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft] sub rax, rdx @BinTheRemainder: // Store the size of the block as well as the flags lea rcx, [rdx + IsMediumBlockFlag + IsFreeBlockFlag] mov [rax - BlockHeaderSize], rcx // Store the trailing size marker mov [rax + rdx - 16], rdx // Bin this medium block cmp edx, MinimumMediumBlockSize jb @Done mov rcx, rax call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize @Done: ret @LastBlockFedIsFree: // Drop the flags mov rdx, DropMediumAndLargeFlagsMask and rdx, [rax - BlockHeaderSize] // Free the last block fed cmp edx, MinimumMediumBlockSize jb @DontRemoveLastFed // Last fed block is free - remove it from its size bin mov rcx, rax call RemoveMediumFreeBlock // rcx = APMediumFreeBlock // Re-read rax and rdx mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed] mov rdx, DropMediumAndLargeFlagsMask and rdx, [rax - BlockHeaderSize] @DontRemoveLastFed: // Get the number of bytes left in ecx mov ecx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft] // rax = remainder start, rdx = remainder size sub rax, rcx add edx, ecx jmp @BinTheRemainder end; procedure FreeMedium(ptr: PMediumBlockPoolHeader); begin Free(ptr, MediumBlockPoolSizeMem); NotifyFree(HeapStatus.Medium, MediumBlockPoolSizeMem); end; function AllocNewSequentialFeedMediumPool(blocksize: Cardinal): pointer; var old: PMediumBlockPoolHeader; new: pointer; begin BinMediumSequentialFeedRemainder; new := AllocMedium(MediumBlockPoolSizeMem); with MediumblockInfo do if new <> nil then begin old := PoolsCircularList.NextMediumBlockPoolHeader; PMediumBlockPoolHeader(new).PreviousMediumBlockPoolHeader := @PoolsCircularList; PoolsCircularList.NextMediumBlockPoolHeader := new; PMediumBlockPoolHeader(new).NextMediumBlockPoolHeader := old; old.PreviousMediumBlockPoolHeader := new; PPtrUInt(PByte(new) + MediumBlockPoolSize - BlockHeaderSize)^ := IsMediumBlockFlag; SequentialFeedBytesLeft := (MediumBlockPoolSize - MediumBlockPoolHeaderSize) - blocksize; result := pointer(PByte(new) + MediumBlockPoolSize - blocksize); LastSequentiallyFed := result; PPtrUInt(PByte(result) - BlockHeaderSize)^ := blocksize or IsMediumBlockFlag; NotifyAlloc(HeapStatus.Medium, MediumBlockPoolSizeMem); end else begin SequentialFeedBytesLeft := 0; result := nil; end; end; procedure LockLargeBlocks; nostackframe; assembler; asm @s: mov eax, $100 lea rcx, [rip + LargeBlocksLocked] lock cmpxchg byte ptr [rcx], ah je @ok rdtsc shl rdx, 32 lea r9, [rax + rdx + SpinLargeLockTSC] // r9 = endtsc @sp: pause rdtsc shl rdx, 32 or rax, rdx cmp rax, r9 ja @rc // timeout mov eax, $100 cmp byte ptr [rcx], ah // don't flush the CPU cache if Locked still true je @sp lock cmpxchg byte ptr [rcx], ah je @ok jmp @sp @rc: call ReleaseCore lea rax, [rip + HeapStatus] lock inc qword ptr [rax].TMMStatus.Large.SleepCount jmp @s @ok: end; function AllocateLargeBlockFrom(size: PtrUInt; existing: pointer; oldsize: PtrUInt): pointer; var blocksize: PtrUInt; header, old: PLargeBlockHeader; begin blocksize := (size + LargeBlockHeaderSize + LargeBlockGranularity - 1 + BlockHeaderSize) and -LargeBlockGranularity; if existing = nil then header := AllocLarge(blocksize) else {$ifdef FPCMM_NOMREMAP} header := nil; // paranoid {$else} header := fpmremap(existing, oldsize, blocksize, MREMAP_MAYMOVE); {$endif FPCMM_NOMREMAP} if header <> nil then begin NotifyAlloc(HeapStatus.Large, blocksize); if existing <> nil then NotifyFree(HeapStatus.Large, oldsize); header.BlockSizeAndFlags := blocksize or IsLargeBlockFlag; LockLargeBlocks; old := LargeBlocksCircularList.NextLargeBlockHeader; header.PreviousLargeBlockHeader := @LargeBlocksCircularList; LargeBlocksCircularList.NextLargeBlockHeader := header; header.NextLargeBlockHeader := old; old.PreviousLargeBlockHeader := header; LargeBlocksLocked := False; inc(header); end; result := header; end; function AllocateLargeBlock(size: PtrUInt): pointer; begin result := AllocateLargeBlockFrom(size, nil, 0); end; procedure FreeLarge(ptr: PLargeBlockHeader; size: PtrUInt); begin NotifyFree(HeapStatus.Large, size); Free(ptr, size); end; function FreeLargeBlock(p: pointer): PtrInt; var header, prev, next: PLargeBlockHeader; begin header := pointer(PByte(p) - LargeBlockHeaderSize); if header.BlockSizeAndFlags and IsFreeBlockFlag <> 0 then begin // try to duplicate the same pointer twice result := 0; exit; end; LockLargeBlocks; prev := header.PreviousLargeBlockHeader; next := header.NextLargeBlockHeader; next.PreviousLargeBlockHeader := prev; prev.NextLargeBlockHeader := next; LargeBlocksLocked := False; result := DropMediumAndLargeFlagsMask and header.BlockSizeAndFlags; FreeLarge(header, result); end; function ReallocateLargeBlock(p: pointer; size: PtrUInt): pointer; var oldavail, minup, new: PtrUInt; {$ifndef FPCMM_NOMREMAP} prev, next, {$endif} header: PLargeBlockHeader; begin header := pointer(PByte(p) - LargeBlockHeaderSize); oldavail := (DropMediumAndLargeFlagsMask and header^.BlockSizeAndFlags) - (LargeBlockHeaderSize + BlockHeaderSize); new := size; if size > oldavail then begin // size-up with 1/8 or 1/4 overhead for any future growing realloc if oldavail > 128 shl 20 then minup := oldavail + oldavail shr 3 else minup := oldavail + oldavail shr 2; if size < minup then new := minup; end else begin result := p; oldavail := oldavail shr 1; if size >= oldavail then // small size-up within current buffer -> no reallocate exit else // size-down and move just the trailing data oldavail := size; end; {$ifdef FPCMM_NOMREMAP} // no mremap(): reallocate a new block, copy the existing data, free old result := _GetMem(new); if result <> nil then MoveLarge(p, result, oldavail); _FreeMem(p); {$else} // remove from current chain list LockLargeBlocks; prev := header^.PreviousLargeBlockHeader; next := header^.NextLargeBlockHeader; next.PreviousLargeBlockHeader := prev; prev.NextLargeBlockHeader := next; LargeBlocksLocked := False; // let the Linux Kernel mremap() the memory using its TLB magic size := DropMediumAndLargeFlagsMask and header^.BlockSizeAndFlags; result := AllocateLargeBlockFrom(new, header, size); {$endif FPCMM_NOMREMAP} end; { ********* Main Memory Manager Functions } procedure LockGetMem; nostackframe; assembler; asm // Can use one of the several arenas reserved for tiny blocks? cmp ecx, SizeOf(TTinyBlockTypes) jae @NotTinyBlockType { ---------- TINY (size<=128B) block lock ---------- } @LockTinyBlockTypeLoop: // Round-Robin attempt to lock of SmallBlockInfo.Tiny[] // -> fair distribution among calls to reduce thread contention mov edx, NumTinyBlockArenas @TinyBlockArenaLoop: mov eax, SizeOf(TTinyBlockTypes) lock xadd dword ptr [r8 + TSmallBlockInfo.TinyCurrentArena], eax and eax, (NumTinyBlockArenas * Sizeof(TTinyBlockTypes)) - 1 add rax, rcx lea rbx, [r8 + rax].TSmallBlockInfo.Tiny mov eax, $100 cmp [rbx].TSmallBlockType.BlockTypeLocked, ah je @NextTinyBlockArena lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah jne @NextTinyBlockArena @GotLockOnTinyBlockType: ret @NextTinyBlockArena: dec edx jnz @TinyBlockArenaLoop // Also try the default SmallBlockInfo.Small[] lea rbx, [r8 + rcx] mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah je @GotLockOnTinyBlockType // Thread Contention (occurs much less than during _Freemem) lock inc dword ptr [rbx].TSmallBlockType.GetmemSleepCount push r8 push rcx call ReleaseCore pop rcx pop r8 jmp @LockTinyBlockTypeLoop { ---------- SMALL (size<2600) block lock ---------- } @NotTinyBlockType: lea rbx, [r8 + rcx].TSmallBlockInfo.Small @LockBlockTypeLoopRetry: {$ifdef FPCMM_PAUSE} rdtsc shl rdx, 32 lea r9, [rax + rdx + SpinSmallGetmemLockTSC] // r9 = endtsc {$endif FPCMM_PAUSE} @LockBlockTypeLoop: // Grab the default block type mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah jne @LockNextSmallBlockType @GotLockOnSmallBlockType: ret @LockNextSmallBlockType: // Try up to two next sizes add rbx, SizeOf(TSmallBlockType) mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah je @GotLockOnSmallBlockType pause add rbx, SizeOf(TSmallBlockType) mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah je @GotLockOnSmallBlockType sub rbx, 2 * SizeOf(TSmallBlockType) {$ifdef FPCMM_PAUSE} pause rdtsc shl rdx, 32 or rax, rdx cmp rax, r9 jb @LockBlockTypeLoop // no timeout yet {$endif FPCMM_PAUSE} // Block type and two sizes larger are all locked - give up and sleep lock inc dword ptr [rbx].TSmallBlockType.GetmemSleepCount call ReleaseCore jmp @LockBlockTypeLoopRetry end; function _GetMem(size: PtrUInt): pointer; nostackframe; assembler; asm {$ifndef MSWINDOWS} mov rcx, size {$else} push rsi push rdi {$endif MSWINDOWS} push rbx // Since most allocations are for small blocks, determine small block type lea rbx, [rip + SmallBlockInfo] @VoidSizeToSomething: lea rdx, [rcx + BlockHeaderSize - 1] shr rdx, 4 // div SmallBlockGranularity // Is it a tiny/small block? cmp rcx, (MaximumSmallBlockSize - BlockHeaderSize) ja @NotTinySmallBlock test rcx, rcx jz @VoidSize {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, qword ptr [rbx].TSmallBlockInfo.IsMultiThreadPtr {$endif FPCMM_ASSUMEMULTITHREAD} // Get the tiny/small TSmallBlockType[] offset in rcx movzx ecx, byte ptr [rbx + rdx].TSmallBlockInfo.GetmemLookup mov r8, rbx shl ecx, SmallBlockTypePO2 // Acquire block type lock {$ifdef FPCMM_ASSUMEMULTITHREAD} call LockGetMem {$else} cmp byte ptr [rax], 0 jne @CheckTinySmallLock add rbx, rcx {$endif FPCMM_ASSUMEMULTITHREAD} { ---------- TINY/SMALL block registration ---------- } @GotLockOnSmallBlockType: // set rdx=NextPartiallyFreePool rax=FirstFreeBlock rcx=DropSmallFlagsMask mov rdx, [rbx].TSmallBlockType.NextPartiallyFreePool inc [rbx].TSmallBlockType.GetmemCount mov rax, [rdx].TSmallBlockPoolHeader.FirstFreeBlock mov rcx, DropSmallFlagsMask // Is there a pool with free blocks? cmp rdx, rbx je @TrySmallSequentialFeed inc [rdx].TSmallBlockPoolHeader.BlocksInUse // Set the new first free block and the block header and rcx, [rax - BlockHeaderSize] mov [rdx].TSmallBlockPoolHeader.FirstFreeBlock, rcx mov [rax - BlockHeaderSize], rdx // Is the chunk now full? jz @RemoveSmallPool // Unlock the block type and leave mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false @Done: pop rbx {$ifdef MSWINDOWS} pop rdi pop rsi {$endif MSWINDOWS} ret @VoidSize: inc ecx // "we always need to allocate something" (see RTL heap.inc) jmp @VoidSizeToSomething {$ifndef FPCMM_ASSUMEMULTITHREAD} @CheckTinySmallLock: call LockGetMem jmp @GotLockOnSmallBlockType {$endif FPCMM_ASSUMEMULTITHREAD} @TrySmallSequentialFeed: // Feed a small block sequentially movzx ecx, [rbx].TSmallBlockType.BlockSize mov rdx, [rbx].TSmallBlockType.CurrentSequentialFeedPool add rcx, rax // Can another block fit? cmp rax, [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress ja @AllocateSmallBlockPool // Adjust number of used blocks and sequential feed pool mov [rbx].TSmallBlockType.NextSequentialFeedBlockAddress, rcx inc [rdx].TSmallBlockPoolHeader.BlocksInUse // Unlock the block type, set the block header and leave mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false mov [rax - BlockHeaderSize], rdx pop rbx {$ifdef MSWINDOWS} pop rdi pop rsi {$endif MSWINDOWS} ret @RemoveSmallPool: // Pool is full - remove it from the partially free list mov rcx, [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rbx mov [rbx].TSmallBlockType.NextPartiallyFreePool, rcx // Unlock the block type and leave mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false pop rbx {$ifdef MSWINDOWS} pop rdi pop rsi {$endif MSWINDOWS} ret @AllocateSmallBlockPool: // Access shared information about Medium blocks storage lea rcx, [rip + MediumBlockInfo] mov r10, rcx {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, [rcx + TMediumBlockinfo.IsMultiThreadPtr] cmp byte ptr [rax], false je @MediumLocked1 {$endif FPCMM_ASSUMEMULTITHREAD} mov eax, $100 lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah je @MediumLocked1 call LockMediumBlocks @MediumLocked1: // Are there any available blocks of a suitable size? movsx esi, [rbx].TSmallBlockType.AllowedGroupsForBlockPoolBitmap and esi, [r10 + TMediumBlockInfo.BinGroupBitmap] jz @NoSuitableMediumBlocks // Compute rax = bin group number with free blocks, rcx = bin number bsf eax, esi lea r9, [rax * 4] mov ecx, [r10 + TMediumBlockInfo.BinBitmaps + r9] bsf ecx, ecx lea rcx, [rcx + r9 * 8] // Set rdi = @bin, rsi = free block lea rsi, [rcx * 8] // SizeOf(TMediumBlockBin) = 16 lea rdi, [r10 + TMediumBlockInfo.Bins + rsi * 2] mov rsi, TMediumFreeBlock[rdi].NextFreeBlock // Remove the first block from the linked list (LIFO) mov rdx, TMediumFreeBlock[rsi].NextFreeBlock mov TMediumFreeBlock[rdi].NextFreeBlock, rdx mov TMediumFreeBlock[rdx].PreviousFreeBlock, rdi // Is this bin now empty? cmp rdi, rdx jne @MediumBinNotEmpty // rbx = block type, rax = bin group number, // r9 = bin group number * 4, rcx = bin number, rdi = @bin, rsi = free block // Flag this bin (and the group if needed) as empty mov edx, - 2 mov r11d, [r10 + TMediumBlockInfo.BinGroupBitmap] rol edx, cl btr r11d, eax // btr reg,reg is faster than btr [mem],reg and [r10 + TMediumBlockInfo.BinBitmaps + r9], edx jnz @MediumBinNotEmpty mov [r10 + TMediumBlockInfo.BinGroupBitmap], r11d @MediumBinNotEmpty: // rsi = free block, rbx = block type // Get the size of the available medium block in edi mov rdi, DropMediumAndLargeFlagsMask and rdi, [rsi - BlockHeaderSize] cmp edi, MaximumSmallBlockPoolSize jb @UseWholeBlock // Split the block: new block size is the optimal size mov edx, edi movzx edi, [rbx].TSmallBlockType.OptimalBlockPoolSize sub edx, edi lea rcx, [rsi + rdi] lea rax, [rdx + IsMediumBlockFlag + IsFreeBlockFlag] mov [rcx - BlockHeaderSize], rax // Store the size of the second split as the second last pointer mov [rcx + rdx - 16], rdx // Put the remainder in a bin (it will be big enough) call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize jmp @GotMediumBlock @NoSuitableMediumBlocks: // Check the sequential feed medium block pool for space movzx ecx, [rbx].TSmallBlockType.MinimumBlockPoolSize mov edi, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft] cmp edi, ecx jb @AllocateNewSequentialFeed // Get the address of the last block that was fed mov rsi, [r10 + TMediumBlockInfo.LastSequentiallyFed] // Enough sequential feed space: Will the remainder be usable? movzx ecx, [rbx].TSmallBlockType.OptimalBlockPoolSize lea rdx, [rcx + MinimumMediumBlockSize] cmp edi, edx cmovae edi, ecx sub rsi, rdi // Update the sequential feed parameters sub [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], edi mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rsi jmp @GotMediumBlock @AllocateNewSequentialFeed: // Use the optimal size for allocating this small block pool movzx size, word ptr [rbx].TSmallBlockType.OptimalBlockPoolSize push size // use "size" variable = first argument in current ABI call call AllocNewSequentialFeedMediumPool pop rdi // restore edi=blocksize and r10=MediumBlockInfo lea r10, [rip + MediumBlockInfo] mov rsi, rax test rax, rax jnz @GotMediumBlock // rsi=freeblock rbx=blocktype edi=blocksize mov [r10 + TMediumBlockInfo.Locked], al mov [rbx].TSmallBlockType.BlockTypeLocked, al {$ifdef MSWINDOWS} jmp @Done {$else} pop rbx ret {$endif MSWINDOWS} @UseWholeBlock: // rsi = free block, rbx = block type, edi = block size // Mark this block as used in the block following it and byte ptr [rsi + rdi - BlockHeaderSize], NOT PreviousMediumBlockIsFreeFlag @GotMediumBlock: // rsi = free block, rbx = block type, edi = block size // Set the size and flags for this block lea rcx, [rdi + IsMediumBlockFlag + IsSmallBlockPoolInUseFlag] mov [rsi - BlockHeaderSize], rcx // Unlock medium blocks and setup the block pool xor eax, eax mov [r10 + TMediumBlockInfo.Locked], al mov TSmallBlockPoolHeader[rsi].BlockType, rbx mov TSmallBlockPoolHeader[rsi].FirstFreeBlock, rax mov TSmallBlockPoolHeader[rsi].BlocksInUse, 1 mov [rbx].TSmallBlockType.CurrentSequentialFeedPool, rsi // Return the pointer to the first block, compute next/last block addresses lea rax, [rsi + SmallBlockPoolHeaderSize] movzx ecx, [rbx].TSmallBlockType.BlockSize lea rdx, [rax + rcx] mov [rbx].TSmallBlockType.NextSequentialFeedBlockAddress, rdx add rdi, rsi sub rdi, rcx mov [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress, rdi // Unlock the small block type, set header and leave mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false mov [rax - BlockHeaderSize], rsi pop rbx {$ifdef MSWINDOWS} pop rdi pop rsi {$endif MSWINDOWS} ret { ---------- MEDIUM block allocation ---------- } @NotTinySmallBlock: // Do we need a Large block? lea r10, [rip + MediumBlockInfo] cmp rcx, MaximumMediumBlockSize - BlockHeaderSize ja @IsALargeBlockRequest // Get the bin size for this block size (rounded up to the next bin size) lea rbx, [rcx + MediumBlockGranularity - 1 + BlockHeaderSize - MediumBlockSizeOffset] mov rcx, r10 and ebx, - MediumBlockGranularity add ebx, MediumBlockSizeOffset {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr] cmp byte ptr [rax], false je @MediumLocked2 {$endif FPCMM_ASSUMEMULTITHREAD} mov eax, $100 lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah je @MediumLocked2 call LockMediumBlocks @MediumLocked2: // Compute ecx = bin number in ecx and edx = group number lea rdx, [rbx - MinimumMediumBlockSize] mov ecx, edx shr edx, 8 + 5 shr ecx, 8 mov eax, - 1 shl eax, cl and eax, [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4] jz @GroupIsEmpty and ecx, - 32 bsf eax, eax or ecx, eax jmp @GotBinAndGroup @GroupIsEmpty: // Try all groups greater than this group mov eax, - 2 mov ecx, edx shl eax, cl and eax, [r10 + TMediumBlockInfo.BinGroupBitmap] jz @TrySequentialFeedMedium // There is a suitable group with enough space bsf edx, eax mov eax, [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4] bsf ecx, eax mov eax, edx shl eax, 5 or ecx, eax jmp @GotBinAndGroup @TrySequentialFeedMedium: mov ecx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft] // Can block be fed sequentially? sub ecx, ebx jc @AllocateNewSequentialFeedForMedium // Get the block address, store remaining bytes, set the flags and unlock mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed] sub rax, rbx mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rax mov [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], ecx or rbx, IsMediumBlockFlag mov [rax - BlockHeaderSize], rbx mov byte ptr [r10 + TMediumBlockInfo.Locked], false {$ifdef MSWINDOWS} jmp @Done {$else} pop rbx ret {$endif MSWINDOWS} @AllocateNewSequentialFeedForMedium: mov size, rbx // 'size' variable is the first argument register in ABI call call AllocNewSequentialFeedMediumPool mov byte ptr [rip + MediumBlockInfo.Locked], false // r10 has been overwritten {$ifdef MSWINDOWS} jmp @Done {$else} pop rbx ret {$endif MSWINDOWS} @GotBinAndGroup: // ebx = block size, ecx = bin number, edx = group number // Compute rdi = @bin, rsi = free block lea rax, [rcx + rcx] lea rdi, [r10 + TMediumBlockInfo.Bins + rax * 8] mov rsi, TMediumFreeBlock[rdi].NextFreeBlock // Remove the first block from the linked list (LIFO) mov rax, TMediumFreeBlock[rsi].NextFreeBlock mov TMediumFreeBlock[rdi].NextFreeBlock, rax mov TMediumFreeBlock[rax].PreviousFreeBlock, rdi // Is this bin now empty? cmp rdi, rax jne @MediumBinNotEmptyForMedium // edx = bin group number, ecx = bin number, rdi = @bin, rsi = free block, ebx = block size // Flag this bin and group as empty mov eax, - 2 mov r11d, [r10 + TMediumBlockInfo.BinGroupBitmap] rol eax, cl btr r11d, edx // btr reg,reg is faster than btr [mem],reg and [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax jnz @MediumBinNotEmptyForMedium mov [r10 + TMediumBlockInfo.BinGroupBitmap], r11d @MediumBinNotEmptyForMedium: // rsi = free block, ebx = block size // Get rdi = size of the available medium block, rdx = second split size mov rdi, DropMediumAndLargeFlagsMask and rdi, [rsi - BlockHeaderSize] mov edx, edi sub edx, ebx jz @UseWholeBlockForMedium // Split the block in two lea rcx, [rsi + rbx] lea rax, [rdx + IsMediumBlockFlag + IsFreeBlockFlag] mov [rcx - BlockHeaderSize], rax // Store the size of the second split as the second last pointer mov [rcx + rdx - 16], rdx // Put the remainder in a bin cmp edx, MinimumMediumBlockSize jb @GotMediumBlockForMedium call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize jmp @GotMediumBlockForMedium @UseWholeBlockForMedium: // Mark this block as used in the block following it and byte ptr [rsi + rdi - BlockHeaderSize], NOT PreviousMediumBlockIsFreeFlag @GotMediumBlockForMedium: // Set the size and flags for this block lea rcx, [rbx + IsMediumBlockFlag] mov [rsi - BlockHeaderSize], rcx // Unlock medium blocks and leave mov byte ptr [r10 + TMediumBlockInfo.Locked], false mov rax, rsi {$ifdef MSWINDOWS} jmp @Done {$else} pop rbx ret {$endif MSWINDOWS} { ---------- LARGE block allocation ---------- } @IsALargeBlockRequest: xor rax, rax test rcx, rcx js @DoneLarge // Note: size is still in the rcx/rdi first param register call AllocateLargeBlock @DoneLarge: {$ifdef MSWINDOWS} jmp @Done {$else} pop rbx {$endif MSWINDOWS} end; function FreeMediumBlock(arg1: pointer): PtrUInt; nostackframe; assembler; // rcx=P rdx=[P-BlockHeaderSize] asm // Drop the flags, set r10=MediumBlockInfo r11=P rbx=blocksize lea r10, [rip + MediumBlockInfo] and rdx, DropMediumAndLargeFlagsMask push rbx push rdx // save blocksize mov rbx, rdx mov r11, rcx // Lock the Medium blocks mov rcx, r10 {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr] cmp byte ptr [rax], false je @MediumBlocksLocked {$endif FPCMM_ASSUMEMULTITHREAD} mov eax, $100 lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah je @MediumBlocksLocked call LockMediumBlocks @MediumBlocksLocked: // Get rcx = next block size and flags mov rcx, [r11 + rbx - BlockHeaderSize] // Can we combine this block with the next free block? test qword ptr [r11 + rbx - BlockHeaderSize], IsFreeBlockFlag jnz @NextBlockIsFree // Set the "PreviousIsFree" flag in the next block or rcx, PreviousMediumBlockIsFreeFlag mov [r11 + rbx - BlockHeaderSize], rcx @NextBlockChecked: // Re-read the flags and try to combine with previous free block test byte ptr [r11 - BlockHeaderSize], PreviousMediumBlockIsFreeFlag jnz @PreviousBlockIsFree @PreviousBlockChecked: // Check if entire medium block pool is free cmp ebx, (MediumBlockPoolSize - MediumBlockPoolHeaderSize) je @EntireMediumPoolFree @BinFreeMediumBlock: // Store size of the block, flags and trailing size marker and insert into bin lea rax, [rbx + IsMediumBlockFlag + IsFreeBlockFlag] mov [r11 - BlockHeaderSize], rax mov [r11 + rbx - 16], rbx mov rcx, r11 mov rdx, rbx call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize // Unlock medium blocks and leave mov byte ptr [r10 + TMediumBlockInfo.Locked], false pop rax // medium block size pop rbx ret @NextBlockIsFree: // Get rax = next block address, rbx = end of the block lea rax, [r11 + rbx] and rcx, DropMediumAndLargeFlagsMask add rbx, rcx // Was the block binned? cmp rcx, MinimumMediumBlockSize jb @NextBlockChecked mov rcx, rax call RemoveMediumFreeBlock // rcx = APMediumFreeBlock jmp @NextBlockChecked @PreviousBlockIsFree: // Get rcx = size/point of the previous free block, rbx = new block end mov rcx, [r11 - 16] sub r11, rcx add rbx, rcx // Remove the previous block from the linked list cmp ecx, MinimumMediumBlockSize jb @PreviousBlockChecked mov rcx, r11 call RemoveMediumFreeBlock // rcx = APMediumFreeBlock jmp @PreviousBlockChecked @EntireMediumPoolFree: // Ensure current sequential feed pool is free cmp dword ptr [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], MediumBlockPoolSize - MediumBlockPoolHeaderSize jne @MakeEmptyMediumPoolSequentialFeed // Remove this medium block pool from the linked list stored in its header sub r11, MediumBlockPoolHeaderSize mov rax, TMediumBlockPoolHeader[r11].PreviousMediumBlockPoolHeader mov rdx, TMediumBlockPoolHeader[r11].NextMediumBlockPoolHeader mov TMediumBlockPoolHeader[rax].NextMediumBlockPoolHeader, rdx mov TMediumBlockPoolHeader[rdx].PreviousMediumBlockPoolHeader, rax // Unlock medium blocks and free the block pool mov byte ptr [r10 + TMediumBlockInfo.Locked], false mov arg1, r11 call FreeMedium pop rax // medium block size pop rbx ret @MakeEmptyMediumPoolSequentialFeed: // Get rbx = end-marker block, and recycle the current sequential feed pool lea rbx, [r11 + MediumBlockPoolSize - MediumBlockPoolHeaderSize] call BinMediumSequentialFeedRemainder // Set this medium pool up as the new sequential feed pool, unlock and leave mov qword ptr [rbx - BlockHeaderSize], IsMediumBlockFlag mov dword ptr [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], MediumBlockPoolSize - MediumBlockPoolHeaderSize mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rbx mov byte ptr [r10 + TMediumBlockInfo.Locked], false pop rax pop rbx end; procedure FreeSmallLocked; nostackframe; assembler; // rbx=TSmallBlockType rcx=P rdx=TSmallBlockPoolHeader asm // Adjust number of blocks in use, set rax = old first free block inc [rbx].TSmallBlockType.FreememCount mov rax, [rdx].TSmallBlockPoolHeader.FirstFreeBlock sub [rdx].TSmallBlockPoolHeader.BlocksInUse, 1 jz @PoolIsNowEmpty // Store this as the new first free block mov [rdx].TSmallBlockPoolHeader.FirstFreeBlock, rcx // Store the previous first free block as the block header lea r9, [rax + IsFreeBlockFlag] mov [rcx - BlockHeaderSize], r9 // Was the pool full? test rax, rax jnz @SmallPoolWasNotFull // Insert the pool back into the linked list if it was full mov rcx, [rbx].TSmallBlockType.NextPartiallyFreePool mov [rdx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rbx mov [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool, rcx mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rdx mov [rbx].TSmallBlockType.NextPartiallyFreePool, rdx @SmallPoolWasNotFull: {$ifdef FPCMM_LOCKLESSFREE} // Try to release all pending bin from this block while we have the lock cmp byte ptr [rbx].TSmallBlockType.BinCount, 0 jne @ProcessPendingBin {$endif FPCMM_LOCKLESSFREE} mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false ret @PoolIsNowEmpty: // FirstFreeBlock=nil means it is the sequential feed pool with a single block test rax, rax jz @IsSequentialFeedPool // Pool is now empty: Remove it from the linked list and free it mov rax, [rdx].TSmallBlockPoolHeader.PreviousPartiallyFreePool mov rcx, [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool mov TSmallBlockPoolHeader[rax].NextPartiallyFreePool, rcx mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rax // Is this the sequential feed pool? If so, stop sequential feeding xor eax, eax cmp [rbx].TSmallBlockType.CurrentSequentialFeedPool, rdx jne @NotSequentialFeedPool @IsSequentialFeedPool: mov [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress, rax @NotSequentialFeedPool: // Unlock blocktype and release this pool mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false mov rcx, rdx mov rdx, qword ptr [rdx - BlockHeaderSize] jmp FreeMediumBlock // no call nor BinLocked to avoid race condition {$ifdef FPCMM_LOCKLESSFREE} @ProcessPendingBin: // Try twice to acquire BinLocked (spinning now may induce race condition) mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah je @BinLocked pause mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah jne @BinAlreadyLocked @BinLocked: movzx eax, byte ptr [rbx].TSmallBlockType.BinCount test al, al jz @NoBin // free last pointer in TSmallBlockType.BinInstance[] mov rcx, qword ptr [rbx + TSmallBlockType.BinInstance - 8 + rax * 8] dec byte ptr [rbx].TSmallBlockType.BinCount mov byte ptr [rbx].TSmallBlockType.BinLocked, false mov rdx, [rcx - BlockHeaderSize] jmp FreeSmallLocked @NoBin: mov byte ptr [rbx].TSmallBlockType.BinLocked, false @BinAlreadyLocked: mov byte ptr [rbx].TSmallBlockType.BlockTypeLocked, false {$endif FPCMM_LOCKLESSFREE} end; function _FreeMem(P: pointer): PtrUInt; nostackframe; assembler; asm xor eax, eax {$ifndef MSWINDOWS} mov rcx, P {$endif MSWINDOWS} test P, P jz @VoidPointer {$ifdef FPCMM_REPORTMEMORYLEAKS} mov qword ptr [P], rax // reset TObject VMT or string/dynarray header {$endif FPCMM_REPORTMEMORYLEAKS} mov rdx, qword ptr [P - BlockHeaderSize] {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, qword ptr [rip + SmallBlockInfo].TSmallBlockInfo.IsMultiThreadPtr {$endif FPCMM_ASSUMEMULTITHREAD} // Is it a small block in use? test dl, IsFreeBlockFlag + IsMediumBlockFlag + IsLargeBlockFlag jnz @NotSmallBlockInUse // Get the small block type in rbx and try to grab it push rbx mov rbx, [rdx].TSmallBlockPoolHeader.BlockType {$ifdef FPCMM_ASSUMEMULTITHREAD} mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah jne @CheckTinySmallLock {$else} cmp byte ptr [rax], 0 jne @TinySmallLockLoop {$endif FPCMM_ASSUMEMULTITHREAD} @FreeAndUnlock: call FreeSmallLocked movzx eax, word ptr [rbx].TSmallBlockType.BlockSize pop rbx @VoidPointer: ret @NotSmallBlockInUse: test dl, IsFreeBlockFlag + IsLargeBlockFlag jz FreeMediumBlock jmp FreeLargeBlock // P is still in rcx/rdi first param register @TinySmallLockLoop: mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah je @FreeAndUnlock @CheckTinySmallLock: {$ifdef FPCMM_LOCKLESSFREE} // Try to put rcx=P in TSmallBlockType.BinInstance[] cmp byte ptr [rbx].TSmallBlockType.BinCount, SmallBlockBinCount je @LockBlockTypeSleep mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah je @BinLocked {$ifdef FPCMM_PAUSE} push rdx rdtsc shl rdx, 32 lea r9, [rax + rdx + SpinSmallFreememBinTSC] // r9 = endtsc @SpinBinLock: pause rdtsc shl rdx, 32 or rax, rdx cmp rax, r9 ja @SpinTimeout cmp byte ptr [rbx].TSmallBlockType.BinLocked, true je @SpinBinLock mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah jne @SpinBinLock pop rdx jmp @BinLocked @SpinTimeout: pop rdx {$endif FPCMM_PAUSE} {$ifdef FPCMM_DEBUG} inc dword ptr [rbx].TSmallBlockType.BinSpinCount // no lock (informative only) {$endif FPCMM_DEBUG} jmp @LockBlockTypeSleep @BinLocked: movzx eax, byte ptr [rbx].TSmallBlockType.BinCount cmp al, SmallBlockBinCount je @LockBlockType inc byte ptr [rbx].TSmallBlockType.BinCount mov [rbx + TSmallBlockType.BinInstance + rax * 8], rcx mov byte ptr [rbx].TSmallBlockType.BinLocked, false movzx eax, word ptr [rbx].TSmallBlockType.BlockSize pop rbx ret @LockBlockType: // Fallback to main block lock if TSmallBlockType.BinInstance[] is full mov byte ptr [rbx].TSmallBlockType.BinLocked, false {$endif FPCMM_LOCKLESSFREE} @LockBlockTypeSleep: {$ifdef FPCMM_PAUSE} // Spin to grab the block type (don't try too long due to contention) push rdx rdtsc shl rdx, 32 lea r9, [rax + rdx + SpinSmallFreememLockTSC] // r9 = endtsc @SpinLockBlockType: pause rdtsc shl rdx, 32 or rax, rdx cmp rax, r9 ja @LockBlockTypeReleaseCore cmp byte ptr [rbx].TSmallBlockType.BlockTypeLocked, 1 je @SpinLockBlockType mov eax, $100 lock cmpxchg byte ptr [rbx].TSmallBlockType.BlockTypeLocked, ah jne @SpinLockBlockType pop rdx jmp @FreeAndUnlock @LockBlockTypeReleaseCore: pop rdx {$endif FPCMM_PAUSE} // Couldn't grab the block type - sleep and try again lock inc dword ptr [rbx].TSmallBlockType.FreeMemSleepCount push rdx push rcx call ReleaseCore pop rcx pop rdx jmp @TinySmallLockLoop end; // warning: FPC signature is not the same than Delphi: requires "var P" function _ReallocMem(var P: pointer; Size: PtrUInt): pointer; nostackframe; assembler; asm {$ifdef MSWINDOWS} push rdi push rsi {$else} mov rdx, Size {$endif MSWINDOWS} push rbx push r14 push P // for assignement in @Done mov r14, qword ptr [P] test rdx, rdx jz @VoidSize // ReallocMem(P,0)=FreeMem(P) test r14, r14 jz @GetMemMoveFreeMem // ReallocMem(nil,Size)=GetMem(Size) mov rcx, [r14 - BlockHeaderSize] test cl, IsFreeBlockFlag + IsMediumBlockFlag + IsLargeBlockFlag jnz @NotASmallBlock { -------------- TINY/SMALL block ------------- } // Get rbx=blocktype, rcx=available size, rax=inplaceresize mov rbx, [rcx].TSmallBlockPoolHeader.BlockType lea rax, [rdx * 4 + SmallBlockDownsizeCheckAdder] movzx ecx, [rbx].TSmallBlockType.BlockSize sub ecx, BlockHeaderSize cmp rcx, rdx jb @SmallUpsize // Downsize or small growup with enough space: reallocate only if need cmp eax, ecx jb @GetMemMoveFreeMem // r14=P rdx=size @NoResize: mov rax, r14 // keep original pointer pop rcx pop r14 pop rbx {$ifdef MSWINDOWS} pop rsi pop rdi {$endif MSWINDOWS} ret @VoidSize: push rdx // to set P=nil jmp @DoFree // ReallocMem(P,0)=FreeMem(P) @SmallUpsize: // State: r14=pointer, rdx=NewSize, rcx=CurrentBlockSize, rbx=CurrentBlockType // Small blocks always grow with at least 100% + SmallBlockUpsizeAdder bytes lea P, qword ptr [rcx * 2 + SmallBlockUpsizeAdder] movzx ebx, [rbx].TSmallBlockType.BlockSize sub ebx, BlockHeaderSize + 8 // r14=pointer, P=NextUpBlockSize, rdx=NewSize, rbx=OldSize-8 @AdjustGetMemMoveFreeMem: // New allocated size is max(requestedsize, minimumupsize) cmp rdx, P cmova P, rdx push rdx call _GetMem pop rdx test rax, rax jz @Done jmp @MoveFreeMem // rax=New r14=P rbx=size-8 @GetMemMoveFreeMem: // reallocate copy and free: r14=P rdx=size mov rbx, rdx mov P, rdx // P is the proper first argument register call _GetMem test rax, rax jz @Done test r14, r14 // ReallocMem(nil,Size)=GetMem(Size) jz @Done sub rbx, 8 @MoveFreeMem: // copy and free: rax=New r14=P rbx=size-8 push rax lea rcx, [r14 + rbx] lea rdx, [rax + rbx] neg rbx jns @Last8 align 16 @MoveBy16: movaps xmm0, oword ptr [rcx + rbx] movaps oword ptr [rdx + rbx], xmm0 add rbx, 16 js @MoveBy16 @Last8: mov rax, qword ptr [rcx + rbx] mov qword ptr [rdx + rbx], rax @DoFree: mov P, r14 call _FreeMem pop rax @Done: pop rcx pop r14 pop rbx {$ifdef MSWINDOWS} pop rsi pop rdi {$endif MSWINDOWS} mov qword ptr [rcx], rax // store new pointer in var P ret @NotASmallBlock: // Is this a medium block or a large block? test cl, IsFreeBlockFlag + IsLargeBlockFlag jnz @PossibleLargeBlock { -------------- MEDIUM block ------------- } // rcx = Current Size + Flags, r14 = P, rdx = Requested Size, r10 = MediumBlockInfo lea rsi, [rdx + rdx] lea r10, [rip + MediumBlockInfo] mov rbx, rcx and ecx, DropMediumAndLargeFlagsMask lea rdi, [r14 + rcx] sub ecx, BlockHeaderSize and ebx, ExtractMediumAndLargeFlagsMask // Is it an upsize or a downsize? cmp rdx, rcx ja @MediumBlockUpsize // rcx = Current Block Size - BlockHeaderSize, rbx = Current Block Flags, // rdi = @Next Block, r14 = P, rdx = Requested Size // Downsize reallocate and move data only if less than half the current size cmp rsi, rcx jae @NoResize // In-place downsize? Ensure not smaller than MinimumMediumBlockSize cmp edx, MinimumMediumBlockSize - BlockHeaderSize jae @MediumBlockInPlaceDownsize // Need to move to another Medium block pool, or into a Small block? cmp edx, MediumInPlaceDownsizeLimit jb @GetMemMoveFreeMem // No need to realloc: resize in-place (if not already at the minimum size) mov edx, MinimumMediumBlockSize - BlockHeaderSize cmp ecx, MinimumMediumBlockSize - BlockHeaderSize jna @NoResize @MediumBlockInPlaceDownsize: // Round up to the next medium block size lea rsi, [rdx + BlockHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset] and rsi, - MediumBlockGranularity add rsi, MediumBlockSizeOffset // Get the size of the second split add ecx, BlockHeaderSize sub ecx, esi mov ebx, ecx // Lock the medium blocks mov rcx, r10 {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr] cmp byte ptr [rax], false je @MediumBlocksLocked1 {$endif FPCMM_ASSUMEMULTITHREAD} mov eax, $100 lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah je @MediumBlocksLocked1 call LockMediumBlocks @MediumBlocksLocked1: mov ecx, ebx // Reread the flags - may have changed before medium blocks could be locked mov rbx, ExtractMediumAndLargeFlagsMask and rbx, [r14 - BlockHeaderSize] @DoMediumInPlaceDownsize: // Set the new size in header, and get rbx = second split size or rbx, rsi mov [r14 - BlockHeaderSize], rbx mov ebx, ecx // If the next block is used, flag its previous block as free mov rdx, [rdi - BlockHeaderSize] test dl, IsFreeBlockFlag jnz @MediumDownsizeNextBlockFree or rdx, PreviousMediumBlockIsFreeFlag mov [rdi - BlockHeaderSize], rdx jmp @MediumDownsizeDoSplit @MediumDownsizeNextBlockFree: // If the next block is free, combine both mov rcx, rdi and rdx, DropMediumAndLargeFlagsMask add rbx, rdx add rdi, rdx cmp edx, MinimumMediumBlockSize jb @MediumDownsizeDoSplit call RemoveMediumFreeBlock // rcx=APMediumFreeBlock @MediumDownsizeDoSplit: // Store the trailing size field and free part header mov [rdi - 16], rbx lea rcx, [rbx + IsMediumBlockFlag + IsFreeBlockFlag]; mov [r14 + rsi - BlockHeaderSize], rcx // Bin this free block (if worth it) cmp rbx, MinimumMediumBlockSize jb @MediumBlockDownsizeDone lea rcx, [r14 + rsi] mov rdx, rbx call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize @MediumBlockDownsizeDone: // Unlock the medium blocks, and leave with the new pointer mov byte ptr [r10 + TMediumBlockInfo.Locked], false mov rax, r14 jmp @Done @MediumBlockUpsize: // ecx = Current Block Size - BlockHeaderSize, bl = Current Block Flags, // rdi = @Next Block, r14 = P, rdx = Requested Size // Try to make in-place upsize mov rax, [rdi - BlockHeaderSize] test al, IsFreeBlockFlag jz @CannotUpsizeMediumBlockInPlace // Get rax = available size, rsi = available size with the next block and rax, DropMediumAndLargeFlagsMask lea rsi, [rax + rcx] cmp rdx, rsi ja @CannotUpsizeMediumBlockInPlace // Grow into the next block mov rbx, rcx mov rcx, r10 {$ifndef FPCMM_ASSUMEMULTITHREAD} mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr] cmp byte ptr [rax], false je @MediumBlocksLocked2 {$endif FPCMM_ASSUMEMULTITHREAD} mov eax, $100 lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah je @MediumBlocksLocked2 mov rsi, rdx call LockMediumBlocks mov rdx, rsi @MediumBlocksLocked2: // Re-read info once locked, and ensure next block is still free mov rcx, rbx mov rbx, ExtractMediumAndLargeFlagsMask and rbx, [r14 - BlockHeaderSize] mov rax, [rdi - BlockHeaderSize] test al, IsFreeBlockFlag jz @NextMediumBlockChanged and eax, DropMediumAndLargeFlagsMask lea rsi, [rax + rcx] cmp rdx, rsi ja @NextMediumBlockChanged @DoMediumInPlaceUpsize: // Bin next free block (if worth it) cmp eax, MinimumMediumBlockSize jb @MediumInPlaceNoNextRemove push rcx push rdx mov rcx, rdi call RemoveMediumFreeBlock // rcx=APMediumFreeBlock pop rdx pop rcx @MediumInPlaceNoNextRemove: // Medium blocks grow a minimum of 25% in in-place upsizes mov eax, ecx shr eax, 2 add eax, ecx // Get the maximum of the requested size and the minimum growth size xor edi, edi sub eax, edx adc edi, - 1 and eax, edi // Round up to the nearest block size granularity lea rax, [rax + rdx + BlockHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset] and eax, - MediumBlockGranularity add eax, MediumBlockSizeOffset // Calculate the size of the second split and check if it fits lea rdx, [rsi + BlockHeaderSize] sub edx, eax ja @MediumInPlaceUpsizeSplit // Grab the whole block: Mark it as used in the next block, and adjust size and qword ptr [r14 + rsi], NOT PreviousMediumBlockIsFreeFlag add rsi, BlockHeaderSize jmp @MediumUpsizeInPlaceDone @MediumInPlaceUpsizeSplit: // Store the size of the second split as the second last pointer mov [r14 + rsi - BlockHeaderSize], rdx // Set the second split header lea rdi, [rdx + IsMediumBlockFlag + IsFreeBlockFlag] mov [r14 + rax - BlockHeaderSize], rdi mov rsi, rax cmp edx, MinimumMediumBlockSize jb @MediumUpsizeInPlaceDone lea rcx, [r14 + rax] call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize @MediumUpsizeInPlaceDone: // No need to move data at upsize: set the size and flags for this block or rsi, rbx mov [r14 - BlockHeaderSize], rsi mov byte ptr [r10 + TMediumBlockInfo.Locked], false mov rax, r14 jmp @Done @NextMediumBlockChanged: // The next block changed during lock: reallocate and move data mov byte ptr [r10 + TMediumBlockInfo.Locked], false @CannotUpsizeMediumBlockInPlace: // rcx=OldSize-8, rdx=NewSize mov rbx, rcx mov eax, ecx shr eax, 2 lea P, qword ptr [rcx + rax] // NextUpBlockSize = OldSize+25% jmp @AdjustGetMemMoveFreeMem // P=BlockSize, rdx=NewSize, rbx=OldSize-8 @PossibleLargeBlock: { -------------- LARGE block ------------- } test cl, IsFreeBlockFlag + IsMediumBlockFlag jnz @Error {$ifdef MSWINDOWS} mov rcx, r14 {$else} mov rdi, r14 mov rsi, rdx {$endif MSWINDOWS} call ReallocateLargeBlock // with restored proper registers jmp @Done @Error: xor eax, eax jmp @Done end; function _AllocMem(Size: PtrUInt): pointer; nostackframe; assembler; asm push rbx // Get rbx = size rounded down to the previous multiple of SizeOf(pointer) lea rbx, [Size - 1] and rbx, - 8 call _GetMem // Could a block be allocated? rcx = 0 if yes, -1 if no cmp rax, 1 sbb rcx, rcx // Point rdx to the last pointer lea rdx, [rax + rbx] // Compute Size (1..8 doesn't need to enter the SSE2 loop) or rbx, rcx jz @LastQ // Large blocks from mmap/VirtualAlloc are already zero filled cmp rbx, MaximumMediumBlockSize - BlockHeaderSize jae @Done neg rbx pxor xmm0, xmm0 align 16 @FillLoop: // non-temporal movntdq not needed when size <256KB (small/medium) movaps oword ptr [rdx + rbx], xmm0 add rbx, 16 js @FillLoop @LastQ: xor rcx, rcx mov qword ptr [rdx], rcx @Done: pop rbx end; function _MemSize(P: pointer): PtrUInt; begin // AFAIK used only by fpc_AnsiStr_SetLength() in RTL P := PPointer(PByte(P) - BlockHeaderSize)^; if (PtrUInt(P) and (IsMediumBlockFlag or IsLargeBlockFlag)) = 0 then result := PSmallBlockPoolHeader(PtrUInt(P) and DropSmallFlagsMask). BlockType.BlockSize - BlockHeaderSize else begin result := (PtrUInt(P) and DropMediumAndLargeFlagsMask) - BlockHeaderSize; if (PtrUInt(P) and IsMediumBlockFlag) = 0 then dec(result, LargeBlockHeaderSize); end; end; function _FreeMemSize(P: pointer; size: PtrUInt): PtrInt; begin // should return the chunk size - only used by heaptrc AFAIK if (P <> nil) and (size <> 0) then result := _FreeMem(P) else result := 0; end; { ********* Information Gathering } {$ifdef FPCMM_STANDALONE} procedure Assert(flag: boolean); begin end; {$else} function _GetHeapStatus: THeapStatus; begin FillChar(result, sizeof(result), 0); end; function _GetFPCHeapStatus: TFPCHeapStatus; begin FillChar(result, sizeof(result), 0); end; type // match both TSmallBlockStatus and TSmallBlockContention TRes = array[0..2] of cardinal; // details are allocated on the stack, not the heap TResArray = array[0..(NumSmallInfoBlock * 2) - 1] of TRes; procedure QuickSortRes(var Res: TResArray; L, R, Level: PtrInt); var I, J, P: PtrInt; pivot: cardinal; tmp: TRes; begin if L < R then repeat I := L; J := R; P := (L + R) shr 1; repeat pivot := Res[P, Level]; while Res[I, Level] > pivot do inc(I); while Res[J, Level] < pivot do dec(J); if I <= J then begin tmp := Res[J]; Res[J] := Res[I]; Res[I] := tmp; if P = I then P := J else if P = J then P := I; inc(I); dec(J); end; until I > J; if J - L < R - I then begin // use recursion only for smaller range if L < J then QuickSortRes(Res, L, J, Level); L := I; end else begin if I < R then QuickSortRes(Res, I, R, Level); R := J; end; until L >= R; end; procedure SetSmallBlockStatus(var res: TResArray); var i, a: integer; p: PSmallBlockType; d: ^TSmallBlockStatus; begin d := @res; p := @SmallBlockInfo; for i := 1 to NumSmallBlockTypes do begin d^.Total := p^.GetmemCount; d^.Current := p^.GetmemCount - p^.FreememCount; d^.BlockSize := p^.BlockSize; inc(d); inc(p); end; for a := 1 to NumTinyBlockArenas do begin d := @res; // aggregate counters for i := 1 to NumTinyBlockTypes do begin inc(d^.Total, p^.GetmemCount); inc(d^.Current, p^.GetmemCount - p^.FreememCount); inc(d); inc(p); end; end; assert(p = @SmallBlockInfo.GetmemLookup); end; function SortSmallBlockStatus(var res: TResArray; maxcount, orderby: PtrInt; count, bytes: PPtrUInt): PtrInt; var i: PtrInt; begin QuickSortRes(res, 0, NumSmallBlockTypes - 1, orderby); if count <> nil then begin count^ := 0; for i := 0 to NumSmallBlockTypes - 1 do inc(count^, res[i, orderby]); end; if bytes <> nil then begin bytes^ := 0; for i := 0 to NumSmallBlockTypes - 1 do inc(bytes^, res[i, orderby] * res[i, ord(obBlockSize)]); end; result := maxcount; if result > NumSmallBlockTypes then result := NumSmallBlockTypes; while (result > 0) and (res[result - 1, orderby] = 0) do dec(result); end; function SetSmallBlockContention(var res: TResArray; maxcount: integer): integer; var i: integer; p: PSmallBlockType; d: ^TSmallBlockContention; begin result := 0; d := @res; p := @SmallBlockInfo; for i := 1 to NumSmallInfoBlock do begin if p^.GetmemSleepCount <> 0 then begin d^.SleepCount := p^.GetmemSleepCount; d^.GetmemBlockSize := p^.BlockSize; d^.FreememBlockSize := 0; inc(d); inc(result); end; if p^.FreememSleepCount <> 0 then begin d^.SleepCount := p^.FreememSleepCount; d^.GetmemBlockSize := 0; d^.FreememBlockSize := p^.BlockSize; inc(d); inc(result); end; inc(p); end; if result = 0 then exit; QuickSortRes(res, 0, result - 1, 0); if result > maxcount then result := maxcount; end; function K(i: PtrUInt): shortstring; var tmp: string[1]; begin if i >= 1 shl 50 then begin i := i shr 50; tmp := 'Z'; end else if i >= 1 shl 40 then begin i := i shr 40; tmp := 'T'; end else if i >= 1 shl 30 then begin i := i shr 30; tmp := 'G'; end else if i >= 1 shl 20 then begin i := i shr 20; tmp := 'M'; end else if i >= 1 shl 10 then begin i := i shr 10; tmp := 'K'; end else tmp := ''; str(i, result); result := result + tmp; end; {$I-} procedure WriteHeapStatusDetail(const arena: TMMStatusArena; const name: shortstring); begin write(name, K(arena.CurrentBytes):4, 'B/', K(arena.CumulativeBytes), 'B '); {$ifdef FPCMM_DEBUG} write(' peak=', K(arena.PeakBytes), 'B current=', K(arena.CumulativeAlloc - arena.CumulativeFree), ' alloc=', K(arena.CumulativeAlloc), ' free=', K(arena.CumulativeFree)); {$endif FPCMM_DEBUG} writeln(' sleep=', K(arena.SleepCount)); end; procedure WriteHeapStatus(const context: shortstring; smallblockstatuscount, smallblockcontentioncount: integer; compilationflags: boolean); var res: TResArray; // no heap allocation involved i, n, smallcount: PtrInt; t, b: PtrUInt; begin if context[0] <> #0 then writeln(context); if compilationflags then writeln(' Flags: ' {$ifdef FPCMM_BOOSTER} + 'BOOSTER ' {$else} {$ifdef FPCMM_BOOST} + 'BOOST ' {$endif}{$endif} {$ifdef FPCMM_SERVER} + 'SERVER ' {$endif} {$ifdef FPCMM_ASSUMEMULTITHREAD} + ' assumulthrd' {$endif} {$ifdef FPCMM_LOCKLESSFREE} + ' lockless' {$endif} {$ifdef FPCMM_PAUSE} + ' pause' {$endif} {$ifdef FPCMM_NOMREMAP} + ' nomremap' {$endif} {$ifdef FPCMM_DEBUG} + ' debug' {$endif} {$ifdef FPCMM_REPORTMEMORYLEAKS} + ' repmemleak' {$endif}); with CurrentHeapStatus do begin writeln(' Small: blocks=', K(SmallBlocks), ' size=', K(SmallBlocksSize), 'B (part of Medium arena)'); WriteHeapStatusDetail(Medium, ' Medium: '); WriteHeapStatusDetail(Large, ' Large: '); if SleepCount <> 0 then writeln(' Total Sleep: count=', K(SleepCount) {$ifdef FPCMM_DEBUG} , ' rdtsc=', K(SleepCycles) {$endif}); smallcount := SmallGetmemSleepCount + SmallFreememSleepCount; if smallcount <> 0 then writeln(' Small Sleep: getmem=', K(SmallGetmemSleepCount), ' freemem=', K(SmallFreememSleepCount) {$ifdef FPCMM_LOCKLESSFREE} {$ifdef FPCMM_DEBUG} , ' locklessspin=', K(SmallFreememLockLessSpin) {$endif} {$endif} ); end; if (smallblockcontentioncount > 0) and (smallcount <> 0) then begin n := SetSmallBlockContention(res, smallblockcontentioncount); for i := 0 to n - 1 do with TSmallBlockContention(res[i]) do begin if GetmemBlockSize <> 0 then write(' getmem(', GetmemBlockSize) else write(' freemem(', FreememBlockSize); write(')=' , K(SleepCount)); if (i and 3 = 3) or (i = n - 1) then writeln; end; end; if smallblockstatuscount > 0 then begin SetSmallBlockStatus(res); n := SortSmallBlockStatus(res, smallblockstatuscount, ord(obTotal), @t, @b) - 1; writeln(' Small Blocks since beginning: ', K(t), '/', K(b), 'B'); for i := 0 to n do with TSmallBlockStatus(res[i]) do begin write(' ', BlockSize, '=', K(Total)); if (i and 7 = 7) or (i = n) then writeln; end; n := SortSmallBlockStatus(res, smallblockstatuscount, ord(obCurrent), @t, @b) - 1; writeln(' Small Blocks current: ', K(t), '/', K(b), 'B'); for i := 0 to n do with TSmallBlockStatus(res[i]) do begin write(' ', BlockSize, '=', K(Current)); if (i and 7 = 7) or (i = n) then writeln; end; end; end; {$I+} function GetSmallBlockStatus(maxcount: integer; orderby: TSmallBlockOrderBy; count, bytes: PPtrUInt): TSmallBlockStatusDynArray; var res: TResArray; begin assert(SizeOf(TRes) = SizeOf(TSmallBlockStatus)); result := nil; if maxcount <= 0 then exit; SetSmallBlockStatus(res); maxcount := SortSmallBlockStatus(res, maxcount, ord(orderby), count, bytes); if maxcount = 0 then exit; SetLength(result, maxcount); Move(res[0], result[0], maxcount * SizeOf(res[0])); end; function GetSmallBlockContention(maxcount: integer): TSmallBlockContentionDynArray; var n: integer; res: TResArray; begin result := nil; if maxcount <= 0 then exit; n := SetSmallBlockContention(res, maxcount); if n = 0 then exit; SetLength(result, n); Move(res[0], result[0], n * SizeOf(res[0])); end; {$endif FPCMM_STANDALONE} function CurrentHeapStatus: TMMStatus; var i: integer; small: PtrUInt; p: PSmallBlockType; begin result := HeapStatus; p := @SmallBlockInfo; for i := 1 to NumSmallInfoBlock do begin inc(result.SmallGetmemSleepCount, p^.GetmemSleepCount); inc(result.SmallFreememSleepCount, p^.FreememSleepCount); small := p^.GetmemCount - p^.FreememCount; if small <> 0 then begin inc(result.SmallBlocks, small); inc(result.SmallBlocksSize, small * p^.BlockSize); end; {$ifdef FPCMM_LOCKLESSFREE} {$ifdef FPCMM_DEBUG} inc(result.SmallFreememLockLessSpin, p^.BinSpinCount); {$endif FPCMM_LOCKLESSFREE} {$endif FPCMM_DEBUG} inc(p); end; end; { ********* Initialization and Finalization } procedure InitializeMemoryManager; var small: PSmallBlockType; a, i, min, poolsize, num, perpool, size, start, next: PtrInt; medium: PMediumFreeBlock; begin small := @SmallBlockInfo; assert(SizeOf(small^) = 1 shl SmallBlockTypePO2); for a := 0 to NumTinyBlockArenas do for i := 0 to NumSmallBlockTypes - 1 do begin if (i = NumTinyBlockTypes) and (a > 0) then break; size := SmallBlockSizes[i]; assert(size and 15 = 0); small^.BlockSize := size; small^.PreviousPartiallyFreePool := pointer(small); small^.NextPartiallyFreePool := pointer(small); small^.MaxSequentialFeedBlockAddress := pointer(0); small^.NextSequentialFeedBlockAddress := pointer(1); min := ((size * MinimumSmallBlocksPerPool + (SmallBlockPoolHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset)) and -MediumBlockGranularity) + MediumBlockSizeOffset; if min < MinimumMediumBlockSize then min := MinimumMediumBlockSize; num := (min + (- MinimumMediumBlockSize + MediumBlockBinsPerGroup * MediumBlockGranularity div 2)) div (MediumBlockBinsPerGroup * MediumBlockGranularity); if num > 7 then num := 7; small^.AllowedGroupsForBlockPoolBitmap := Byte(Byte(-1) shl num); small^.MinimumBlockPoolSize := MinimumMediumBlockSize + num * (MediumBlockBinsPerGroup * MediumBlockGranularity); poolsize := ((size * TargetSmallBlocksPerPool + (SmallBlockPoolHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset)) and -MediumBlockGranularity) + MediumBlockSizeOffset; if poolsize < OptimalSmallBlockPoolSizeLowerLimit then poolsize := OptimalSmallBlockPoolSizeLowerLimit; if poolsize > OptimalSmallBlockPoolSizeUpperLimit then poolsize := OptimalSmallBlockPoolSizeUpperLimit; perpool := (poolsize - SmallBlockPoolHeaderSize) div size; small^.OptimalBlockPoolSize := ((perpool * size + (SmallBlockPoolHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset)) and -MediumBlockGranularity) + MediumBlockSizeOffset; inc(small); end; assert(small = @SmallBlockInfo.GetmemLookup); {$ifndef FPCMM_ASSUMEMULTITHREAD} SmallBlockInfo.IsMultiThreadPtr := @IsMultiThread; MediumBlockInfo.IsMultiThreadPtr := @IsMultiThread; {$endif FPCMM_ASSUMEMULTITHREAD} start := 0; with SmallBlockInfo do for i := 0 to NumSmallBlockTypes - 1 do begin next := PtrUInt(SmallBlockSizes[i]) div SmallBlockGranularity; while start < next do begin GetmemLookup[start] := i; inc(start); end; end; with MediumBlockInfo do begin PoolsCircularList.PreviousMediumBlockPoolHeader := @PoolsCircularList; PoolsCircularList.NextMediumBlockPoolHeader := @PoolsCircularList; for i := 0 to MediumBlockBinCount -1 do begin medium := @Bins[i]; medium.PreviousFreeBlock := medium; medium.NextFreeBlock := medium; end; end; LargeBlocksCircularList.PreviousLargeBlockHeader := @LargeBlocksCircularList; LargeBlocksCircularList.NextLargeBlockHeader := @LargeBlocksCircularList; end; {$I-} {$ifdef FPCMM_REPORTMEMORYLEAKS} var MemoryLeakReported: boolean; procedure StartReport; begin if MemoryLeakReported then exit; writeln; WriteHeapStatus('WARNING! THIS PROGRAM LEAKS MEMORY!'#13#10'Memory Status:'); writeln('Leaks Identified:'); MemoryLeakReported := true; end; {$ifdef LINUX} // experimental detection of object class - use at your own risk {$define FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL} {$endif LINUX} procedure MediumMemoryLeakReport(p: PMediumBlockPoolHeader); var block: PByte; header, size: PtrUInt; {$ifdef FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL} first, last: PByte; vmt: PAnsiChar; small: PSmallBlockPoolHeader; function SeemsRealPointer(p: pointer): boolean; begin result := (PtrUInt(p) > 65535) {$ifndef MSWINDOWS} // let the GPF happen silently in the kernel and (fpaccess(p, F_OK) <> 0) and (fpgeterrno <> ESysEFAULT) {$endif MSWINDOWS} end; {$endif FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL} begin with MediumBlockInfo do if (SequentialFeedBytesLeft = 0) or (PtrUInt(LastSequentiallyFed) < PtrUInt(p)) or (PtrUInt(LastSequentiallyFed) > PtrUInt(p) + MediumBlockPoolSize) then block := Pointer(PByte(p) + MediumBlockPoolHeaderSize) else if SequentialFeedBytesLeft <> MediumBlockPoolSize - MediumBlockPoolHeaderSize then block := LastSequentiallyFed else exit; repeat header := PPtrUInt(block - BlockHeaderSize)^; size := header and DropMediumAndLargeFlagsMask; if size = 0 then exit; if header and IsFreeBlockFlag = 0 then if header and IsSmallBlockPoolInUseFlag <> 0 then begin {$ifdef FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL} if PSmallBlockPoolHeader(block).BlocksInUse > 0 then begin first := PByte(block) + SmallBlockPoolHeaderSize; with PSmallBlockPoolHeader(block).BlockType^ do if (CurrentSequentialFeedPool <> pointer(block)) or (PtrUInt(NextSequentialFeedBlockAddress) > PtrUInt(MaxSequentialFeedBlockAddress)) then last := PByte(block) + (PPtrUInt(PByte(block) - BlockHeaderSize)^ and DropMediumAndLargeFlagsMask) - BlockSize else last := Pointer(PByte(NextSequentialFeedBlockAddress) - 1); while first <= last do begin if ((PPtrUInt(first - BlockHeaderSize)^ and IsFreeBlockFlag) = 0) then begin vmt := PPointer(first)^; // _FreeMem() ensured vmt=nil if (vmt <> nil) and SeemsRealPointer(vmt) then try // try to access the TObject VMT (seems to work on Linux) if (PPtrInt(vmt + vmtInstanceSize)^ >= sizeof(vmt)) and (PPtrInt(vmt + vmtInstanceSize)^ <= PSmallBlockPoolHeader(block).BlockType.BlockSize) and SeemsRealPointer(PPointer(vmt + vmtClassName)^) then begin StartReport; writeln(' probable ', PShortString(PPointer(vmt + vmtClassName)^)^, ' leak (', PSmallBlockPoolHeader(block).BlockType.BlockSize, ' bytes)'); end; except // intercept any GPF end; end; inc(first, PSmallBlockPoolHeader(block).BlockType.BlockSize); end; end; {$endif FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL} end else begin StartReport; writeln(' medium block leak of ', K(size), 'B'); end; inc(block, size); until false; end; {$endif FPCMM_REPORTMEMORYLEAKS} procedure FreeAllMemory; var medium, nextmedium: PMediumBlockPoolHeader; bin: PMediumFreeBlock; large, nextlarge: PLargeBlockHeader; p: PSmallBlockType; i, size: PtrUInt; {$ifdef FPCMM_LOCKLESSFREE} j: PtrInt; {$endif FPCMM_LOCKLESSFREE} {$ifdef FPCMM_REPORTMEMORYLEAKS} leak, leaks: PtrUInt; {$endif FPCMM_REPORTMEMORYLEAKS} begin {$ifdef FPCMM_REPORTMEMORYLEAKS} leaks := 0; {$endif FPCMM_REPORTMEMORYLEAKS} p := @SmallBlockInfo; for i := 1 to NumSmallInfoBlock do begin {$ifdef FPCMM_LOCKLESSFREE} for j := 0 to p^.BinCount - 1 do if p^.BinInstance[i] <> nil then _FreeMem(p^.BinInstance[i]); // release (unlikely) pending instances {$endif FPCMM_LOCKLESSFREE} p^.PreviousPartiallyFreePool := pointer(p); p^.NextPartiallyFreePool := pointer(p); p^.NextSequentialFeedBlockAddress := pointer(1); p^.MaxSequentialFeedBlockAddress := nil; {$ifdef FPCMM_REPORTMEMORYLEAKS} leak := p^.GetmemCount - p^.FreememCount; if leak <> 0 then begin StartReport; inc(leaks, leak); writeln(' small block leak x', leak, ' of size=', p^.BlockSize, ' (getmem=', p^.GetmemCount, ' freemem=', p^.FreememCount, ')'); end; {$endif FPCMM_REPORTMEMORYLEAKS} inc(p); end; {$ifdef FPCMM_REPORTMEMORYLEAKS} if leaks <> 0 then writeln(' Total small block leaks = ', leaks); {$endif FPCMM_REPORTMEMORYLEAKS} with MediumBlockInfo do begin medium := PoolsCircularList.NextMediumBlockPoolHeader; while medium <> @PoolsCircularList do begin {$ifdef FPCMM_REPORTMEMORYLEAKS} MediumMemoryLeakReport(medium); {$endif FPCMM_REPORTMEMORYLEAKS} nextmedium := medium.NextMediumBlockPoolHeader; FreeMedium(medium); medium := nextmedium; end; PoolsCircularList.PreviousMediumBlockPoolHeader := @PoolsCircularList; PoolsCircularList.NextMediumBlockPoolHeader := @PoolsCircularList; for i := 0 to MediumBlockBinCount - 1 do begin bin := @Bins[i]; bin.PreviousFreeBlock := bin; bin.NextFreeBlock := bin; end; BinGroupBitmap := 0; SequentialFeedBytesLeft := 0; for i := 0 to MediumBlockBinGroupCount - 1 do BinBitmaps[i] := 0; end; large := LargeBlocksCircularList.NextLargeBlockHeader; while large <> @LargeBlocksCircularList do begin size := large.BlockSizeAndFlags and DropMediumAndLargeFlagsMask; {$ifdef FPCMM_REPORTMEMORYLEAKS} StartReport; writeln(' large block leak of ', K(size), 'B'); {$endif FPCMM_REPORTMEMORYLEAKS} nextlarge := large.NextLargeBlockHeader; FreeLarge(large, size); large := nextlarge; end; LargeBlocksCircularList.PreviousLargeBlockHeader := @LargeBlocksCircularList; LargeBlocksCircularList.NextLargeBlockHeader := @LargeBlocksCircularList; end; {$I+} {$ifndef FPCMM_STANDALONE} const NewMM: TMemoryManager = ( NeedLock: false; GetMem: @_Getmem; FreeMem: @_FreeMem; FreememSize: @_FreememSize; AllocMem: @_AllocMem; ReallocMem: @_ReAllocMem; MemSize: @_MemSize; InitThread: nil; DoneThread: nil; RelocateHeap: nil; GetHeapStatus: @_GetHeapStatus; GetFPCHeapStatus: @_GetFPCHeapStatus); var OldMM: TMemoryManager; initialization InitializeMemoryManager; GetMemoryManager(OldMM); SetMemoryManager(NewMM); finalization SetMemoryManager(OldMM); FreeAllMemory; {$endif FPCMM_STANDALONE} {$endif FPC_CPUX64} end.