353 lines
9.7 KiB
ObjectPascal
353 lines
9.7 KiB
ObjectPascal
// Based on TOmniBaseBoundedStack class from the OmniThreadLibrary,
|
|
// originally written by GJ and Primoz Gabrijelcic.
|
|
|
|
unit FastMM4LockFreeStack;
|
|
|
|
interface
|
|
|
|
type
|
|
PReferencedPtr = ^TReferencedPtr;
|
|
TReferencedPtr = record
|
|
PData : pointer;
|
|
Reference: NativeInt;
|
|
end;
|
|
|
|
PLinkedData = ^TLinkedData;
|
|
TLinkedData = packed record
|
|
Next: PLinkedData;
|
|
Data: record end; //user data, variable size
|
|
end;
|
|
|
|
TLFStack = record
|
|
strict private
|
|
FDataBuffer : pointer;
|
|
FElementSize : integer;
|
|
FNumElements : integer;
|
|
FPublicChainP : PReferencedPtr;
|
|
FRecycleChainP: PReferencedPtr;
|
|
class var
|
|
class var obsIsInitialized: boolean; //default is false
|
|
class var obsTaskPopLoops : NativeInt;
|
|
class var obsTaskPushLoops: NativeInt;
|
|
class function PopLink(var chain: TReferencedPtr): PLinkedData; static;
|
|
class procedure PushLink(const link: PLinkedData; var chain: TReferencedPtr); static;
|
|
procedure MeasureExecutionTimes;
|
|
public
|
|
procedure Empty;
|
|
procedure Initialize(numElements, elementSize: integer);
|
|
procedure Finalize;
|
|
function IsEmpty: boolean; inline;
|
|
function IsFull: boolean; inline;
|
|
function Pop(var value): boolean;
|
|
function Push(const value): boolean;
|
|
property ElementSize: integer read FElementSize;
|
|
property NumElements: integer read FNumElements;
|
|
end;
|
|
|
|
implementation
|
|
|
|
uses
|
|
Windows;
|
|
|
|
{$IF CompilerVersion < 23}
|
|
{$IFNDEF CPUX64}
|
|
type
|
|
NativeInt = integer;
|
|
NativeUInt = cardinal;
|
|
{$ENDIF}
|
|
{$IFEND}
|
|
|
|
var
|
|
CASAlignment: integer; //required alignment for the CAS function - 8 or 16, depending on the platform
|
|
|
|
function RoundUpTo(value: pointer; granularity: integer): pointer;
|
|
begin
|
|
Result := pointer((((NativeInt(value) - 1) div granularity) + 1) * granularity);
|
|
end;
|
|
|
|
function GetCPUTimeStamp: int64;
|
|
asm
|
|
rdtsc
|
|
{$IFDEF CPUX64}
|
|
shl rdx, 32
|
|
or rax, rdx
|
|
{$ENDIF CPUX64}
|
|
end;
|
|
|
|
function GetThreadId: NativeInt;
|
|
//result := GetCurrentThreadId;
|
|
asm
|
|
{$IFNDEF CPUX64}
|
|
mov eax, fs:[$18] //eax := thread information block
|
|
mov eax, [eax + $24] //eax := thread id
|
|
{$ELSE CPUX64}
|
|
mov rax, gs:[abs $30]
|
|
mov eax, [rax + $48]
|
|
{$ENDIF CPUX64}
|
|
end;
|
|
|
|
function CAS(const oldValue, newValue: NativeInt; var destination): boolean; overload;
|
|
asm
|
|
{$IFDEF CPUX64}
|
|
mov rax, oldValue
|
|
{$ENDIF CPUX64}
|
|
lock cmpxchg [destination], newValue
|
|
setz al
|
|
end;
|
|
|
|
function CAS(const oldValue, newValue: pointer; var destination): boolean; overload;
|
|
asm
|
|
{$IFDEF CPUX64}
|
|
mov rax, oldValue
|
|
{$ENDIF CPUX64}
|
|
lock cmpxchg [destination], newValue
|
|
setz al
|
|
end;
|
|
|
|
function CAS(const oldData: pointer; oldReference: NativeInt; newData: pointer;
|
|
newReference: NativeInt; var destination): boolean; overload;
|
|
asm
|
|
{$IFNDEF CPUX64}
|
|
push edi
|
|
push ebx
|
|
mov ebx, newData
|
|
mov ecx, newReference
|
|
mov edi, destination
|
|
lock cmpxchg8b qword ptr [edi]
|
|
pop ebx
|
|
pop edi
|
|
{$ELSE CPUX64}
|
|
.noframe
|
|
push rbx //rsp := rsp - 8 !
|
|
mov rax, oldData
|
|
mov rbx, newData
|
|
mov rcx, newReference
|
|
mov r8, [destination + 8] //+8 with respect to .noframe
|
|
lock cmpxchg16b [r8]
|
|
pop rbx
|
|
{$ENDIF CPUX64}
|
|
setz al
|
|
end;
|
|
|
|
{ TLFStack }
|
|
|
|
procedure TLFStack.Empty;
|
|
var
|
|
linkedData: PLinkedData;
|
|
begin
|
|
repeat
|
|
linkedData := PopLink(FPublicChainP^);
|
|
if not assigned(linkedData) then
|
|
break; //repeat
|
|
PushLink(linkedData, FRecycleChainP^);
|
|
until false;
|
|
end;
|
|
|
|
procedure TLFStack.Finalize;
|
|
begin
|
|
HeapFree(GetProcessHeap, 0, FDataBuffer);
|
|
end;
|
|
|
|
procedure TLFStack.Initialize(numElements, elementSize: integer);
|
|
var
|
|
bufferElementSize : integer;
|
|
currElement : PLinkedData;
|
|
dataBuffer : pointer;
|
|
iElement : integer;
|
|
nextElement : PLinkedData;
|
|
roundedElementSize: integer;
|
|
begin
|
|
Assert(SizeOf(NativeInt) = SizeOf(pointer));
|
|
Assert(numElements > 0);
|
|
Assert(elementSize > 0);
|
|
FNumElements := numElements;
|
|
FElementSize := elementSize;
|
|
//calculate element size, round up to next aligned value
|
|
roundedElementSize := (elementSize + SizeOf(pointer) - 1) AND NOT (SizeOf(pointer) - 1);
|
|
//calculate buffer element size, round up to next aligned value
|
|
bufferElementSize := ((SizeOf(TLinkedData) + roundedElementSize) + SizeOf(pointer) - 1) AND NOT (SizeOf(pointer) - 1);
|
|
//calculate DataBuffer
|
|
FDataBuffer := HeapAlloc(GetProcessHeap, HEAP_GENERATE_EXCEPTIONS, bufferElementSize * numElements + 2 * SizeOf(TReferencedPtr) + CASAlignment);
|
|
dataBuffer := RoundUpTo(FDataBuffer, CASAlignment);
|
|
if NativeInt(dataBuffer) AND (SizeOf(pointer) - 1) <> 0 then
|
|
// TODO 1 raise exception - how?
|
|
Halt; //raise Exception.Create('TOmniBaseContainer: obcBuffer is not aligned');
|
|
FPublicChainP := dataBuffer;
|
|
inc(NativeInt(dataBuffer), SizeOf(TReferencedPtr));
|
|
FRecycleChainP := dataBuffer;
|
|
inc(NativeInt(dataBuffer), SizeOf(TReferencedPtr));
|
|
//Format buffer to recycleChain, init obsRecycleChain and obsPublicChain.
|
|
//At the beginning, all elements are linked into the recycle chain.
|
|
FRecycleChainP^.PData := dataBuffer;
|
|
currElement := FRecycleChainP^.PData;
|
|
for iElement := 0 to FNumElements - 2 do begin
|
|
nextElement := PLinkedData(NativeInt(currElement) + bufferElementSize);
|
|
currElement.Next := nextElement;
|
|
currElement := nextElement;
|
|
end;
|
|
currElement.Next := nil; // terminate the chain
|
|
FPublicChainP^.PData := nil;
|
|
MeasureExecutionTimes;
|
|
end;
|
|
|
|
function TLFStack.IsEmpty: boolean;
|
|
begin
|
|
Result := not assigned(FPublicChainP^.PData);
|
|
end;
|
|
|
|
function TLFStack.IsFull: boolean;
|
|
begin
|
|
Result := not assigned(FRecycleChainP^.PData);
|
|
end;
|
|
|
|
procedure TLFStack.MeasureExecutionTimes;
|
|
const
|
|
NumOfSamples = 10;
|
|
var
|
|
TimeTestField: array [0..1] of array [1..NumOfSamples] of int64;
|
|
|
|
function GetMinAndClear(routine, count: cardinal): int64;
|
|
var
|
|
m: cardinal;
|
|
n: integer;
|
|
x: integer;
|
|
begin
|
|
Result := 0;
|
|
for m := 1 to count do begin
|
|
x:= 1;
|
|
for n:= 2 to NumOfSamples do
|
|
if TimeTestField[routine, n] < TimeTestField[routine, x] then
|
|
x := n;
|
|
Inc(Result, TimeTestField[routine, x]);
|
|
TimeTestField[routine, x] := MaxLongInt;
|
|
end;
|
|
end;
|
|
|
|
var
|
|
oldAffinity: NativeUInt;
|
|
currElement: PLinkedData;
|
|
n : integer;
|
|
|
|
begin
|
|
if not obsIsInitialized then begin
|
|
oldAffinity := SetThreadAffinityMask(GetCurrentThread, 1);
|
|
try
|
|
//Calculate TaskPopDelay and TaskPushDelay counter values depend on CPU speed!!!}
|
|
obsTaskPopLoops := 1;
|
|
obsTaskPushLoops := 1;
|
|
for n := 1 to NumOfSamples do begin
|
|
SwitchToThread;
|
|
//Measure RemoveLink rutine delay
|
|
TimeTestField[0, n] := GetCPUTimeStamp;
|
|
currElement := PopLink(FRecycleChainP^);
|
|
TimeTestField[0, n] := GetCPUTimeStamp - TimeTestField[0, n];
|
|
//Measure InsertLink rutine delay
|
|
TimeTestField[1, n] := GetCPUTimeStamp;
|
|
PushLink(currElement, FRecycleChainP^);
|
|
TimeTestField[1, n] := GetCPUTimeStamp - TimeTestField[1, n];
|
|
end;
|
|
//Calculate first 4 minimum average for RemoveLink rutine
|
|
obsTaskPopLoops := GetMinAndClear(0, 4) div 4;
|
|
//Calculate first 4 minimum average for InsertLink rutine
|
|
obsTaskPushLoops := GetMinAndClear(1, 4) div 4;
|
|
|
|
//This gives better performance (determined experimentally)
|
|
obsTaskPopLoops := obsTaskPopLoops * 2;
|
|
obsTaskPushLoops := obsTaskPushLoops * 2;
|
|
|
|
obsIsInitialized := true;
|
|
finally SetThreadAffinityMask(GetCurrentThread, oldAffinity); end;
|
|
end;
|
|
end;
|
|
|
|
function TLFStack.Pop(var value): boolean;
|
|
var
|
|
linkedData: PLinkedData;
|
|
begin
|
|
linkedData := PopLink(FPublicChainP^);
|
|
Result := assigned(linkedData);
|
|
if not Result then
|
|
Exit;
|
|
Move(linkedData.Data, value, ElementSize);
|
|
PushLink(linkedData, FRecycleChainP^);
|
|
end;
|
|
|
|
class function TLFStack.PopLink(var chain: TReferencedPtr): PLinkedData;
|
|
//nil << Link.Next << Link.Next << ... << Link.Next
|
|
// ^------ < chainHead
|
|
var
|
|
AtStartReference: NativeInt;
|
|
CurrentReference: NativeInt;
|
|
TaskCounter : NativeInt;
|
|
ThreadReference : NativeInt;
|
|
label
|
|
TryAgain;
|
|
begin
|
|
ThreadReference := GetThreadId + 1; //Reference.bit0 := 1
|
|
with chain do begin
|
|
TryAgain:
|
|
TaskCounter := obsTaskPopLoops;
|
|
AtStartReference := Reference OR 1; //Reference.bit0 := 1
|
|
repeat
|
|
CurrentReference := Reference;
|
|
Dec(TaskCounter);
|
|
until (TaskCounter = 0) or (CurrentReference AND 1 = 0);
|
|
if (CurrentReference AND 1 <> 0) and (AtStartReference <> CurrentReference) or
|
|
not CAS(CurrentReference, ThreadReference, Reference)
|
|
then
|
|
goto TryAgain;
|
|
//Reference is set...
|
|
Result := PData;
|
|
//Empty test
|
|
if result = nil then
|
|
CAS(ThreadReference, 0, Reference) //Clear Reference if task own reference
|
|
else if not CAS(Result, ThreadReference, Result.Next, 0, chain) then
|
|
goto TryAgain;
|
|
end; //with chain
|
|
end;
|
|
|
|
function TLFStack.Push(const value): boolean;
|
|
var
|
|
linkedData: PLinkedData;
|
|
begin
|
|
linkedData := PopLink(FRecycleChainP^);
|
|
Result := assigned(linkedData);
|
|
if not Result then
|
|
Exit;
|
|
Move(value, linkedData.Data, ElementSize);
|
|
PushLink(linkedData, FPublicChainP^);
|
|
end;
|
|
|
|
class procedure TLFStack.PushLink(const link: PLinkedData; var chain: TReferencedPtr);
|
|
var
|
|
PMemData : pointer;
|
|
TaskCounter: NativeInt;
|
|
begin
|
|
with chain do begin
|
|
for TaskCounter := 0 to obsTaskPushLoops do
|
|
if (Reference AND 1 = 0) then
|
|
break;
|
|
repeat
|
|
PMemData := PData;
|
|
link.Next := PMemData;
|
|
until CAS(PMemData, link, PData);
|
|
end;
|
|
end;
|
|
|
|
procedure InitializeTimingInfo;
|
|
var
|
|
stack: TLFStack;
|
|
begin
|
|
stack.Initialize(10, 4); // enough for initialization
|
|
stack.Finalize;
|
|
end;
|
|
|
|
initialization
|
|
{$IFDEF CPUX64}
|
|
CASAlignment := 16;
|
|
{$ELSE}
|
|
CASAlignment := 8;
|
|
{$ENDIF CPUX64}
|
|
InitializeTimingInfo;
|
|
end.
|