xtool/contrib/bdiff/UBlockSort.pas

154 lines
3.6 KiB
ObjectPascal

{
* Implements block sort.
*
* Based on parts of blksort.c by Stefan Reuther, copyright (c) 1999 Stefan
* Reuther <Streu@gmx.de>.
}
unit UBlockSort;
interface
uses
// Project
UBDiffTypes;
type
TBlockSort = class(TObject)
private
class function Compare(A: Cardinal; B: Cardinal; Data: PSignedAnsiCharArray;
DataSize: Cardinal): Integer;
{ The 'sink element' part of heapsort }
class procedure Sink(Left: Cardinal; Right: Cardinal; Block: PBlock;
Data: PSignedAnsiCharArray; DataSize: Cardinal);
public
{ Returns array of offsets into data, sorted by position.
@param Data [in] Data to be sorted. Must not be nil.
@param DataSize [in] Size of data to be sorted, must be > 0.
@return Pointer to block of sorted indices into Data. Caller must free.
@except raises EOutOfMemory if can't allocate sorted data block.
}
class function Execute(Data: PSignedAnsiCharArray; DataSize: Cardinal):
PBlock;
end;
implementation
{
GENERAL IMPLEMENTATION NOTE (Stefan Reuther)
Block-sort part of bdiff:
Taking the data area of length N, we generate N substrings:
- first substring is data area, length N
- 2nd is data area sans first character, length N-1
- ith is data area sans first i-1 characters, length N-i+1
- Nth is last character of data area, length 1
These strings are sorted to allow fast (i.e., binary) searching in data
area. Of course, we don't really generate these N*N/2 bytes of strings: we
use an array of N size_t's indexing the data.
PASCAL IMPLEMENTATION NOTE (Peter Johnson)
The fact that C's (ansi) Char type is signed and Pascal's is unsigned is
relevant to the string sorting and accessing code described above. Thefore
we use a specially defined SignedAnsiChar to maintain the data buffer to
ensure that the the Pascal performs in the same way as the C code.
}
{ TBlockSort }
class function TBlockSort.Compare(A, B: Cardinal;
Data: PSignedAnsiCharArray; DataSize: Cardinal): Integer;
var
PA: PSignedAnsiChar;
PB: PSignedAnsiChar;
Len: Cardinal;
begin
PA := @Data[A];
PB := @Data[B];
Len := DataSize - A;
if DataSize - B < Len then
Len := DataSize - B;
while (Len <> 0) and (PA^ = PB^) do
begin
Inc(PA);
Inc(PB);
Dec(Len);
end;
if Len = 0 then
begin
Result := A - B;
Exit;
end;
Result := PA^ - PB^;
end;
class function TBlockSort.Execute(Data: PSignedAnsiCharArray;
DataSize: Cardinal): PBlock;
var
I, Temp, Left, Right: Cardinal;
begin
if DataSize = 0 then
begin
Result := nil;
Exit;
end;
GetMem(Result, SizeOf(Cardinal) * DataSize);
// initialize unsorted data
for I := 0 to Pred(DataSize) do
Result[I] := I;
// heapsort
Left := DataSize div 2;
Right := DataSize;
while Left > 0 do
begin
Dec(Left);
Sink(Left, Right, Result, Data, DataSize);
end;
while Right > 0 do
begin
Temp := Result[Left];
Result[Left] := Result[Right-1];
Result[Right-1] := Temp;
Dec(Right);
Sink(Left, Right, Result, Data, DataSize);
end;
end;
class procedure TBlockSort.Sink(Left, Right: Cardinal; Block: PBlock;
Data: PSignedAnsiCharArray; DataSize: Cardinal);
var
I, J, X: Cardinal;
begin
I := Left;
X := Block[I];
while True do
begin
J := 2 * I + 1;
if J >= Right then
Break;
if J < Right - 1 then
if Compare(Block[J], Block[J+1], Data, DataSize) < 0 then
Inc(J);
if Compare(X, Block[J], Data, DataSize) > 0 then
Break;
Block[I] := Block[J];
I := J;
end;
Block[I] := X;
end;
end.