305 lines
8.9 KiB
ObjectPascal
305 lines
8.9 KiB
ObjectPascal
{
|
|
* Class that generates a diff for two files, logging progress as required.
|
|
*
|
|
* Based on bdiff.c and part of blksort.c by Stefan Reuther, copyright (c) 1999
|
|
* Stefan Reuther <Streu@gmx.de>.
|
|
}
|
|
|
|
unit UDiffer;
|
|
|
|
interface
|
|
|
|
uses
|
|
UBDiffTypes, UFileData, ULogger;
|
|
|
|
{ Structure for a matching block }
|
|
type
|
|
TMatch = record
|
|
OldOffset: Cardinal;
|
|
NewOffset: Cardinal;
|
|
BlockLength: Cardinal;
|
|
end;
|
|
|
|
PMatch = ^TMatch;
|
|
|
|
type
|
|
TDiffer = class(TObject)
|
|
private
|
|
fMinMatchLength: Cardinal;
|
|
fFormat: TFormat;
|
|
function FindMaxMatch(OldFile: TFileData; SortedOldData: PBlock;
|
|
SearchText: PSignedAnsiChar; SearchTextLength: Cardinal): TMatch;
|
|
/// <summary>Finds maximum length "sub-string" of CompareData that is in
|
|
/// Data.</summary>
|
|
/// <param name="Data">PSignedAnsiCharArray [in] Data to be searched for
|
|
/// "sub-string".</param>
|
|
/// <param name="Block">PBlock [in] Block of indexes into Data that sort
|
|
/// sub-strings of Data.</param>
|
|
/// <param name="DataSize">Cardinal [in] Size of Data.</param>
|
|
/// <param name="CompareData">PSignedAnsiChar [in] Pointer to data to be
|
|
/// compared to Data.</param>
|
|
/// <param name="CompareDataSize">Cardinal [in] Size of data pointed to by
|
|
/// CompareData.</param>
|
|
/// <param name="FoundPos">Cardinal [out] Position in Data where
|
|
/// "sub-string" was found.</param>
|
|
/// <returns>Cardinal. Length of found "sub-string".</returns>
|
|
function FindString(Data: PSignedAnsiCharArray; Block: PBlock;
|
|
DataSize: Cardinal; CompareData: PSignedAnsiChar;
|
|
CompareDataSize: Cardinal; out FoundPos: Cardinal): Cardinal;
|
|
public
|
|
constructor Create;
|
|
destructor Destroy; override;
|
|
procedure MakeDiff(const OldFileName, NewFileName: string;
|
|
const Logger: TLogger); overload;
|
|
procedure MakeDiff(OldData, NewData, DiffData: Pointer;
|
|
OldSize, NewSize: Integer; var DiffSize: Integer;
|
|
const Logger: TLogger); overload;
|
|
property MinMatchLength: Cardinal read fMinMatchLength write fMinMatchLength
|
|
default 24;
|
|
property Format: TFormat read fFormat write fFormat default FMT_QUOTED;
|
|
end;
|
|
|
|
implementation
|
|
|
|
{$IOCHECKS OFF}
|
|
|
|
uses
|
|
// Project
|
|
UBlockSort, UErrors, UPatchWriters;
|
|
|
|
{ TDiffer }
|
|
|
|
constructor TDiffer.Create;
|
|
begin
|
|
inherited Create;
|
|
fMinMatchLength := 24; // default minimum match length
|
|
fFormat := FMT_QUOTED; // default output format
|
|
end;
|
|
|
|
destructor TDiffer.Destroy;
|
|
begin
|
|
inherited;
|
|
end;
|
|
|
|
function TDiffer.FindMaxMatch(OldFile: TFileData; SortedOldData: PBlock;
|
|
SearchText: PSignedAnsiChar; SearchTextLength: Cardinal): TMatch;
|
|
var
|
|
FoundPos: Cardinal;
|
|
FoundLen: Cardinal;
|
|
begin
|
|
Result.BlockLength := 0; { no match }
|
|
Result.NewOffset := 0;
|
|
while (SearchTextLength <> 0) do
|
|
begin
|
|
FoundLen := FindString(OldFile.Data, SortedOldData, OldFile.Size,
|
|
SearchText, SearchTextLength, FoundPos);
|
|
if FoundLen >= fMinMatchLength then
|
|
begin
|
|
Result.OldOffset := FoundPos;
|
|
Result.BlockLength := FoundLen;
|
|
Exit;
|
|
end;
|
|
Inc(SearchText);
|
|
Inc(Result.NewOffset);
|
|
Dec(SearchTextLength);
|
|
end;
|
|
end;
|
|
|
|
function TDiffer.FindString(Data: PSignedAnsiCharArray; Block: PBlock;
|
|
DataSize: Cardinal; CompareData: PSignedAnsiChar; CompareDataSize: Cardinal;
|
|
out FoundPos: Cardinal): Cardinal;
|
|
var
|
|
First: Cardinal; // first position in Data to search
|
|
Last: Cardinal; // last position in Data to search
|
|
Mid: Cardinal; // mid point of Data to search
|
|
FoundSize: Cardinal; // size of matching "sub-string"
|
|
FoundMax: Cardinal; // maximum size of matching "sub-string"
|
|
PData: PSignedAnsiChar; // ptr to char in Data to be compared
|
|
PCompareData: PSignedAnsiChar; // ptr to char in CompareData to be compared
|
|
begin
|
|
First := 0;
|
|
Last := DataSize - 1;
|
|
Result := 0;
|
|
FoundPos := 0;
|
|
|
|
// Do binary search of Data
|
|
while First <= Last do
|
|
begin
|
|
// Get mid point of (sorted) Data to search
|
|
Mid := (First + Last) div 2;
|
|
// Set pointer to start of Data search string
|
|
PData := @Data[Block[Mid]];
|
|
// Set pointer to start of CompareData
|
|
PCompareData := CompareData;
|
|
// Calculate maximum possible size of matching substring
|
|
FoundMax := DataSize - Block[Mid];
|
|
if FoundMax > CompareDataSize then
|
|
FoundMax := CompareDataSize;
|
|
// Find and count match chars from Data and CompareData
|
|
FoundSize := 0;
|
|
while (FoundSize < FoundMax) and (PData^ = PCompareData^) do
|
|
begin
|
|
Inc(FoundSize);
|
|
Inc(PData);
|
|
Inc(PCompareData);
|
|
end;
|
|
|
|
// We found a "match" of length FoundSize, position Block[Mid]
|
|
if FoundSize > Result then
|
|
begin
|
|
Result := FoundSize;
|
|
FoundPos := Block[Mid];
|
|
end;
|
|
|
|
// Determine next search area
|
|
// Note: If FoundSize = FoundMatch then substrings match
|
|
if (FoundSize = FoundMax) or (PData^ < PCompareData^) then
|
|
// substring <= current data string: search above
|
|
First := Mid + 1
|
|
else
|
|
// substring < current data string: search below
|
|
begin
|
|
Last := Mid;
|
|
if Last <> 0 then
|
|
Dec(Last)
|
|
else
|
|
Break;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TDiffer.MakeDiff(const OldFileName, NewFileName: string;
|
|
const Logger: TLogger);
|
|
var
|
|
OldFile: TFileData;
|
|
NewFile: TFileData;
|
|
NewOffset: Cardinal;
|
|
ToDo: Cardinal;
|
|
SortedOldData: PBlock;
|
|
Match: TMatch;
|
|
PatchWriter: TPatchWriter;
|
|
begin
|
|
{ initialize }
|
|
OldFile := nil;
|
|
NewFile := nil;
|
|
SortedOldData := nil;
|
|
PatchWriter := TPatchWriterFactory.Instance(fFormat);
|
|
try
|
|
Logger.Log('loading old file');
|
|
OldFile := TFileData.Create(OldFileName);
|
|
Logger.Log('loading new file');
|
|
NewFile := TFileData.Create(NewFileName);
|
|
Logger.Log('block sorting old file');
|
|
SortedOldData := TBlockSort.Execute(OldFile.Data, OldFile.Size);
|
|
if not Assigned(SortedOldData) then
|
|
Error('virtual memory exhausted');
|
|
Logger.Log('generating patch');
|
|
PatchWriter.Header(OldFile.Name, NewFile.Name, OldFile.Size, NewFile.Size);
|
|
{ main loop }
|
|
ToDo := NewFile.Size;
|
|
NewOffset := 0;
|
|
while (ToDo <> 0) do
|
|
begin
|
|
Match := FindMaxMatch(OldFile, SortedOldData,
|
|
@NewFile.Data[NewOffset], ToDo);
|
|
if Match.BlockLength <> 0 then
|
|
begin
|
|
{ found a match }
|
|
if Match.NewOffset <> 0 then
|
|
{ preceded by a "copy" block }
|
|
PatchWriter.Add(@NewFile.Data[NewOffset], Match.NewOffset);
|
|
Inc(NewOffset, Match.NewOffset);
|
|
Dec(ToDo, Match.NewOffset);
|
|
PatchWriter.Copy(NewFile.Data, NewOffset, Match.OldOffset,
|
|
Match.BlockLength);
|
|
Inc(NewOffset, Match.BlockLength);
|
|
Dec(ToDo, Match.BlockLength);
|
|
end
|
|
else
|
|
begin
|
|
PatchWriter.Add(@NewFile.Data[NewOffset], ToDo);
|
|
Break;
|
|
end;
|
|
end;
|
|
Logger.Log('done');
|
|
finally
|
|
// finally section new to v1.1
|
|
if Assigned(SortedOldData) then
|
|
FreeMem(SortedOldData);
|
|
OldFile.Free;
|
|
NewFile.Free;
|
|
PatchWriter.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure TDiffer.MakeDiff(OldData, NewData, DiffData: Pointer;
|
|
OldSize, NewSize: Integer; var DiffSize: Integer; const Logger: TLogger);
|
|
var
|
|
OldFile: TFileData;
|
|
NewFile: TFileData;
|
|
NewOffset: Cardinal;
|
|
ToDo: Cardinal;
|
|
SortedOldData: PBlock;
|
|
Match: TMatch;
|
|
PatchWriter: TPatchWriter;
|
|
begin
|
|
{ initialize }
|
|
DiffSize := 0;
|
|
OldFile := nil;
|
|
NewFile := nil;
|
|
SortedOldData := nil;
|
|
PatchWriter := TPatchWriterFactory.Instance(fFormat);
|
|
PatchWriter.Memory := DiffData;
|
|
PatchWriter.Size := 0;
|
|
PatchWriter.WriteToMemory := True;
|
|
try
|
|
Logger.Log('loading old file');
|
|
OldFile := TFileData.Create(OldData, OldSize);
|
|
Logger.Log('loading new file');
|
|
NewFile := TFileData.Create(NewData, NewSize);
|
|
Logger.Log('block sorting old file');
|
|
SortedOldData := TBlockSort.Execute(OldFile.Data, OldFile.Size);
|
|
if not Assigned(SortedOldData) then
|
|
Error('virtual memory exhausted');
|
|
Logger.Log('generating patch');
|
|
PatchWriter.Header(OldFile.Name, NewFile.Name, OldFile.Size, NewFile.Size);
|
|
{ main loop }
|
|
ToDo := NewFile.Size;
|
|
NewOffset := 0;
|
|
while (ToDo <> 0) do
|
|
begin
|
|
Match := FindMaxMatch(OldFile, SortedOldData,
|
|
@NewFile.Data[NewOffset], ToDo);
|
|
if Match.BlockLength <> 0 then
|
|
begin
|
|
{ found a match }
|
|
if Match.NewOffset <> 0 then
|
|
{ preceded by a "copy" block }
|
|
PatchWriter.Add(@NewFile.Data[NewOffset], Match.NewOffset);
|
|
Inc(NewOffset, Match.NewOffset);
|
|
Dec(ToDo, Match.NewOffset);
|
|
PatchWriter.Copy(NewFile.Data, NewOffset, Match.OldOffset,
|
|
Match.BlockLength);
|
|
Inc(NewOffset, Match.BlockLength);
|
|
Dec(ToDo, Match.BlockLength);
|
|
end
|
|
else
|
|
begin
|
|
PatchWriter.Add(@NewFile.Data[NewOffset], ToDo);
|
|
Break;
|
|
end;
|
|
end;
|
|
Logger.Log('done');
|
|
DiffSize := PatchWriter.Size;
|
|
finally
|
|
// finally section new to v1.1
|
|
if Assigned(SortedOldData) then
|
|
FreeMem(SortedOldData);
|
|
OldFile.Free;
|
|
NewFile.Free;
|
|
PatchWriter.Free;
|
|
end;
|
|
end;
|
|
|
|
end.
|