{ * Class that generates a diff for two files, logging progress as required. * * Based on bdiff.c and part of blksort.c by Stefan Reuther, copyright (c) 1999 * Stefan Reuther . } unit UDiffer; interface uses UBDiffTypes, UFileData, ULogger; { Structure for a matching block } type TMatch = record OldOffset: Cardinal; NewOffset: Cardinal; BlockLength: Cardinal; end; PMatch = ^TMatch; type TDiffer = class(TObject) private fMinMatchLength: Cardinal; fFormat: TFormat; function FindMaxMatch(OldFile: TFileData; SortedOldData: PBlock; SearchText: PSignedAnsiChar; SearchTextLength: Cardinal): TMatch; /// Finds maximum length "sub-string" of CompareData that is in /// Data. /// PSignedAnsiCharArray [in] Data to be searched for /// "sub-string". /// PBlock [in] Block of indexes into Data that sort /// sub-strings of Data. /// Cardinal [in] Size of Data. /// PSignedAnsiChar [in] Pointer to data to be /// compared to Data. /// Cardinal [in] Size of data pointed to by /// CompareData. /// Cardinal [out] Position in Data where /// "sub-string" was found. /// Cardinal. Length of found "sub-string". function FindString(Data: PSignedAnsiCharArray; Block: PBlock; DataSize: Cardinal; CompareData: PSignedAnsiChar; CompareDataSize: Cardinal; out FoundPos: Cardinal): Cardinal; public constructor Create; destructor Destroy; override; procedure MakeDiff(const OldFileName, NewFileName: string; const Logger: TLogger); overload; procedure MakeDiff(OldData, NewData, DiffData: Pointer; OldSize, NewSize: Integer; var DiffSize: Integer; const Logger: TLogger); overload; property MinMatchLength: Cardinal read fMinMatchLength write fMinMatchLength default 24; property Format: TFormat read fFormat write fFormat default FMT_QUOTED; end; implementation {$IOCHECKS OFF} uses // Project UBlockSort, UErrors, UPatchWriters; { TDiffer } constructor TDiffer.Create; begin inherited Create; fMinMatchLength := 24; // default minimum match length fFormat := FMT_QUOTED; // default output format end; destructor TDiffer.Destroy; begin inherited; end; function TDiffer.FindMaxMatch(OldFile: TFileData; SortedOldData: PBlock; SearchText: PSignedAnsiChar; SearchTextLength: Cardinal): TMatch; var FoundPos: Cardinal; FoundLen: Cardinal; begin Result.BlockLength := 0; { no match } Result.NewOffset := 0; while (SearchTextLength <> 0) do begin FoundLen := FindString(OldFile.Data, SortedOldData, OldFile.Size, SearchText, SearchTextLength, FoundPos); if FoundLen >= fMinMatchLength then begin Result.OldOffset := FoundPos; Result.BlockLength := FoundLen; Exit; end; Inc(SearchText); Inc(Result.NewOffset); Dec(SearchTextLength); end; end; function TDiffer.FindString(Data: PSignedAnsiCharArray; Block: PBlock; DataSize: Cardinal; CompareData: PSignedAnsiChar; CompareDataSize: Cardinal; out FoundPos: Cardinal): Cardinal; var First: Cardinal; // first position in Data to search Last: Cardinal; // last position in Data to search Mid: Cardinal; // mid point of Data to search FoundSize: Cardinal; // size of matching "sub-string" FoundMax: Cardinal; // maximum size of matching "sub-string" PData: PSignedAnsiChar; // ptr to char in Data to be compared PCompareData: PSignedAnsiChar; // ptr to char in CompareData to be compared begin First := 0; Last := DataSize - 1; Result := 0; FoundPos := 0; // Do binary search of Data while First <= Last do begin // Get mid point of (sorted) Data to search Mid := (First + Last) div 2; // Set pointer to start of Data search string PData := @Data[Block[Mid]]; // Set pointer to start of CompareData PCompareData := CompareData; // Calculate maximum possible size of matching substring FoundMax := DataSize - Block[Mid]; if FoundMax > CompareDataSize then FoundMax := CompareDataSize; // Find and count match chars from Data and CompareData FoundSize := 0; while (FoundSize < FoundMax) and (PData^ = PCompareData^) do begin Inc(FoundSize); Inc(PData); Inc(PCompareData); end; // We found a "match" of length FoundSize, position Block[Mid] if FoundSize > Result then begin Result := FoundSize; FoundPos := Block[Mid]; end; // Determine next search area // Note: If FoundSize = FoundMatch then substrings match if (FoundSize = FoundMax) or (PData^ < PCompareData^) then // substring <= current data string: search above First := Mid + 1 else // substring < current data string: search below begin Last := Mid; if Last <> 0 then Dec(Last) else Break; end; end; end; procedure TDiffer.MakeDiff(const OldFileName, NewFileName: string; const Logger: TLogger); var OldFile: TFileData; NewFile: TFileData; NewOffset: Cardinal; ToDo: Cardinal; SortedOldData: PBlock; Match: TMatch; PatchWriter: TPatchWriter; begin { initialize } OldFile := nil; NewFile := nil; SortedOldData := nil; PatchWriter := TPatchWriterFactory.Instance(fFormat); try Logger.Log('loading old file'); OldFile := TFileData.Create(OldFileName); Logger.Log('loading new file'); NewFile := TFileData.Create(NewFileName); Logger.Log('block sorting old file'); SortedOldData := TBlockSort.Execute(OldFile.Data, OldFile.Size); if not Assigned(SortedOldData) then Error('virtual memory exhausted'); Logger.Log('generating patch'); PatchWriter.Header(OldFile.Name, NewFile.Name, OldFile.Size, NewFile.Size); { main loop } ToDo := NewFile.Size; NewOffset := 0; while (ToDo <> 0) do begin Match := FindMaxMatch(OldFile, SortedOldData, @NewFile.Data[NewOffset], ToDo); if Match.BlockLength <> 0 then begin { found a match } if Match.NewOffset <> 0 then { preceded by a "copy" block } PatchWriter.Add(@NewFile.Data[NewOffset], Match.NewOffset); Inc(NewOffset, Match.NewOffset); Dec(ToDo, Match.NewOffset); PatchWriter.Copy(NewFile.Data, NewOffset, Match.OldOffset, Match.BlockLength); Inc(NewOffset, Match.BlockLength); Dec(ToDo, Match.BlockLength); end else begin PatchWriter.Add(@NewFile.Data[NewOffset], ToDo); Break; end; end; Logger.Log('done'); finally // finally section new to v1.1 if Assigned(SortedOldData) then FreeMem(SortedOldData); OldFile.Free; NewFile.Free; PatchWriter.Free; end; end; procedure TDiffer.MakeDiff(OldData, NewData, DiffData: Pointer; OldSize, NewSize: Integer; var DiffSize: Integer; const Logger: TLogger); var OldFile: TFileData; NewFile: TFileData; NewOffset: Cardinal; ToDo: Cardinal; SortedOldData: PBlock; Match: TMatch; PatchWriter: TPatchWriter; begin { initialize } DiffSize := 0; OldFile := nil; NewFile := nil; SortedOldData := nil; PatchWriter := TPatchWriterFactory.Instance(fFormat); PatchWriter.Memory := DiffData; PatchWriter.Size := 0; PatchWriter.WriteToMemory := True; try Logger.Log('loading old file'); OldFile := TFileData.Create(OldData, OldSize); Logger.Log('loading new file'); NewFile := TFileData.Create(NewData, NewSize); Logger.Log('block sorting old file'); SortedOldData := TBlockSort.Execute(OldFile.Data, OldFile.Size); if not Assigned(SortedOldData) then Error('virtual memory exhausted'); Logger.Log('generating patch'); PatchWriter.Header(OldFile.Name, NewFile.Name, OldFile.Size, NewFile.Size); { main loop } ToDo := NewFile.Size; NewOffset := 0; while (ToDo <> 0) do begin Match := FindMaxMatch(OldFile, SortedOldData, @NewFile.Data[NewOffset], ToDo); if Match.BlockLength <> 0 then begin { found a match } if Match.NewOffset <> 0 then { preceded by a "copy" block } PatchWriter.Add(@NewFile.Data[NewOffset], Match.NewOffset); Inc(NewOffset, Match.NewOffset); Dec(ToDo, Match.NewOffset); PatchWriter.Copy(NewFile.Data, NewOffset, Match.OldOffset, Match.BlockLength); Inc(NewOffset, Match.BlockLength); Dec(ToDo, Match.BlockLength); end else begin PatchWriter.Add(@NewFile.Data[NewOffset], ToDo); Break; end; end; Logger.Log('done'); DiffSize := PatchWriter.Size; finally // finally section new to v1.1 if Assigned(SortedOldData) then FreeMem(SortedOldData); OldFile.Free; NewFile.Free; PatchWriter.Free; end; end; end.