{ ****************************************************************************** } { * delphi:string fpc:UnicodeString by QQ 600585@qq.com * } { * https://zpascal.net * } { * https://github.com/PassByYou888/zAI * } { * https://github.com/PassByYou888/ZServer4D * } { * https://github.com/PassByYou888/PascalString * } { * https://github.com/PassByYou888/zRasterization * } { * https://github.com/PassByYou888/CoreCipher * } { * https://github.com/PassByYou888/zSound * } { * https://github.com/PassByYou888/zChinese * } { * https://github.com/PassByYou888/zExpression * } { * https://github.com/PassByYou888/zGameWare * } { * https://github.com/PassByYou888/zAnalysis * } { * https://github.com/PassByYou888/FFMPEG-Header * } { * https://github.com/PassByYou888/zTranslate * } { * https://github.com/PassByYou888/InfiniteIoT * } { * https://github.com/PassByYou888/FastMD5 * } { ****************************************************************************** } unit UPascalStrings; {$INCLUDE zDefine.inc} interface uses CoreClasses, PascalStrings; type {$IFDEF FPC} USystemChar = UnicodeChar; USystemString = UnicodeString; {$ELSE FPC} USystemChar = PascalStrings.SystemChar; USystemString = PascalStrings.SystemString; {$ENDIF FPC} PUSystemString = ^USystemString; PUPascalString = ^TUPascalString; TUArrayChar = array of USystemChar; TUOrdChar = (uc0to9, uc1to9, uc0to32, uc0to32no10, ucLoAtoF, ucHiAtoF, ucLoAtoZ, ucHiAtoZ, ucHex, ucAtoF, ucAtoZ, ucVisibled); TUOrdChars = set of TUOrdChar; TUHash = Cardinal; TUHash64 = UInt64; TUPascalString = record private function GetText: USystemString; procedure SetText(const Value: USystemString); function GetLen: Integer; procedure SetLen(const Value: Integer); function GetChars(index: Integer): USystemChar; procedure SetChars(index: Integer; const Value: USystemChar); function GetBytes: TBytes; procedure SetBytes(const Value: TBytes); function GetPlatformBytes: TBytes; procedure SetPlatformBytes(const Value: TBytes); function GetANSI: TBytes; procedure SetANSI(const Value: TBytes); function GetLast: USystemChar; procedure SetLast(const Value: USystemChar); function GetFirst: USystemChar; procedure SetFirst(const Value: USystemChar); function GetUpperChar(index: Integer): USystemChar; procedure SetUpperChar(index: Integer; const Value: USystemChar); function GetLowerChar(index: Integer): USystemChar; procedure SetLowerChar(index: Integer; const Value: USystemChar); public buff: TUArrayChar; {$IFDEF DELPHI} class operator Equal(const Lhs, Rhs: TUPascalString): Boolean; class operator NotEqual(const Lhs, Rhs: TUPascalString): Boolean; class operator GreaterThan(const Lhs, Rhs: TUPascalString): Boolean; class operator GreaterThanOrEqual(const Lhs, Rhs: TUPascalString): Boolean; class operator LessThan(const Lhs, Rhs: TUPascalString): Boolean; class operator LessThanOrEqual(const Lhs, Rhs: TUPascalString): Boolean; class operator Add(const Lhs, Rhs: TUPascalString): TUPascalString; class operator Add(const Lhs: USystemString; const Rhs: TUPascalString): TUPascalString; class operator Add(const Lhs: TUPascalString; const Rhs: USystemString): TUPascalString; class operator Add(const Lhs: USystemChar; const Rhs: TUPascalString): TUPascalString; class operator Add(const Lhs: TUPascalString; const Rhs: USystemChar): TUPascalString; class operator Implicit(Value: RawByteString): TUPascalString; class operator Implicit(Value: TPascalString): TUPascalString; class operator Implicit(Value: USystemString): TUPascalString; class operator Implicit(Value: USystemChar): TUPascalString; class operator Implicit(Value: TUPascalString): USystemString; class operator Implicit(Value: TUPascalString): Variant; class operator Explicit(Value: TUPascalString): RawByteString; class operator Explicit(Value: TUPascalString): TPascalString; class operator Explicit(Value: TUPascalString): USystemString; class operator Explicit(Value: TUPascalString): Variant; class operator Explicit(Value: USystemString): TUPascalString; class operator Explicit(Value: Variant): TUPascalString; class operator Explicit(Value: USystemChar): TUPascalString; {$ENDIF} function Copy(index, Count: NativeInt): TUPascalString; function Same(const p: PUPascalString): Boolean; overload; function Same(const t: TUPascalString): Boolean; overload; function Same(const t1, t2: TUPascalString): Boolean; overload; function Same(const t1, t2, t3: TUPascalString): Boolean; overload; function Same(const t1, t2, t3, t4: TUPascalString): Boolean; overload; function Same(const t1, t2, t3, t4, t5: TUPascalString): Boolean; overload; function Same(const IgnoreCase: Boolean; const t: TUPascalString): Boolean; overload; function ComparePos(const Offset: Integer; const p: PUPascalString): Boolean; overload; function ComparePos(const Offset: Integer; const t: TUPascalString): Boolean; overload; function GetPos(const s: TUPascalString; const Offset: Integer = 1): Integer; overload; function GetPos(const s: PUPascalString; const Offset: Integer = 1): Integer; overload; function Exists(c: USystemChar): Boolean; overload; function Exists(c: array of USystemChar): Boolean; overload; function Exists(const s: TUPascalString): Boolean; overload; function GetCharCount(c: USystemChar): Integer; function hash: TUHash; function Hash64: TUHash64; property Last: USystemChar read GetLast write SetLast; property First: USystemChar read GetFirst write SetFirst; procedure DeleteLast; procedure DeleteFirst; procedure Delete(idx, cnt: Integer); procedure Clear; procedure Append(t: TUPascalString); overload; procedure Append(c: USystemChar); overload; procedure Append(const Fmt: SystemString; const Args: array of const); overload; function GetString(bPos, ePos: NativeInt): TUPascalString; procedure Insert(AText: USystemString; idx: Integer); procedure FastAsText(var output: USystemString); procedure FastGetBytes(var output: TBytes); property Text: USystemString read GetText write SetText; function LowerText: USystemString; function UpperText: USystemString; function Invert: TUPascalString; function TrimChar(const Chars: TUPascalString): TUPascalString; function DeleteChar(const Chars: TUPascalString): TUPascalString; overload; function DeleteChar(const Chars: TUOrdChars): TUPascalString; overload; function ReplaceChar(const Chars: TUPascalString; const newChar: USystemChar): TUPascalString; overload; function ReplaceChar(const Chars, newChar: USystemChar): TUPascalString; overload; function ReplaceChar(const Chars: TUOrdChars; const newChar: USystemChar): TUPascalString; overload; function BuildPlatformPChar: Pointer; class procedure FreePlatformPChar(p: Pointer); static; class function RandomString(L_: Integer): TUPascalString; static; { https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm } function SmithWaterman(const p: PUPascalString): Double; overload; function SmithWaterman(const s: TUPascalString): Double; overload; property Len: Integer read GetLen write SetLen; property L: Integer read GetLen write SetLen; property Chars[index: Integer]: USystemChar read GetChars write SetChars; default; property UpperChar[index: Integer]: USystemChar read GetUpperChar write SetUpperChar; property LowerChar[index: Integer]: USystemChar read GetLowerChar write SetLowerChar; property Bytes: TBytes read GetBytes write SetBytes; // UTF8 property PlatformBytes: TBytes read GetPlatformBytes write SetPlatformBytes; // system default property ANSI: TBytes read GetANSI write SetANSI; // Ansi Bytes function BOMBytes: TBytes; end; TUArrayPascalString = array of TUPascalString; PUArrayPascalString = ^TUArrayPascalString; TUArrayPascalStringPtr = array of PUPascalString; PUArrayPascalStringPtr = ^TUArrayPascalStringPtr; TUPStr = TUPascalString; function UCharIn(c: USystemChar; const SomeChars: array of USystemChar): Boolean; overload; function UCharIn(c: USystemChar; const SomeChar: USystemChar): Boolean; overload; function UCharIn(c: USystemChar; const s: TUPascalString): Boolean; overload; function UCharIn(c: USystemChar; const p: PUPascalString): Boolean; overload; function UCharIn(c: USystemChar; const SomeCharsets: TUOrdChars): Boolean; overload; function UCharIn(c: USystemChar; const SomeCharset: TUOrdChar): Boolean; overload; function UCharIn(c: USystemChar; const SomeCharsets: TUOrdChars; const SomeChars: TUPascalString): Boolean; overload; function UCharIn(c: USystemChar; const SomeCharsets: TUOrdChars; const p: PUPascalString): Boolean; overload; function UFastHashPSystemString(const s: PSystemString): TUHash; overload; function UFastHash64PSystemString(const s: PSystemString): TUHash64; overload; function UFastHashSystemString(const s: SystemString): TUHash; overload; function UFastHash64SystemString(const s: SystemString): TUHash64; overload; function UFastHashPPascalString(const s: PPascalString): TUHash; function UFastHash64PPascalString(const s: PPascalString): TUHash64; function UFormat(const Fmt: USystemString; const Args: array of const): USystemString; {$IFDEF FPC} operator := (const s: Variant)r: TUPascalString; operator := (const s: AnsiString)r: TUPascalString; operator := (const s: RawByteString)r: TUPascalString; operator := (const s: UnicodeString)r: TUPascalString; operator := (const s: WideString)r: TUPascalString; operator := (const s: ShortString)r: TUPascalString; operator := (const c: USystemChar)r: TUPascalString; operator := (const c: TPascalString)r: TUPascalString; operator := (const s: TUPascalString)r: AnsiString; operator := (const s: TUPascalString)r: RawByteString; operator := (const s: TUPascalString)r: UnicodeString; operator := (const s: TUPascalString)r: WideString; operator := (const s: TUPascalString)r: ShortString; operator := (const s: TUPascalString)r: Variant; operator := (const s: TUPascalString)r: TPascalString; operator = (const a: TUPascalString; const b: TUPascalString): Boolean; operator <> (const a: TUPascalString; const b: TUPascalString): Boolean; operator > (const a: TUPascalString; const b: TUPascalString): Boolean; operator >= (const a: TUPascalString; const b: TUPascalString): Boolean; operator < (const a: TUPascalString; const b: TUPascalString): Boolean; operator <= (const a: TUPascalString; const b: TUPascalString): Boolean; operator + (const a: TUPascalString; const b: TUPascalString): TUPascalString; operator + (const a: TUPascalString; const b: USystemString): TUPascalString; operator + (const a: USystemString; const b: TUPascalString): TUPascalString; operator + (const a: TUPascalString; const b: USystemChar): TUPascalString; operator + (const a: USystemChar; const b: TUPascalString): TUPascalString; {$ENDIF} { https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm } // short string likeness and out diff function USmithWatermanCompare(const seq1, seq2: PUPascalString; var diff1, diff2: TUPascalString; const NoDiffChar: Boolean; const diffChar: USystemChar): Double; overload; function USmithWatermanCompare(const seq1, seq2: PUPascalString; var diff1, diff2: TUPascalString): Double; overload; function USmithWatermanCompare(const seq1, seq2: TUPascalString; var diff1, diff2: TUPascalString; const NoDiffChar: Boolean; const diffChar: USystemChar): Double; overload; function USmithWatermanCompare(const seq1, seq2: TUPascalString; var diff1, diff2: TUPascalString): Double; overload; // short string likeness function USmithWatermanCompare(const seq1, seq2: PUPascalString; out Same, Diff: Integer): Double; overload; function USmithWatermanCompare(const seq1, seq2: PUPascalString): Double; overload; function USmithWatermanCompare(const seq1, seq2: TUPascalString): Double; overload; function USmithWatermanCompare(const seq1: TUArrayPascalString; const seq2: TUPascalString): Double; overload; // memory likeness function USmithWatermanCompare(const seq1: Pointer; siz1: Integer; const seq2: Pointer; siz2: Integer; out Same, Diff: Integer): Double; overload; function USmithWatermanCompare(const seq1: Pointer; siz1: Integer; const seq2: Pointer; siz2: Integer): Double; overload; // long string likeness function USmithWatermanCompareLongString(const t1, t2: TUPascalString; const MinDiffCharWithPeerLine: Integer; out Same, Diff: Integer): Double; overload; function USmithWatermanCompareLongString(const t1, t2: TUPascalString): Double; overload; var USystemCharSize: NativeInt = SizeOf(USystemChar); {$IFDEF CPU64} UMaxSmithWatermanMatrix: NativeInt = 10000 * 10; {$ELSE} UMaxSmithWatermanMatrix: NativeInt = 8192; {$ENDIF} const {$IFDEF FirstCharInZero} UFirstCharPos = 0; {$ELSE} UFirstCharPos = 1; {$ENDIF} implementation uses SysUtils, Variants; procedure CombineCharsPP(const c1, c2: TUArrayChar; var output: TUArrayChar); var LL, rl: Integer; begin LL := length(c1); rl := length(c2); SetLength(output, LL + rl); if LL > 0 then CopyPtr(@c1[0], @output[0], LL * USystemCharSize); if rl > 0 then CopyPtr(@c2[0], @output[LL], rl * USystemCharSize); end; procedure CombineCharsSP(const c1: USystemString; const c2: TUArrayChar; var output: TUArrayChar); var LL, rl: Integer; begin LL := length(c1); rl := length(c2); SetLength(output, LL + rl); if LL > 0 then CopyPtr(@c1[UFirstCharPos], @output[0], LL * USystemCharSize); if rl > 0 then CopyPtr(@c2[0], @output[LL], rl * USystemCharSize); end; procedure CombineCharsPS(const c1: TUArrayChar; const c2: USystemString; var output: TUArrayChar); var LL, rl: Integer; begin LL := length(c1); rl := length(c2); SetLength(output, LL + rl); if LL > 0 then CopyPtr(@c1[0], @output[0], LL * USystemCharSize); if rl > 0 then CopyPtr(@c2[UFirstCharPos], @output[LL], rl * USystemCharSize); end; procedure CombineCharsCP(const c1: USystemChar; const c2: TUArrayChar; var output: TUArrayChar); var rl: Integer; begin rl := length(c2); SetLength(output, rl + 1); output[0] := c1; if rl > 0 then CopyPtr(@c2[0], @output[1], rl * USystemCharSize); end; procedure CombineCharsPC(const c1: TUArrayChar; const c2: USystemChar; var output: TUArrayChar); var LL: Integer; begin LL := length(c1); SetLength(output, LL + 1); if LL > 0 then CopyPtr(@c1[0], @output[0], LL * USystemCharSize); output[LL] := c2; end; function UCharIn(c: USystemChar; const SomeChars: array of USystemChar): Boolean; var AChar: USystemChar; begin Result := True; for AChar in SomeChars do if AChar = c then Exit; Result := False; end; function UCharIn(c: USystemChar; const SomeChar: USystemChar): Boolean; begin Result := c = SomeChar; end; function UCharIn(c: USystemChar; const s: TUPascalString): Boolean; begin Result := s.Exists(c); end; function UCharIn(c: USystemChar; const p: PUPascalString): Boolean; begin Result := p^.Exists(c); end; function UCharIn(c: USystemChar; const SomeCharset: TUOrdChar): Boolean; const ord0 = Ord('0'); ord1 = Ord('1'); ord9 = Ord('9'); ordLA = Ord('a'); ordHA = Ord('A'); ordLF = Ord('f'); ordHF = Ord('F'); ordLZ = Ord('z'); ordHZ = Ord('Z'); var v: Word; begin v := Ord(c); case SomeCharset of uc0to9: Result := (v >= ord0) and (v <= ord9); uc1to9: Result := (v >= ord1) and (v <= ord9); uc0to32: Result := ((v >= 0) and (v <= 32)); uc0to32no10: Result := ((v >= 0) and (v <= 32) and (v <> 10)); ucLoAtoF: Result := (v >= ordLA) and (v <= ordLF); ucHiAtoF: Result := (v >= ordHA) and (v <= ordHF); ucLoAtoZ: Result := (v >= ordLA) and (v <= ordLZ); ucHiAtoZ: Result := (v >= ordHA) and (v <= ordHZ); ucHex: Result := ((v >= ordLA) and (v <= ordLF)) or ((v >= ordHA) and (v <= ordHF)) or ((v >= ord0) and (v <= ord9)); ucAtoF: Result := ((v >= ordLA) and (v <= ordLF)) or ((v >= ordHA) and (v <= ordHF)); ucAtoZ: Result := ((v >= ordLA) and (v <= ordLZ)) or ((v >= ordHA) and (v <= ordHZ)); ucVisibled: Result := (v <= $20) and (v <= $7E); else Result := False; end; end; function UCharIn(c: USystemChar; const SomeCharsets: TUOrdChars): Boolean; var i: TUOrdChar; begin Result := True; for i in SomeCharsets do if UCharIn(c, i) then Exit; Result := False; end; function UCharIn(c: USystemChar; const SomeCharsets: TUOrdChars; const SomeChars: TUPascalString): Boolean; begin if UCharIn(c, SomeCharsets) then Result := True else Result := UCharIn(c, SomeChars); end; function UCharIn(c: USystemChar; const SomeCharsets: TUOrdChars; const p: PUPascalString): Boolean; begin if UCharIn(c, SomeCharsets) then Result := True else Result := UCharIn(c, p); end; function UFastHashPSystemString(const s: PSystemString): TUHash; var i: Integer; c: USystemChar; begin Result := 0; {$IFDEF FirstCharInZero} for i := 0 to length(s^) - 1 do {$ELSE} for i := 1 to length(s^) do {$ENDIF} begin c := s^[i]; if UCharIn(c, ucHiAtoZ) then inc(c, 32); Result := ((Result shl 7) or (Result shr 25)) + TUHash(c); end; end; function UFastHash64PSystemString(const s: PSystemString): TUHash64; var i: Integer; c: USystemChar; begin Result := 0; {$IFDEF FirstCharInZero} for i := 0 to length(s^) - 1 do {$ELSE} for i := 1 to length(s^) do {$ENDIF} begin c := s^[i]; if UCharIn(c, ucHiAtoZ) then inc(c, 32); Result := ((Result shl 7) or (Result shr 57)) + TUHash64(c); end; end; function UFastHashSystemString(const s: SystemString): TUHash; begin Result := UFastHashPSystemString(@s); end; function UFastHash64SystemString(const s: SystemString): TUHash64; begin Result := UFastHash64PSystemString(@s); end; function UFastHashPPascalString(const s: PPascalString): TUHash; var i: Integer; c: USystemChar; begin Result := 0; for i := 1 to s^.Len do begin c := s^[i]; if UCharIn(c, ucHiAtoZ) then inc(c, 32); Result := ((Result shl 7) or (Result shr 25)) + TUHash(c); end; end; function UFastHash64PPascalString(const s: PPascalString): TUHash64; var i: Integer; c: USystemChar; begin Result := 0; for i := 1 to s^.Len do begin c := s^[i]; if UCharIn(c, ucHiAtoZ) then inc(c, 32); Result := ((Result shl 7) or (Result shr 57)) + TUHash64(c); end; end; function UFormat(const Fmt: USystemString; const Args: array of const): USystemString; begin try {$IFDEF FPC} Result := UnicodeFormat(Fmt, Args); {$ELSE FPC} Result := Format(Fmt, Args); {$ENDIF FPC} except Result := Fmt; end; end; function BytesOfPascalString(const s: TUPascalString): TBytes; begin Result := s.Bytes; end; function PascalStringOfBytes(const s: TBytes): TUPascalString; begin Result.Bytes := s; end; function GetSWMVMemory(const xLen, yLen: NativeInt): Pointer; inline; { optimized matrix performance } begin Result := System.AllocMem((xLen + 1) * (yLen + 1) * SizeOf(NativeInt)); end; function GetSWMV(const p: Pointer; const w, x, y: NativeInt): NativeInt; inline; { optimized matrix performance } begin Result := PNativeInt(nativeUInt(p) + ((x + y * (w + 1)) * SizeOf(NativeInt)))^; end; procedure SetSWMV(const p: Pointer; const w, x, y: NativeInt; const v: NativeInt); inline; { optimized matrix performance } begin PNativeInt(nativeUInt(p) + ((x + y * (w + 1)) * SizeOf(NativeInt)))^ := v; end; function GetMax(const i1, i2: NativeInt): NativeInt; inline; begin if i1 > i2 then Result := i1 else Result := i2; end; const SmithWaterman_MatchOk = 1; mismatch_penalty = -1; gap_penalty = -1; function USmithWatermanCompare(const seq1, seq2: PUPascalString; var diff1, diff2: TUPascalString; const NoDiffChar: Boolean; const diffChar: USystemChar): Double; function InlineMatch(alphaC, betaC: USystemChar; const diffC: USystemChar): Integer; inline; begin if UCharIn(alphaC, ucLoAtoZ) then dec(alphaC, 32); if UCharIn(betaC, ucLoAtoZ) then dec(betaC, 32); if alphaC = betaC then Result := SmithWaterman_MatchOk else if (alphaC = diffC) or (betaC = diffC) then Result := gap_penalty else Result := mismatch_penalty; end; var swMatrixPtr: Pointer; i, j, L1, l2: NativeInt; matched, deleted, inserted: NativeInt; score_current, score_diagonal, score_left, score_right: NativeInt; identity: NativeInt; align1, align2: TUPascalString; begin L1 := seq1^.Len; l2 := seq2^.Len; if (L1 = 0) or (l2 = 0) or (L1 > UMaxSmithWatermanMatrix) or (l2 > UMaxSmithWatermanMatrix) then begin Result := -1; Exit; end; { fast build matrix } swMatrixPtr := GetSWMVMemory(L1, l2); if swMatrixPtr = nil then begin diff1 := ''; diff2 := ''; Result := -1; Exit; end; i := 0; while i <= L1 do begin SetSWMV(swMatrixPtr, L1, i, 0, gap_penalty * i); inc(i); end; j := 0; while j <= l2 do begin SetSWMV(swMatrixPtr, L1, 0, j, gap_penalty * j); inc(j); end; { compute matrix } i := 1; while i <= L1 do begin j := 1; while j <= l2 do begin matched := GetSWMV(swMatrixPtr, L1, i - 1, j - 1) + InlineMatch(seq1^[i], seq2^[j], diffChar); deleted := GetSWMV(swMatrixPtr, L1, i - 1, j) + gap_penalty; inserted := GetSWMV(swMatrixPtr, L1, i, j - 1) + gap_penalty; SetSWMV(swMatrixPtr, L1, i, j, GetMax(matched, GetMax(deleted, inserted))); inc(j); end; inc(i); end; { compute align } i := L1; j := l2; align1 := ''; align2 := ''; identity := 0; while (i > 0) and (j > 0) do begin score_current := GetSWMV(swMatrixPtr, L1, i, j); score_diagonal := GetSWMV(swMatrixPtr, L1, i - 1, j - 1); score_left := GetSWMV(swMatrixPtr, L1, i - 1, j); score_right := GetSWMV(swMatrixPtr, L1, i, j - 1); matched := InlineMatch(seq1^[i], seq2^[j], diffChar); if score_current = score_diagonal + matched then begin if matched = SmithWaterman_MatchOk then begin inc(identity); align1.Append(seq1^[i]); align2.Append(seq2^[j]); end else if NoDiffChar then begin align1.Append(diffChar); align2.Append(diffChar); end else begin align1.Append(seq1^[i]); align2.Append(seq2^[j]); end; dec(i); dec(j); end else if score_current = score_left + gap_penalty then begin if NoDiffChar then align1.Append(diffChar) else align1.Append(seq1^[i]); align2.Append(diffChar); dec(i); end else if score_current = score_right + gap_penalty then begin if NoDiffChar then align2.Append(diffChar) else align2.Append(seq2^[j]); align1.Append(diffChar); dec(j); end else raise Exception.Create('matrix error'); // matrix debug time end; System.FreeMemory(swMatrixPtr); while i > 0 do begin if NoDiffChar then align1.Append(diffChar) else align1.Append(seq1^[i]); align2.Append(diffChar); dec(i); end; while j > 0 do begin if NoDiffChar then align2.Append(diffChar) else align2.Append(seq2^[j]); align1.Append(diffChar); dec(j); end; if identity > 0 then Result := identity / align1.Len else Result := -1; diff1 := align1.Invert; diff2 := align2.Invert; end; function USmithWatermanCompare(const seq1, seq2: PUPascalString; var diff1, diff2: TUPascalString): Double; begin Result := USmithWatermanCompare(seq1, seq2, diff1, diff2, False, '-'); end; function USmithWatermanCompare(const seq1, seq2: TUPascalString; var diff1, diff2: TUPascalString; const NoDiffChar: Boolean; const diffChar: USystemChar): Double; begin Result := USmithWatermanCompare(@seq1, @seq2, diff1, diff2, NoDiffChar, diffChar); end; function USmithWatermanCompare(const seq1, seq2: TUPascalString; var diff1, diff2: TUPascalString): Double; begin Result := USmithWatermanCompare(seq1, seq2, diff1, diff2, False, '-'); end; function USmithWatermanCompare(const seq1, seq2: PUPascalString; out Same, Diff: Integer): Double; function InlineMatch(alphaC, betaC: USystemChar): NativeInt; inline; begin if UCharIn(alphaC, ucLoAtoZ) then dec(alphaC, 32); if UCharIn(betaC, ucLoAtoZ) then dec(betaC, 32); if alphaC = betaC then Result := SmithWaterman_MatchOk else Result := mismatch_penalty; end; var swMatrixPtr: Pointer; i, j, L1, l2: NativeInt; matched, deleted, inserted: NativeInt; score_current, score_diagonal, score_left, score_right: NativeInt; identity, L_: NativeInt; begin L1 := seq1^.Len; l2 := seq2^.Len; if (L1 = 0) or (l2 = 0) or (L1 > UMaxSmithWatermanMatrix) or (l2 > UMaxSmithWatermanMatrix) then begin Result := -1; Same := 0; Diff := L1 + l2; Exit; end; { fast build matrix } swMatrixPtr := GetSWMVMemory(L1, l2); if swMatrixPtr = nil then begin Result := -1; Exit; end; i := 0; while i <= L1 do begin SetSWMV(swMatrixPtr, L1, i, 0, gap_penalty * i); inc(i); end; j := 0; while j <= l2 do begin SetSWMV(swMatrixPtr, L1, 0, j, gap_penalty * j); inc(j); end; { compute matrix } i := 1; while i <= L1 do begin j := 1; while j <= l2 do begin matched := GetSWMV(swMatrixPtr, L1, i - 1, j - 1) + InlineMatch(seq1^[i], seq2^[j]); deleted := GetSWMV(swMatrixPtr, L1, i - 1, j) + gap_penalty; inserted := GetSWMV(swMatrixPtr, L1, i, j - 1) + gap_penalty; SetSWMV(swMatrixPtr, L1, i, j, GetMax(matched, GetMax(deleted, inserted))); inc(j); end; inc(i); end; { compute align } i := L1; j := l2; identity := 0; L_ := 0; while (i > 0) and (j > 0) do begin score_current := GetSWMV(swMatrixPtr, L1, i, j); score_diagonal := GetSWMV(swMatrixPtr, L1, i - 1, j - 1); score_left := GetSWMV(swMatrixPtr, L1, i - 1, j); score_right := GetSWMV(swMatrixPtr, L1, i, j - 1); matched := InlineMatch(seq1^[i], seq2^[j]); if score_current = score_diagonal + matched then begin if matched = SmithWaterman_MatchOk then inc(identity); inc(L_); dec(i); dec(j); end else if score_current = score_left + gap_penalty then begin inc(L_); dec(i); end else if score_current = score_right + gap_penalty then begin inc(L_); dec(j); end else raise Exception.Create('matrix error'); // matrix debug time end; System.FreeMemory(swMatrixPtr); if identity > 0 then begin Result := identity / (L_ + i + j); Same := identity; Diff := (L_ + i + j) - identity; end else begin Result := -1; Same := 0; Diff := L_ + i + j; end; end; function USmithWatermanCompare(const seq1, seq2: PUPascalString): Double; var Same, Diff: Integer; begin Result := USmithWatermanCompare(seq1, seq2, Same, Diff); end; function USmithWatermanCompare(const seq1, seq2: TUPascalString): Double; begin Result := USmithWatermanCompare(@seq1, @seq2); end; function USmithWatermanCompare(const seq1: TUArrayPascalString; const seq2: TUPascalString): Double; var i: Integer; r: Double; begin Result := -1; for i := 0 to length(seq1) - 1 do begin r := USmithWatermanCompare(seq1[i], seq2); if r > Result then Result := r; end; end; function USmithWatermanCompare(const seq1: Pointer; siz1: Integer; const seq2: Pointer; siz2: Integer; out Same, Diff: Integer): Double; function InlineMatch(const alphaB, betaB: Byte): NativeInt; inline; begin if alphaB = betaB then Result := SmithWaterman_MatchOk else Result := mismatch_penalty; end; var swMatrixPtr: Pointer; i, j, L1, l2: NativeInt; matched, deleted, inserted: NativeInt; score_current, score_diagonal, score_left, score_right: NativeInt; identity, L_: NativeInt; begin L1 := siz1; l2 := siz2; if (L1 = 0) or (l2 = 0) or (L1 > UMaxSmithWatermanMatrix) or (l2 > UMaxSmithWatermanMatrix) then begin Result := -1; Same := 0; Diff := L1 + l2; Exit; end; { fast build matrix } swMatrixPtr := GetSWMVMemory(L1, l2); if swMatrixPtr = nil then begin Result := -1; Exit; end; i := 0; while i <= L1 do begin SetSWMV(swMatrixPtr, L1, i, 0, gap_penalty * i); inc(i); end; j := 0; while j <= l2 do begin SetSWMV(swMatrixPtr, L1, 0, j, gap_penalty * j); inc(j); end; { compute matrix } i := 1; while i <= L1 do begin j := 1; while j <= l2 do begin matched := GetSWMV(swMatrixPtr, L1, i - 1, j - 1) + InlineMatch(PByte(nativeUInt(seq1) + (i - 1))^, PByte(nativeUInt(seq2) + (j - 1))^); deleted := GetSWMV(swMatrixPtr, L1, i - 1, j) + gap_penalty; inserted := GetSWMV(swMatrixPtr, L1, i, j - 1) + gap_penalty; SetSWMV(swMatrixPtr, L1, i, j, GetMax(matched, GetMax(deleted, inserted))); inc(j); end; inc(i); end; { compute align } i := L1; j := l2; identity := 0; L_ := 0; while (i > 0) and (j > 0) do begin score_current := GetSWMV(swMatrixPtr, L1, i, j); score_diagonal := GetSWMV(swMatrixPtr, L1, i - 1, j - 1); score_left := GetSWMV(swMatrixPtr, L1, i - 1, j); score_right := GetSWMV(swMatrixPtr, L1, i, j - 1); matched := InlineMatch(PByte(nativeUInt(seq1) + (i - 1))^, PByte(nativeUInt(seq2) + (j - 1))^); if score_current = score_diagonal + matched then begin if matched = SmithWaterman_MatchOk then inc(identity); inc(L_); dec(i); dec(j); end else if score_current = score_left + gap_penalty then begin inc(L_); dec(i); end else if score_current = score_right + gap_penalty then begin inc(L_); dec(j); end else raise Exception.Create('matrix error'); // matrix debug time end; System.FreeMemory(swMatrixPtr); if identity > 0 then begin Result := identity / (L_ + i + j); Same := identity; Diff := (L_ + i + j) - identity; end else begin Result := -1; Same := 0; Diff := L_ + i + j; end; end; function USmithWatermanCompare(const seq1: Pointer; siz1: Integer; const seq2: Pointer; siz2: Integer): Double; var Same, Diff: Integer; begin Result := USmithWatermanCompare(seq1, siz1, seq2, siz2, Same, Diff); end; function USmithWatermanCompareLongString(const t1, t2: TUPascalString; const MinDiffCharWithPeerLine: Integer; out Same, Diff: Integer): Double; type PSRec = ^TSRec; TSRec = record s: TUPascalString; end; procedure _FillText(psPtr: PUPascalString; outLst: TCoreClassList); var L_, i: Integer; n: TUPascalString; p: PSRec; begin L_ := psPtr^.Len; i := 1; n := ''; while i <= L_ do begin if UCharIn(psPtr^[i], [#13, #10]) then begin n := n.DeleteChar(#32#9); if n.Len > 0 then begin new(p); p^.s := n; outLst.Add(p); n := ''; end; repeat inc(i); until (i > L_) or (not UCharIn(psPtr^[i], [#13, #10, #32, #9])); end else begin n.Append(psPtr^[i]); inc(i); end; end; n := n.DeleteChar(#32#9); if n.Len > 0 then begin new(p); p^.s := n; outLst.Add(p); end; end; function InlineMatch(const alpha, beta: PSRec; const MinDiffCharWithPeerLine: Integer; var cSame, cDiff: Integer): NativeInt; inline; begin if USmithWatermanCompare(@alpha^.s, @beta^.s, cSame, cDiff) > 0 then begin if cDiff < MinDiffCharWithPeerLine then Result := SmithWaterman_MatchOk else Result := mismatch_penalty; end else Result := mismatch_penalty; end; var lst1, lst2: TCoreClassList; procedure _Init; begin lst1 := TCoreClassList.Create; lst2 := TCoreClassList.Create; _FillText(@t1, lst1); _FillText(@t2, lst2); end; procedure _Free; var i: Integer; begin for i := 0 to lst1.Count - 1 do Dispose(PSRec(lst1[i])); for i := 0 to lst2.Count - 1 do Dispose(PSRec(lst2[i])); DisposeObject([lst1, lst2]); end; var swMatrixPtr: Pointer; i, j, L1, l2: NativeInt; matched, deleted, inserted: NativeInt; score_current, score_diagonal, score_left, score_right: NativeInt; cSame, cDiff, TotalSame, TotalDiff: Integer; begin _Init; L1 := lst1.Count; l2 := lst2.Count; if (L1 = 0) or (l2 = 0) or (L1 > UMaxSmithWatermanMatrix) or (l2 > UMaxSmithWatermanMatrix) then begin Result := -1; Same := 0; Diff := L1 + l2; _Free; Exit; end; { fast build matrix } swMatrixPtr := GetSWMVMemory(L1, l2); if swMatrixPtr = nil then begin Result := -1; _Free; Exit; end; i := 0; while i <= L1 do begin SetSWMV(swMatrixPtr, L1, i, 0, gap_penalty * i); inc(i); end; j := 0; while j <= l2 do begin SetSWMV(swMatrixPtr, L1, 0, j, gap_penalty * j); inc(j); end; { compute matrix } i := 1; while i <= L1 do begin j := 1; while j <= l2 do begin matched := GetSWMV(swMatrixPtr, L1, i - 1, j - 1) + InlineMatch(PSRec(lst1[i - 1]), PSRec(lst2[j - 1]), MinDiffCharWithPeerLine, cSame, cDiff); deleted := GetSWMV(swMatrixPtr, L1, i - 1, j) + gap_penalty; inserted := GetSWMV(swMatrixPtr, L1, i, j - 1) + gap_penalty; SetSWMV(swMatrixPtr, L1, i, j, GetMax(matched, GetMax(deleted, inserted))); inc(j); end; inc(i); end; { compute align } i := L1; j := l2; TotalSame := 0; TotalDiff := 0; while (i > 0) and (j > 0) do begin score_current := GetSWMV(swMatrixPtr, L1, i, j); score_diagonal := GetSWMV(swMatrixPtr, L1, i - 1, j - 1); score_left := GetSWMV(swMatrixPtr, L1, i - 1, j); score_right := GetSWMV(swMatrixPtr, L1, i, j - 1); matched := InlineMatch(PSRec(lst1[i - 1]), PSRec(lst2[j - 1]), MinDiffCharWithPeerLine, cSame, cDiff); inc(TotalSame, cSame); inc(TotalDiff, cDiff); if score_current = score_diagonal + matched then begin dec(i); dec(j); end else if score_current = score_left + gap_penalty then begin dec(i); end else if score_current = score_right + gap_penalty then begin dec(j); end else raise Exception.Create('matrix error'); // matrix debug time end; System.FreeMemory(swMatrixPtr); _Free; if TotalSame > 0 then begin Result := TotalSame / (TotalSame + TotalDiff); Same := TotalSame; Diff := TotalDiff; end else begin Result := -1; Same := 0; Diff := t2.Len + t1.Len; end; end; function USmithWatermanCompareLongString(const t1, t2: TUPascalString): Double; var Same, Diff: Integer; begin Result := USmithWatermanCompareLongString(t1, t2, 5, Same, Diff); end; {$IFDEF FPC} operator := (const s: Variant)r: TUPascalString; begin r.Text := s; end; operator := (const s: AnsiString)r: TUPascalString; begin r.Text := s; end; operator := (const s: RawByteString)r: TUPascalString; begin r.Text := s; end; operator := (const s: UnicodeString)r: TUPascalString; begin r.Text := s; end; operator := (const s: WideString)r: TUPascalString; begin r.Text := s; end; operator := (const s: ShortString)r: TUPascalString; begin r.Text := s; end; operator := (const c: USystemChar)r: TUPascalString; begin r.Text := c; end; operator := (const c: TPascalString)r: TUPascalString; begin Result.Bytes := c.Bytes; end; operator := (const s: TUPascalString)r: AnsiString; begin r := s.Text; end; operator := (const s: TUPascalString)r: RawByteString; begin r := s.Text; end; operator := (const s: TUPascalString)r: UnicodeString; begin r := s.Text; end; operator := (const s: TUPascalString)r: WideString; begin r := s.Text; end; operator := (const s: TUPascalString)r: ShortString; begin r := s.Text; end; operator := (const s: TUPascalString)r: Variant; begin r := s.Text; end; operator := (const s: TUPascalString)r: TPascalString; begin Result.Bytes := s.Bytes; end; operator = (const a: TUPascalString; const b: TUPascalString): Boolean; begin Result := a.Text = b.Text; end; operator <> (const a: TUPascalString; const b: TUPascalString): Boolean; begin Result := a.Text <> b.Text; end; operator > (const a: TUPascalString; const b: TUPascalString): Boolean; begin Result := a.Text > b.Text; end; operator >= (const a: TUPascalString; const b: TUPascalString): Boolean; begin Result := a.Text >= b.Text; end; operator < (const a: TUPascalString; const b: TUPascalString): Boolean; begin Result := a.Text < b.Text; end; operator <= (const a: TUPascalString; const b: TUPascalString): Boolean; begin Result := a.Text <= b.Text; end; operator + (const a: TUPascalString; const b: TUPascalString): TUPascalString; begin CombineCharsPP(a.buff, b.buff, Result.buff); end; operator + (const a: TUPascalString; const b: USystemString): TUPascalString; begin CombineCharsPS(a.buff, b, Result.buff); end; operator + (const a: USystemString; const b: TUPascalString): TUPascalString; begin CombineCharsSP(a, b.buff, Result.buff); end; operator + (const a: TUPascalString; const b: USystemChar): TUPascalString; begin CombineCharsPC(a.buff, b, Result.buff); end; operator + (const a: USystemChar; const b: TUPascalString): TUPascalString; begin CombineCharsCP(a, b.buff, Result.buff); end; {$ENDIF} function TUPascalString.GetText: USystemString; begin SetLength(Result, length(buff)); if length(buff) > 0 then CopyPtr(@buff[0], @Result[UFirstCharPos], length(buff) * USystemCharSize); end; procedure TUPascalString.SetText(const Value: USystemString); begin SetLength(buff, length(Value)); if length(buff) > 0 then CopyPtr(@Value[UFirstCharPos], @buff[0], length(buff) * USystemCharSize); end; function TUPascalString.GetLen: Integer; begin Result := length(buff); end; procedure TUPascalString.SetLen(const Value: Integer); begin SetLength(buff, Value); end; function TUPascalString.GetChars(index: Integer): USystemChar; begin if (index > length(buff)) or (index <= 0) then Result := #0 else Result := buff[index - 1]; end; procedure TUPascalString.SetChars(index: Integer; const Value: USystemChar); begin buff[index - 1] := Value; end; function TUPascalString.GetBytes: TBytes; begin SetLength(Result, 0); if length(buff) = 0 then Exit; {$IFDEF FPC} Result := SysUtils.TEncoding.UTF8.GetBytes(buff); {$ELSE} Result := SysUtils.TEncoding.UTF8.GetBytes(buff); {$ENDIF} end; procedure TUPascalString.SetBytes(const Value: TBytes); begin SetLength(buff, 0); if length(Value) = 0 then Exit; try Text := SysUtils.TEncoding.UTF8.GetString(Value); except SetPlatformBytes(Value); end; end; function TUPascalString.GetPlatformBytes: TBytes; begin SetLength(Result, 0); if length(buff) = 0 then Exit; {$IFDEF FPC} Result := SysUtils.TEncoding.Default.GetBytes(buff); {$ELSE} Result := SysUtils.TEncoding.Default.GetBytes(buff); {$ENDIF} end; procedure TUPascalString.SetPlatformBytes(const Value: TBytes); begin SetLength(buff, 0); if length(Value) = 0 then Exit; try Text := SysUtils.TEncoding.Default.GetString(Value); except SetLength(buff, 0); end; end; function TUPascalString.GetANSI: TBytes; begin SetLength(Result, 0); if length(buff) = 0 then Exit; {$IFDEF FPC} Result := SysUtils.TEncoding.ANSI.GetBytes(Text); {$ELSE} Result := SysUtils.TEncoding.ANSI.GetBytes(buff); {$ENDIF} end; procedure TUPascalString.SetANSI(const Value: TBytes); begin SetLength(buff, 0); if length(Value) = 0 then Exit; try Text := SysUtils.TEncoding.ANSI.GetString(Value); except SetLength(buff, 0); end; end; function TUPascalString.GetLast: USystemChar; begin if length(buff) > 0 then Result := buff[length(buff) - 1] else Result := #0; end; procedure TUPascalString.SetLast(const Value: USystemChar); begin buff[length(buff) - 1] := Value; end; function TUPascalString.GetFirst: USystemChar; begin if length(buff) > 0 then Result := buff[0] else Result := #0; end; procedure TUPascalString.SetFirst(const Value: USystemChar); begin buff[0] := Value; end; function TUPascalString.GetUpperChar(index: Integer): USystemChar; begin Result := GetChars(index); if CharIn(Result, cLoAtoZ) then Result := USystemChar(Word(Result) xor $0020); end; procedure TUPascalString.SetUpperChar(index: Integer; const Value: USystemChar); begin if CharIn(Value, cLoAtoZ) then SetChars(index, USystemChar(Word(Value) xor $0020)) else SetChars(index, Value); end; function TUPascalString.GetLowerChar(index: Integer): USystemChar; begin Result := GetChars(index); if CharIn(Result, cHiAtoZ) then Result := USystemChar(Word(Result) or $0020); end; procedure TUPascalString.SetLowerChar(index: Integer; const Value: USystemChar); begin if CharIn(Value, cHiAtoZ) then SetChars(index, USystemChar(Word(Value) or $0020)) else SetChars(index, Value); end; {$IFDEF DELPHI} class operator TUPascalString.Equal(const Lhs, Rhs: TUPascalString): Boolean; begin Result := (Lhs.Len = Rhs.Len) and (Lhs.Text = Rhs.Text); end; class operator TUPascalString.NotEqual(const Lhs, Rhs: TUPascalString): Boolean; begin Result := not(Lhs = Rhs); end; class operator TUPascalString.GreaterThan(const Lhs, Rhs: TUPascalString): Boolean; begin Result := Lhs.Text > Rhs.Text; end; class operator TUPascalString.GreaterThanOrEqual(const Lhs, Rhs: TUPascalString): Boolean; begin Result := Lhs.Text >= Rhs.Text; end; class operator TUPascalString.LessThan(const Lhs, Rhs: TUPascalString): Boolean; begin Result := Lhs.Text < Rhs.Text; end; class operator TUPascalString.LessThanOrEqual(const Lhs, Rhs: TUPascalString): Boolean; begin Result := Lhs.Text <= Rhs.Text; end; class operator TUPascalString.Add(const Lhs, Rhs: TUPascalString): TUPascalString; begin CombineCharsPP(Lhs.buff, Rhs.buff, Result.buff); end; class operator TUPascalString.Add(const Lhs: USystemString; const Rhs: TUPascalString): TUPascalString; begin CombineCharsSP(Lhs, Rhs.buff, Result.buff); end; class operator TUPascalString.Add(const Lhs: TUPascalString; const Rhs: USystemString): TUPascalString; begin CombineCharsPS(Lhs.buff, Rhs, Result.buff); end; class operator TUPascalString.Add(const Lhs: USystemChar; const Rhs: TUPascalString): TUPascalString; begin CombineCharsCP(Lhs, Rhs.buff, Result.buff); end; class operator TUPascalString.Add(const Lhs: TUPascalString; const Rhs: USystemChar): TUPascalString; begin CombineCharsPC(Lhs.buff, Rhs, Result.buff); end; class operator TUPascalString.Implicit(Value: RawByteString): TUPascalString; begin Result.Text := Value; end; class operator TUPascalString.Implicit(Value: TPascalString): TUPascalString; begin Result.Bytes := Value.Bytes; end; class operator TUPascalString.Implicit(Value: USystemString): TUPascalString; begin Result.Text := Value; end; class operator TUPascalString.Implicit(Value: USystemChar): TUPascalString; begin Result.Len := 1; Result.buff[0] := Value; end; class operator TUPascalString.Implicit(Value: TUPascalString): USystemString; begin Result := Value.Text; end; class operator TUPascalString.Implicit(Value: TUPascalString): Variant; begin Result := Value.Text; end; class operator TUPascalString.Explicit(Value: TUPascalString): RawByteString; begin Result := Value.Text; end; class operator TUPascalString.Explicit(Value: TUPascalString): TPascalString; begin Result.Bytes := Value.Bytes; end; class operator TUPascalString.Explicit(Value: TUPascalString): USystemString; begin Result := Value.Text; end; class operator TUPascalString.Explicit(Value: TUPascalString): Variant; begin Result := Value.Text; end; class operator TUPascalString.Explicit(Value: USystemString): TUPascalString; begin Result.Text := Value; end; class operator TUPascalString.Explicit(Value: Variant): TUPascalString; begin Result.Text := VarToStr(Value); end; class operator TUPascalString.Explicit(Value: USystemChar): TUPascalString; begin Result.Len := 1; Result.buff[0] := Value; end; {$ENDIF} function TUPascalString.Copy(index, Count: NativeInt): TUPascalString; var L_: NativeInt; begin L_ := length(buff); if (index - 1) + Count > L_ then Count := L_ - (index - 1); SetLength(Result.buff, Count); if Count > 0 then CopyPtr(@buff[index - 1], @Result.buff[0], USystemCharSize * Count); end; function TUPascalString.Same(const p: PUPascalString): Boolean; var i: Integer; s, d: USystemChar; begin Result := (p^.Len = Len); if not Result then Exit; for i := 0 to Len - 1 do begin s := buff[i]; if UCharIn(s, ucHiAtoZ) then inc(s, 32); d := p^.buff[i]; if UCharIn(d, ucHiAtoZ) then inc(d, 32); if s <> d then Exit(False); end; end; function TUPascalString.Same(const t: TUPascalString): Boolean; var i: Integer; s, d: USystemChar; begin Result := (t.Len = Len); if not Result then Exit; for i := 0 to Len - 1 do begin s := buff[i]; if UCharIn(s, ucHiAtoZ) then inc(s, 32); d := t.buff[i]; if UCharIn(d, ucHiAtoZ) then inc(d, 32); if s <> d then Exit(False); end; end; function TUPascalString.Same(const t1, t2: TUPascalString): Boolean; begin Result := Same(@t1) or Same(@t2); end; function TUPascalString.Same(const t1, t2, t3: TUPascalString): Boolean; begin Result := Same(@t1) or Same(@t2) or Same(@t3); end; function TUPascalString.Same(const t1, t2, t3, t4: TUPascalString): Boolean; begin Result := Same(@t1) or Same(@t2) or Same(@t3) or Same(@t4); end; function TUPascalString.Same(const t1, t2, t3, t4, t5: TUPascalString): Boolean; begin Result := Same(@t1) or Same(@t2) or Same(@t3) or Same(@t4) or Same(@t5); end; function TUPascalString.Same(const IgnoreCase: Boolean; const t: TUPascalString): Boolean; var i: Integer; s, d: USystemChar; begin Result := (t.Len = Len); if not Result then Exit; for i := 0 to Len - 1 do begin s := buff[i]; if IgnoreCase then if UCharIn(s, ucHiAtoZ) then inc(s, 32); d := t.buff[i]; if IgnoreCase then if UCharIn(d, ucHiAtoZ) then inc(d, 32); if s <> d then Exit(False); end; end; function TUPascalString.ComparePos(const Offset: Integer; const p: PUPascalString): Boolean; var i, L_: Integer; sourChar, destChar: USystemChar; begin Result := False; i := 1; L_ := p^.Len; if (Offset + L_ - 1) > Len then Exit; while i <= L_ do begin sourChar := GetChars(Offset + i - 1); destChar := p^[i]; if UCharIn(sourChar, ucLoAtoZ) then dec(sourChar, 32); if UCharIn(destChar, ucLoAtoZ) then dec(destChar, 32); if sourChar <> destChar then Exit; inc(i); end; Result := True; end; function TUPascalString.ComparePos(const Offset: Integer; const t: TUPascalString): Boolean; var i, L_: Integer; sourChar, destChar: USystemChar; begin Result := False; i := 1; L_ := t.Len; if (Offset + L_) > Len then Exit; while i <= L_ do begin sourChar := GetChars(Offset + i - 1); destChar := t[i]; if UCharIn(sourChar, ucLoAtoZ) then dec(sourChar, 32); if UCharIn(destChar, ucLoAtoZ) then dec(destChar, 32); if sourChar <> destChar then Exit; inc(i); end; Result := True; end; function TUPascalString.GetPos(const s: TUPascalString; const Offset: Integer = 1): Integer; var i: Integer; begin Result := 0; if s.Len > 0 then for i := Offset to Len - s.Len + 1 do if ComparePos(i, @s) then Exit(i); end; function TUPascalString.GetPos(const s: PUPascalString; const Offset: Integer = 1): Integer; var i: Integer; begin Result := 0; if s^.Len > 0 then for i := Offset to Len - s^.Len + 1 do if ComparePos(i, s) then Exit(i); end; function TUPascalString.Exists(c: USystemChar): Boolean; var i: Integer; begin for i := low(buff) to high(buff) do if buff[i] = c then Exit(True); Result := False; end; function TUPascalString.Exists(c: array of USystemChar): Boolean; var i: Integer; begin for i := low(buff) to high(buff) do if UCharIn(buff[i], c) then Exit(True); Result := False; end; function TUPascalString.Exists(const s: TUPascalString): Boolean; begin Result := GetPos(@s, 1) > 0; end; function TUPascalString.hash: TUHash; begin Result := UFastHashPPascalString(@Self); end; function TUPascalString.Hash64: TUHash64; begin Result := UFastHash64PPascalString(@Self); end; function TUPascalString.GetCharCount(c: USystemChar): Integer; var i: Integer; begin Result := 0; for i := low(buff) to high(buff) do if UCharIn(buff[i], c) then inc(Result); end; procedure TUPascalString.DeleteLast; begin if Len > 0 then SetLength(buff, length(buff) - 1); end; procedure TUPascalString.DeleteFirst; begin if Len > 0 then buff := System.Copy(buff, 1, Len); end; procedure TUPascalString.Delete(idx, cnt: Integer); begin if (idx + cnt <= Len) then Text := GetString(1, idx) + GetString(idx + cnt, Len + 1) else Text := GetString(1, idx); end; procedure TUPascalString.Clear; begin SetLength(buff, 0); end; procedure TUPascalString.Append(t: TUPascalString); var r, L_: Integer; begin L_ := length(t.buff); if L_ > 0 then begin r := length(buff); SetLength(buff, r + L_); CopyPtr(@t.buff[0], @buff[r], L_ * USystemCharSize); end; end; procedure TUPascalString.Append(c: USystemChar); begin SetLength(buff, length(buff) + 1); buff[length(buff) - 1] := c; end; procedure TUPascalString.Append(const Fmt: SystemString; const Args: array of const); begin Append(PFormat(Fmt, Args)); end; function TUPascalString.GetString(bPos, ePos: NativeInt): TUPascalString; begin if ePos > length(buff) then Result := Self.Copy(bPos, length(buff) - bPos + 1) else Result := Self.Copy(bPos, (ePos - bPos)); end; procedure TUPascalString.Insert(AText: USystemString; idx: Integer); begin Text := GetString(1, idx) + AText + GetString(idx + 1, Len); end; procedure TUPascalString.FastAsText(var output: USystemString); begin SetLength(output, length(buff)); if length(buff) > 0 then CopyPtr(@buff[0], @output[UFirstCharPos], length(buff) * USystemCharSize); end; procedure TUPascalString.FastGetBytes(var output: TBytes); begin {$IFDEF FPC} output := SysUtils.TEncoding.UTF8.GetBytes(buff); {$ELSE} output := SysUtils.TEncoding.UTF8.GetBytes(buff); {$ENDIF} end; function TUPascalString.LowerText: USystemString; begin Result := LowerCase(Text); end; function TUPascalString.UpperText: USystemString; begin Result := UpperCase(Text); end; function TUPascalString.Invert: TUPascalString; var i, j: Integer; begin SetLength(Result.buff, length(buff)); j := low(Result.buff); for i := high(buff) downto low(buff) do begin Result.buff[j] := buff[i]; inc(j); end; end; function TUPascalString.TrimChar(const Chars: TUPascalString): TUPascalString; var L_, bp, EP: Integer; begin Result := ''; L_ := Len; if L_ > 0 then begin bp := 1; while UCharIn(GetChars(bp), @Chars) do begin inc(bp); if (bp > L_) then begin Result := ''; Exit; end; end; if bp > L_ then Result := '' else begin EP := L_; while UCharIn(GetChars(EP), @Chars) do begin dec(EP); if (EP < 1) then begin Result := ''; Exit; end; end; Result := GetString(bp, EP + 1); end; end; end; function TUPascalString.DeleteChar(const Chars: TUPascalString): TUPascalString; var c: USystemChar; begin Result := ''; for c in buff do if not UCharIn(c, @Chars) then Result.Append(c); end; function TUPascalString.DeleteChar(const Chars: TUOrdChars): TUPascalString; var c: USystemChar; begin Result := ''; for c in buff do if not UCharIn(c, Chars) then Result.Append(c); end; function TUPascalString.ReplaceChar(const Chars: TUPascalString; const newChar: USystemChar): TUPascalString; var i: Integer; begin Result.Len := Len; for i := low(buff) to high(buff) do if UCharIn(buff[i], Chars) then Result.buff[i] := newChar else Result.buff[i] := buff[i]; end; function TUPascalString.ReplaceChar(const Chars, newChar: USystemChar): TUPascalString; var i: Integer; begin Result.Len := Len; for i := low(buff) to high(buff) do if UCharIn(buff[i], Chars) then Result.buff[i] := newChar else Result.buff[i] := buff[i]; end; function TUPascalString.ReplaceChar(const Chars: TUOrdChars; const newChar: USystemChar): TUPascalString; var i: Integer; begin Result.Len := Len; for i := low(buff) to high(buff) do if UCharIn(buff[i], Chars) then Result.buff[i] := newChar else Result.buff[i] := buff[i]; end; function TUPascalString.BuildPlatformPChar: Pointer; type TAnsiChar_Buff = array [0 .. MaxInt - 1] of Byte; PAnsiChar_Buff = ^TAnsiChar_Buff; var swap_buff: TBytes; buff_P: PAnsiChar_Buff; begin swap_buff := PlatformBytes; buff_P := GetMemory(length(swap_buff) + 1); CopyPtr(@swap_buff[0], buff_P, length(swap_buff)); buff_P^[length(swap_buff)] := 0; SetLength(swap_buff, 0); Result := buff_P; end; class procedure TUPascalString.FreePlatformPChar(p: Pointer); begin FreeMemory(p); end; class function TUPascalString.RandomString(L_: Integer): TUPascalString; var i: Integer; rnd: TMT19937Random; begin Result.L := L_; rnd := TMT19937Random.Create; for i := 1 to L_ do Result[i] := USystemChar(rnd.Rand32($7E - $20) + $20); DisposeObject(rnd); end; function TUPascalString.SmithWaterman(const p: PUPascalString): Double; begin Result := USmithWatermanCompare(@Self, @p); end; function TUPascalString.SmithWaterman(const s: TUPascalString): Double; begin Result := USmithWatermanCompare(@Self, @s); end; function TUPascalString.BOMBytes: TBytes; begin {$IFDEF FPC} Result := GetBytes; {$ELSE} Result := SysUtils.TEncoding.UTF8.GetPreamble + GetBytes; {$ENDIF} end; end.