912 lines
26 KiB
ObjectPascal
912 lines
26 KiB
ObjectPascal
{******************************************************************************}
|
|
{ }
|
|
{ Library: Fundamentals 5.00 }
|
|
{ File name: flcCharSet.pas }
|
|
{ File version: 5.04 }
|
|
{ Description: Character/Byte set functions. }
|
|
{ }
|
|
{ Copyright: Copyright (c) 2000-2020, David J Butler }
|
|
{ All rights reserved. }
|
|
{ Redistribution and use in source and binary forms, with }
|
|
{ or without modification, are permitted provided that }
|
|
{ the following conditions are met: }
|
|
{ Redistributions of source code must retain the above }
|
|
{ copyright notice, this list of conditions and the }
|
|
{ following disclaimer. }
|
|
{ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
|
|
{ CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
|
|
{ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
|
|
{ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
|
|
{ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
|
|
{ THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
|
|
{ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
|
|
{ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
|
|
{ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
|
|
{ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
|
|
{ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
|
|
{ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
|
|
{ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
|
|
{ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
|
|
{ POSSIBILITY OF SUCH DAMAGE. }
|
|
{ }
|
|
{ Github: https://github.com/fundamentalslib }
|
|
{ E-mail: fundamentals.library at gmail.com }
|
|
{ }
|
|
{ Revision history: }
|
|
{ }
|
|
{ 2000/02/02 0.01 Initial version. }
|
|
{ 2017/10/07 5.02 Moved functions from unit flcUtils. }
|
|
{ 2018/08/11 5.03 Moved functions from unit flcStrings. }
|
|
{ 2018/08/14 5.04 ByteChar changes. }
|
|
{ }
|
|
{ Supported compilers: }
|
|
{ }
|
|
{ Delphi 10 Win32 5.02 2016/01/09 }
|
|
{ }
|
|
{******************************************************************************}
|
|
|
|
{$INCLUDE ..\flcInclude.inc}
|
|
|
|
{$IFDEF FREEPASCAL}
|
|
{$WARNINGS OFF}
|
|
{$HINTS OFF}
|
|
{$ENDIF}
|
|
|
|
{$IFDEF DEBUG}
|
|
{$IFDEF TEST}
|
|
{$DEFINE CHARSET_TEST}
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
|
|
unit flcCharSet;
|
|
|
|
interface
|
|
|
|
uses
|
|
{ Fundamentals }
|
|
flcStdTypes;
|
|
|
|
|
|
|
|
{ }
|
|
{ Sets }
|
|
{ Operations on byte and character sets. }
|
|
{ }
|
|
const
|
|
CompleteByteCharSet = [ByteChar(#0)..ByteChar(#255)];
|
|
CompleteByteSet = [0..255];
|
|
|
|
function AsAnsiCharSet(const C: array of ByteChar): ByteCharSet;
|
|
function AsByteSet(const C: array of Byte): ByteSet;
|
|
function AsByteCharSet(const C: array of ByteChar): ByteCharSet;
|
|
procedure ComplementChar(var C: ByteCharSet; const Ch: ByteChar);
|
|
procedure ClearCharSet(var C: ByteCharSet);
|
|
procedure FillCharSet(var C: ByteCharSet);
|
|
procedure ComplementCharSet(var C: ByteCharSet);
|
|
procedure AssignCharSet(var DestSet: ByteCharSet; const SourceSet: ByteCharSet); overload;
|
|
procedure Union(var DestSet: ByteCharSet; const SourceSet: ByteCharSet); overload;
|
|
procedure Difference(var DestSet: ByteCharSet; const SourceSet: ByteCharSet); overload;
|
|
procedure Intersection(var DestSet: ByteCharSet; const SourceSet: ByteCharSet); overload;
|
|
procedure XORCharSet(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
function IsSubSet(const A, B: ByteCharSet): Boolean;
|
|
function IsEqual(const A, B: ByteCharSet): Boolean; overload;
|
|
function IsEmpty(const C: ByteCharSet): Boolean;
|
|
function IsComplete(const C: ByteCharSet): Boolean;
|
|
function CharCount(const C: ByteCharSet): Integer; overload;
|
|
procedure ConvertCaseInsensitive(var C: ByteCharSet);
|
|
function CaseInsensitiveCharSet(const C: ByteCharSet): ByteCharSet;
|
|
function CharSetToStrB(const C: ByteCharSet): RawByteString;
|
|
function StrToCharSetB(const S: RawByteString): ByteCharSet;
|
|
|
|
|
|
|
|
{ }
|
|
{ Character class strings }
|
|
{ }
|
|
{ Perl-like character class string representation of character sets, eg }
|
|
{ the set ['0', 'A'..'Z'] is presented as [0A-Z]. Negated classes are also }
|
|
{ supported, eg '[^A-Za-z]' is all non-alpha characters. The empty and }
|
|
{ complete sets have special representations; '[]' and '.' respectively. }
|
|
{ }
|
|
{$IFDEF SupportAnsiString}
|
|
function CharSetToCharClassStr(const C: ByteCharSet): AnsiString;
|
|
{$ENDIF}
|
|
// function CharClassStrToCharSet(const S: AnsiString): CharSet;
|
|
|
|
|
|
|
|
{ }
|
|
{ Tests }
|
|
{ }
|
|
{$IFDEF CHARSET_TEST}
|
|
procedure Test;
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
implementation
|
|
|
|
uses
|
|
{ Fundamentals }
|
|
flcASCII,
|
|
flcUtils;
|
|
|
|
|
|
|
|
{ }
|
|
{ Sets }
|
|
{ }
|
|
function AsAnsiCharSet(const C: array of ByteChar): ByteCharSet;
|
|
var I: Integer;
|
|
begin
|
|
Result := [];
|
|
for I := 0 to High(C) do
|
|
Include(Result, C[I]);
|
|
end;
|
|
|
|
function AsByteSet(const C: array of Byte): ByteSet;
|
|
var I: Integer;
|
|
begin
|
|
Result := [];
|
|
for I := 0 to High(C) do
|
|
Include(Result, C[I]);
|
|
end;
|
|
|
|
function AsByteCharSet(const C: array of ByteChar): ByteCharSet;
|
|
var I: Integer;
|
|
begin
|
|
Result := [];
|
|
for I := 0 to High(C) do
|
|
Include(Result, C[I]);
|
|
end;
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure ComplementChar(var C: ByteCharSet; const Ch: ByteChar);
|
|
asm
|
|
MOVZX ECX, DL
|
|
BTC [EAX], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure ComplementChar(var C: ByteCharSet; const Ch: ByteChar);
|
|
begin
|
|
if Ch in C then
|
|
Exclude(C, Ch)
|
|
else
|
|
Include(C, Ch);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure ClearCharSet(var C: ByteCharSet);
|
|
asm
|
|
XOR EDX, EDX
|
|
MOV [EAX], EDX
|
|
MOV [EAX + 4], EDX
|
|
MOV [EAX + 8], EDX
|
|
MOV [EAX + 12], EDX
|
|
MOV [EAX + 16], EDX
|
|
MOV [EAX + 20], EDX
|
|
MOV [EAX + 24], EDX
|
|
MOV [EAX + 28], EDX
|
|
end;
|
|
{$ELSE}
|
|
procedure ClearCharSet(var C: ByteCharSet);
|
|
begin
|
|
C := [];
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure FillCharSet(var C: ByteCharSet);
|
|
asm
|
|
MOV EDX, $FFFFFFFF
|
|
MOV [EAX], EDX
|
|
MOV [EAX + 4], EDX
|
|
MOV [EAX + 8], EDX
|
|
MOV [EAX + 12], EDX
|
|
MOV [EAX + 16], EDX
|
|
MOV [EAX + 20], EDX
|
|
MOV [EAX + 24], EDX
|
|
MOV [EAX + 28], EDX
|
|
end;
|
|
{$ELSE}
|
|
procedure FillCharSet(var C: ByteCharSet);
|
|
begin
|
|
C := [ByteChar(0)..ByteChar(255)];
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure ComplementCharSet(var C: ByteCharSet);
|
|
asm
|
|
NOT DWORD PTR [EAX]
|
|
NOT DWORD PTR [EAX + 4]
|
|
NOT DWORD PTR [EAX + 8]
|
|
NOT DWORD PTR [EAX + 12]
|
|
NOT DWORD PTR [EAX + 16]
|
|
NOT DWORD PTR [EAX + 20]
|
|
NOT DWORD PTR [EAX + 24]
|
|
NOT DWORD PTR [EAX + 28]
|
|
end;
|
|
{$ELSE}
|
|
procedure ComplementCharSet(var C: ByteCharSet);
|
|
begin
|
|
C := [ByteChar(0)..ByteChar(255)] - C;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure AssignCharSet(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
asm
|
|
MOV ECX, [EDX]
|
|
MOV [EAX], ECX
|
|
MOV ECX, [EDX + 4]
|
|
MOV [EAX + 4], ECX
|
|
MOV ECX, [EDX + 8]
|
|
MOV [EAX + 8], ECX
|
|
MOV ECX, [EDX + 12]
|
|
MOV [EAX + 12], ECX
|
|
MOV ECX, [EDX + 16]
|
|
MOV [EAX + 16], ECX
|
|
MOV ECX, [EDX + 20]
|
|
MOV [EAX + 20], ECX
|
|
MOV ECX, [EDX + 24]
|
|
MOV [EAX + 24], ECX
|
|
MOV ECX, [EDX + 28]
|
|
MOV [EAX + 28], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure AssignCharSet(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
begin
|
|
DestSet := SourceSet;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure Union(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
asm
|
|
MOV ECX, [EDX]
|
|
OR [EAX], ECX
|
|
MOV ECX, [EDX + 4]
|
|
OR [EAX + 4], ECX
|
|
MOV ECX, [EDX + 8]
|
|
OR [EAX + 8], ECX
|
|
MOV ECX, [EDX + 12]
|
|
OR [EAX + 12], ECX
|
|
MOV ECX, [EDX + 16]
|
|
OR [EAX + 16], ECX
|
|
MOV ECX, [EDX + 20]
|
|
OR [EAX + 20], ECX
|
|
MOV ECX, [EDX + 24]
|
|
OR [EAX + 24], ECX
|
|
MOV ECX, [EDX + 28]
|
|
OR [EAX + 28], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure Union(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
begin
|
|
DestSet := DestSet + SourceSet;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure Difference(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
asm
|
|
MOV ECX, [EDX]
|
|
NOT ECX
|
|
AND [EAX], ECX
|
|
MOV ECX, [EDX + 4]
|
|
NOT ECX
|
|
AND [EAX + 4], ECX
|
|
MOV ECX, [EDX + 8]
|
|
NOT ECX
|
|
AND [EAX + 8],ECX
|
|
MOV ECX, [EDX + 12]
|
|
NOT ECX
|
|
AND [EAX + 12], ECX
|
|
MOV ECX, [EDX + 16]
|
|
NOT ECX
|
|
AND [EAX + 16], ECX
|
|
MOV ECX, [EDX + 20]
|
|
NOT ECX
|
|
AND [EAX + 20], ECX
|
|
MOV ECX, [EDX + 24]
|
|
NOT ECX
|
|
AND [EAX + 24], ECX
|
|
MOV ECX, [EDX + 28]
|
|
NOT ECX
|
|
AND [EAX + 28], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure Difference(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
begin
|
|
DestSet := DestSet - SourceSet;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure Intersection(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
asm
|
|
MOV ECX, [EDX]
|
|
AND [EAX], ECX
|
|
MOV ECX, [EDX + 4]
|
|
AND [EAX + 4], ECX
|
|
MOV ECX, [EDX + 8]
|
|
AND [EAX + 8], ECX
|
|
MOV ECX, [EDX + 12]
|
|
AND [EAX + 12], ECX
|
|
MOV ECX, [EDX + 16]
|
|
AND [EAX + 16], ECX
|
|
MOV ECX, [EDX + 20]
|
|
AND [EAX + 20], ECX
|
|
MOV ECX, [EDX + 24]
|
|
AND [EAX + 24], ECX
|
|
MOV ECX, [EDX + 28]
|
|
AND [EAX + 28], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure Intersection(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
begin
|
|
DestSet := DestSet * SourceSet;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure XORCharSet(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
asm
|
|
MOV ECX, [EDX]
|
|
XOR [EAX], ECX
|
|
MOV ECX, [EDX + 4]
|
|
XOR [EAX + 4], ECX
|
|
MOV ECX, [EDX + 8]
|
|
XOR [EAX + 8], ECX
|
|
MOV ECX, [EDX + 12]
|
|
XOR [EAX + 12], ECX
|
|
MOV ECX, [EDX + 16]
|
|
XOR [EAX + 16], ECX
|
|
MOV ECX, [EDX + 20]
|
|
XOR [EAX + 20], ECX
|
|
MOV ECX, [EDX + 24]
|
|
XOR [EAX + 24], ECX
|
|
MOV ECX, [EDX + 28]
|
|
XOR [EAX + 28], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure XORCharSet(var DestSet: ByteCharSet; const SourceSet: ByteCharSet);
|
|
var Ch: ByteChar;
|
|
begin
|
|
for Ch := ByteChar(0) to ByteChar(255) do
|
|
if Ch in DestSet then
|
|
begin
|
|
if Ch in SourceSet then
|
|
Exclude(DestSet, Ch);
|
|
end else
|
|
if Ch in SourceSet then
|
|
Include(DestSet, Ch);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
function IsSubSet(const A, B: ByteCharSet): Boolean;
|
|
asm
|
|
MOV ECX, [EDX]
|
|
NOT ECX
|
|
AND ECX, [EAX]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 4]
|
|
NOT ECX
|
|
AND ECX, [EAX + 4]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 8]
|
|
NOT ECX
|
|
AND ECX, [EAX + 8]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 12]
|
|
NOT ECX
|
|
AND ECX, [EAX + 12]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 16]
|
|
NOT ECX
|
|
AND ECX, [EAX + 16]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 20]
|
|
NOT ECX
|
|
AND ECX, [EAX + 20]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 24]
|
|
NOT ECX
|
|
AND ECX, [EAX + 24]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 28]
|
|
NOT ECX
|
|
AND ECX, [EAX + 28]
|
|
JNE @Fin0
|
|
MOV EAX, 1
|
|
RET
|
|
@Fin0:
|
|
XOR EAX, EAX
|
|
end;
|
|
{$ELSE}
|
|
function IsSubSet(const A, B: ByteCharSet): Boolean;
|
|
begin
|
|
Result := A <= B;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
function IsEqual(const A, B: ByteCharSet): Boolean;
|
|
asm
|
|
MOV ECX, [EDX]
|
|
XOR ECX, [EAX]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 4]
|
|
XOR ECX, [EAX + 4]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 8]
|
|
XOR ECX, [EAX + 8]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 12]
|
|
XOR ECX, [EAX + 12]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 16]
|
|
XOR ECX, [EAX + 16]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 20]
|
|
XOR ECX, [EAX + 20]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 24]
|
|
XOR ECX, [EAX + 24]
|
|
JNE @Fin0
|
|
MOV ECX, [EDX + 28]
|
|
XOR ECX, [EAX + 28]
|
|
JNE @Fin0
|
|
MOV EAX, 1
|
|
RET
|
|
@Fin0:
|
|
XOR EAX, EAX
|
|
end;
|
|
{$ELSE}
|
|
function IsEqual(const A, B: ByteCharSet): Boolean;
|
|
begin
|
|
Result := A = B;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
function IsEmpty(const C: ByteCharSet): Boolean;
|
|
asm
|
|
MOV EDX, [EAX]
|
|
OR EDX, [EAX + 4]
|
|
OR EDX, [EAX + 8]
|
|
OR EDX, [EAX + 12]
|
|
OR EDX, [EAX + 16]
|
|
OR EDX, [EAX + 20]
|
|
OR EDX, [EAX + 24]
|
|
OR EDX, [EAX + 28]
|
|
JNE @Fin0
|
|
MOV EAX, 1
|
|
RET
|
|
@Fin0:
|
|
XOR EAX,EAX
|
|
end;
|
|
{$ELSE}
|
|
function IsEmpty(const C: ByteCharSet): Boolean;
|
|
begin
|
|
Result := C = [];
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
function IsComplete(const C: ByteCharSet): Boolean;
|
|
asm
|
|
MOV EDX, [EAX]
|
|
AND EDX, [EAX + 4]
|
|
AND EDX, [EAX + 8]
|
|
AND EDX, [EAX + 12]
|
|
AND EDX, [EAX + 16]
|
|
AND EDX, [EAX + 20]
|
|
AND EDX, [EAX + 24]
|
|
AND EDX, [EAX + 28]
|
|
CMP EDX, $FFFFFFFF
|
|
JNE @Fin0
|
|
MOV EAX, 1
|
|
RET
|
|
@Fin0:
|
|
XOR EAX, EAX
|
|
end;
|
|
{$ELSE}
|
|
function IsComplete(const C: ByteCharSet): Boolean;
|
|
begin
|
|
Result := C = CompleteByteCharSet;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF __ASM386_DELPHI}
|
|
function CharCount(const C: ByteCharSet): Integer;
|
|
asm
|
|
PUSH EBX
|
|
PUSH ESI
|
|
MOV EBX, EAX
|
|
XOR ESI, ESI
|
|
MOV EAX, [EBX]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 4]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 8]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 12]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 16]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 20]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 24]
|
|
CALL BitCount32
|
|
ADD ESI, EAX
|
|
MOV EAX, [EBX + 28]
|
|
CALL BitCount32
|
|
ADD EAX, ESI
|
|
POP ESI
|
|
POP EBX
|
|
end;
|
|
{$ELSE}
|
|
function CharCount(const C: ByteCharSet): Integer;
|
|
var I : ByteChar;
|
|
begin
|
|
Result := 0;
|
|
for I := ByteChar(0) to ByteChar(255) do
|
|
if I in C then
|
|
Inc(Result);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
procedure ConvertCaseInsensitive(var C: ByteCharSet);
|
|
asm
|
|
MOV ECX, [EAX + 12]
|
|
AND ECX, $3FFFFFF
|
|
OR [EAX + 8], ECX
|
|
MOV ECX, [EAX + 8]
|
|
AND ECX, $3FFFFFF
|
|
OR [EAX + 12], ECX
|
|
end;
|
|
{$ELSE}
|
|
procedure ConvertCaseInsensitive(var C: ByteCharSet);
|
|
var Ch : ByteChar;
|
|
begin
|
|
for Ch := ByteChar(Ord('A')) to ByteChar(Ord('Z')) do
|
|
if Ch in C then
|
|
Include(C, ByteChar(Ord(Ch) + 32));
|
|
for Ch := ByteChar(Ord('a')) to ByteChar(Ord('z')) do
|
|
if Ch in C then
|
|
Include(C, ByteChar(Ord(Ch) - 32));
|
|
end;
|
|
{$ENDIF}
|
|
|
|
function CaseInsensitiveCharSet(const C: ByteCharSet): ByteCharSet;
|
|
begin
|
|
AssignCharSet(Result, C);
|
|
ConvertCaseInsensitive(Result);
|
|
end;
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
function CharSetToStrB(const C: ByteCharSet): RawByteString; // Andrew N. Driazgov
|
|
asm
|
|
PUSH EBX
|
|
MOV ECX, $100
|
|
MOV EBX, EAX
|
|
PUSH ESI
|
|
MOV EAX, EDX
|
|
SUB ESP, ECX
|
|
XOR ESI, ESI
|
|
XOR EDX, EDX
|
|
@@lp: BT [EBX], EDX
|
|
JC @@mm
|
|
@@nx: INC EDX
|
|
DEC ECX
|
|
JNE @@lp
|
|
MOV ECX, ESI
|
|
MOV EDX, ESP
|
|
CALL System.@LStrFromPCharLen
|
|
ADD ESP, $100
|
|
POP ESI
|
|
POP EBX
|
|
RET
|
|
@@mm: MOV [ESP + ESI], DL
|
|
INC ESI
|
|
JMP @@nx
|
|
end;
|
|
{$ELSE}
|
|
function CharSetToStrB(const C: ByteCharSet): RawByteString;
|
|
// Implemented recursively to avoid multiple memory allocations
|
|
procedure CharMatch(const Start: ByteChar; const Count: Integer);
|
|
var Ch : ByteChar;
|
|
begin
|
|
for Ch := Start to ByteChar(255) do
|
|
if Ch in C then
|
|
begin
|
|
if Ch = ByteChar(255) then
|
|
SetLength(Result, Count + 1)
|
|
else
|
|
CharMatch(ByteChar(Byte(Ch) + 1), Count + 1);
|
|
Result[Count + 1] := Ch;
|
|
exit;
|
|
end;
|
|
SetLength(Result, Count);
|
|
end;
|
|
begin
|
|
CharMatch(ByteChar(0), 0);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
{$IFDEF ASM386_DELPHI}
|
|
function StrToCharSetB(const S: RawByteString): ByteCharSet; // Andrew N. Driazgov
|
|
asm
|
|
XOR ECX, ECX
|
|
MOV [EDX], ECX
|
|
MOV [EDX + 4], ECX
|
|
MOV [EDX + 8], ECX
|
|
MOV [EDX + 12], ECX
|
|
MOV [EDX + 16], ECX
|
|
MOV [EDX + 20], ECX
|
|
MOV [EDX + 24], ECX
|
|
MOV [EDX + 28], ECX
|
|
TEST EAX, EAX
|
|
JE @@qt
|
|
MOV ECX, [EAX - 4]
|
|
PUSH EBX
|
|
SUB ECX, 8
|
|
JS @@nx
|
|
@@lp: MOVZX EBX, BYTE PTR [EAX]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 1]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 2]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 3]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 4]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 5]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 6]
|
|
BTS [EDX], EBX
|
|
MOVZX EBX, BYTE PTR [EAX + 7]
|
|
BTS [EDX], EBX
|
|
ADD EAX, 8
|
|
SUB ECX, 8
|
|
JNS @@lp
|
|
@@nx: JMP DWORD PTR @@tV[ECX * 4 + 32]
|
|
@@tV: DD @@ex, @@t1, @@t2, @@t3
|
|
DD @@t4, @@t5, @@t6, @@t7
|
|
@@t7: MOVZX EBX, BYTE PTR [EAX + 6]
|
|
BTS [EDX], EBX
|
|
@@t6: MOVZX EBX, BYTE PTR [EAX + 5]
|
|
BTS [EDX], EBX
|
|
@@t5: MOVZX EBX, BYTE PTR [EAX + 4]
|
|
BTS [EDX], EBX
|
|
@@t4: MOVZX EBX, BYTE PTR [EAX + 3]
|
|
BTS [EDX], EBX
|
|
@@t3: MOVZX EBX, BYTE PTR [EAX + 2]
|
|
BTS [EDX], EBX
|
|
@@t2: MOVZX EBX, BYTE PTR [EAX + 1]
|
|
BTS [EDX], EBX
|
|
@@t1: MOVZX EBX, BYTE PTR [EAX]
|
|
BTS [EDX], EBX
|
|
@@ex: POP EBX
|
|
@@qt:
|
|
end;
|
|
{$ELSE}
|
|
function StrToCharSetB(const S: RawByteString): ByteCharSet;
|
|
var I : Integer;
|
|
begin
|
|
ClearCharSet(Result);
|
|
for I := 1 to Length(S) do
|
|
Include(Result, S[I]);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
{ }
|
|
{ Character class strings }
|
|
{ }
|
|
{$IFDEF SupportAnsiString}
|
|
function CharSetToCharClassStr(const C: ByteCharSet): AnsiString;
|
|
|
|
function ChStr(const Ch: ByteChar): AnsiString;
|
|
begin
|
|
case Ch of
|
|
'\' : Result := '\\';
|
|
']' : Result := '\]';
|
|
AsciiBEL : Result := '\a';
|
|
AsciiBS : Result := '\b';
|
|
AsciiESC : Result := '\e';
|
|
AsciiFF : Result := '\f';
|
|
AsciiLF : Result := '\n';
|
|
AsciiCR : Result := '\r';
|
|
AsciiHT : Result := '\t';
|
|
AsciiVT : Result := '\v';
|
|
else if (Ch < #32) or (Ch > #127) then // non-printable
|
|
Result := '\x' + Word32ToHexA(Ord(Ch), 1) else
|
|
Result := Ch;
|
|
end;
|
|
end;
|
|
|
|
function SeqStr(const SeqStart, SeqEnd: ByteChar): AnsiString;
|
|
begin
|
|
Result := ChStr(SeqStart);
|
|
if Ord(SeqEnd) = Ord(SeqStart) + 1 then
|
|
Result := Result + ChStr(SeqEnd) else // consequetive chars
|
|
if SeqEnd > SeqStart then // range
|
|
Result := Result + '-' + ChStr(SeqEnd);
|
|
end;
|
|
|
|
var CS : ByteCharSet;
|
|
F : ByteChar;
|
|
SeqStart : ByteChar;
|
|
Seq : Boolean;
|
|
|
|
begin
|
|
if IsComplete(C) then
|
|
Result := '.' else
|
|
if IsEmpty(C) then
|
|
Result := '[]' else
|
|
begin
|
|
Result := '[';
|
|
CS := C;
|
|
if (ByteChar(#0) in C) and (ByteChar(#255) in C) then
|
|
begin
|
|
ComplementCharSet(CS);
|
|
Result := Result + '^';
|
|
end;
|
|
Seq := False;
|
|
SeqStart := #0;
|
|
for F := #0 to #255 do
|
|
if F in CS then
|
|
begin
|
|
if not Seq then
|
|
begin
|
|
SeqStart := F;
|
|
Seq := True;
|
|
end;
|
|
end else
|
|
if Seq then
|
|
begin
|
|
Result := Result + SeqStr(SeqStart, ByteChar(Ord(F) - 1));
|
|
Seq := False;
|
|
end;
|
|
if Seq then
|
|
Result := Result + SeqStr(SeqStart, #255);
|
|
Result := Result + ']';
|
|
end;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
(*
|
|
function CharClassStrToCharSet(const S: AnsiString): CharSet;
|
|
var I, L : Integer;
|
|
|
|
function DecodeChar: ByteChar;
|
|
var J : Integer;
|
|
begin
|
|
if S[I] = '\' then
|
|
if I + 1 = L then
|
|
begin
|
|
Inc(I);
|
|
Result := '\';
|
|
end else
|
|
if not MatchQuantSeqB(J, [['x'], csHexDigit, csHexDigit],
|
|
[mqOnce, mqOnce, mqOptional], S, [moDeterministic], I + 1) then
|
|
begin
|
|
case S[I + 1] of
|
|
'0' : Result := AsciiNULL;
|
|
'a' : Result := AsciiBEL;
|
|
'b' : Result := AsciiBS;
|
|
'e' : Result := AsciiESC;
|
|
'f' : Result := AsciiFF;
|
|
'n' : Result := AsciiLF;
|
|
'r' : Result := AsciiCR;
|
|
't' : Result := AsciiHT;
|
|
'v' : Result := AsciiVT;
|
|
else Result := S[I + 1];
|
|
end;
|
|
Inc(I, 2);
|
|
end else
|
|
begin
|
|
if J = I + 2 then
|
|
Result := ByteChar(HexByteCharToInt(S[J])) else
|
|
Result := ByteChar(HexByteCharToInt(S[J - 1]) * 16 + HexByteCharToInt(S[J]));
|
|
I := J + 1;
|
|
end
|
|
else
|
|
begin
|
|
Result := S[I];
|
|
Inc(I);
|
|
end;
|
|
end;
|
|
|
|
var Neg : Boolean;
|
|
A, B : ByteChar;
|
|
begin
|
|
L := Length(S);
|
|
if (L = 0) or (S = '[]') then
|
|
Result := [] else
|
|
if L = 1 then
|
|
if S[1] in ['.', '*', '?'] then
|
|
Result := CompleteCharSet else
|
|
Result := [S[1]] else
|
|
if (S[1] <> '[') or (S[L] <> ']') then
|
|
raise EConvertError.Create('Invalid character class string')
|
|
else
|
|
begin
|
|
Neg := S[2] = '^';
|
|
I := iif(Neg, 3, 2);
|
|
Result := [];
|
|
while I < L do
|
|
begin
|
|
A := DecodeChar;
|
|
if (I + 1 < L) and (S[I] = '-') then
|
|
begin
|
|
Inc(I);
|
|
B := DecodeChar;
|
|
Result := Result + [A..B];
|
|
end else
|
|
Include(Result, A);
|
|
end;
|
|
if Neg then
|
|
ComplementCharSet(Result);
|
|
end;
|
|
end;
|
|
*)
|
|
|
|
|
|
|
|
{$IFDEF CHARSET_TEST}
|
|
procedure Test;
|
|
begin
|
|
// ByteCharSet
|
|
Assert(CharCount([]) = 0, 'CharCount');
|
|
Assert(CharCount([ByteChar(Ord('a'))..ByteChar(Ord('z'))]) = 26, 'CharCount');
|
|
Assert(CharCount([ByteChar(0), ByteChar(255)]) = 2, 'CharCount');
|
|
|
|
// CharClassStr
|
|
{$IFDEF SupportAnsiString}
|
|
Assert(CharSetToCharClassStr(['a'..'z']) = '[a-z]', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr(CompleteByteCharSet) = '.', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr([#0..#31]) = '[\x0-\x1F]', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr([#0..#32]) = '[\x0- ]', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr(CompleteByteCharSet - ['a']) = '[^a]', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr(CompleteByteCharSet - ['a'..'z']) = '[^a-z]', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr(['a'..'b']) = '[ab]', 'CharClassStr');
|
|
Assert(CharSetToCharClassStr([]) = '[]', 'CharClassStr');
|
|
{$ENDIF}
|
|
(*
|
|
Assert(CharClassStrToCharSet('[a]') = ['a'], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[]') = [], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('.') = CompleteCharSet, 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('') = [], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[a-z]') = ['a'..'z'], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[^a-z]') = CompleteCharSet - ['a'..'z'], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[-]') = ['-'], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[a-]') = ['a', '-'], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[\x5]') = [#$5], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[\x1f]') = [#$1f], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[\x10-]') = [#$10, '-'], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[\x10-\x1f]') = [#$10..#$1f], 'CharClassStr');
|
|
Assert(CharClassStrToCharSet('[\x10-\xf]') = [], 'CharClassStr');
|
|
*)
|
|
end;
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
end.
|
|
|