1015 lines
28 KiB
ObjectPascal
1015 lines
28 KiB
ObjectPascal
{******************************************************************************}
|
|
{ }
|
|
{ Library: Fundamentals 5.00 }
|
|
{ File name: flcUnicodeReader.pas }
|
|
{ File version: 5.09 }
|
|
{ Description: Unicode reader class }
|
|
{ }
|
|
{ Copyright: Copyright (c) 2002-2020, David J Butler }
|
|
{ All rights reserved. }
|
|
{ Redistribution and use in source and binary forms, with }
|
|
{ or without modification, are permitted provided that }
|
|
{ the following conditions are met: }
|
|
{ Redistributions of source code must retain the above }
|
|
{ copyright notice, this list of conditions and the }
|
|
{ following disclaimer. }
|
|
{ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
|
|
{ CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
|
|
{ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
|
|
{ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
|
|
{ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
|
|
{ THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
|
|
{ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
|
|
{ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
|
|
{ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
|
|
{ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
|
|
{ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
|
|
{ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
|
|
{ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
|
|
{ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
|
|
{ POSSIBILITY OF SUCH DAMAGE. }
|
|
{ }
|
|
{ Github: https://github.com/fundamentalslib }
|
|
{ E-mail: fundamentals.library at gmail.com }
|
|
{ }
|
|
{ Revision history: }
|
|
{ }
|
|
{ 2002/04/19 0.01 Initial version. }
|
|
{ 2002/10/28 3.02 Refactored for Fundamentals 3. }
|
|
{ 2002/10/29 3.03 Bug fixes and improvements. }
|
|
{ 2002/11/05 3.04 Improved buffer handling. }
|
|
{ 2004/01/02 3.05 Changed reader's block size to 64K as suggested by Eb. }
|
|
{ 2005/08/27 4.06 Revised for Fundamentals 4. }
|
|
{ 2011/10/16 4.07 Changes for Unicode Delphi. }
|
|
{ 2015/04/01 4.08 Revision. }
|
|
{ 2016/01/17 5.09 Revised for Fundamentals 5. }
|
|
{ }
|
|
{ Supported compilers: }
|
|
{ }
|
|
{ Delphi 2010-10.4 Win32/Win64 5.09 2020/06/02 }
|
|
{ Delphi 10.2-10.4 Linux64 5.09 2020/06/02 }
|
|
{ FreePascal 3.0.4 Win64 5.09 2020/06/02 }
|
|
{ }
|
|
{******************************************************************************}
|
|
|
|
{$INCLUDE ..\flcInclude.inc}
|
|
|
|
unit flcUnicodeReader;
|
|
|
|
interface
|
|
|
|
uses
|
|
{ System }
|
|
SysUtils,
|
|
|
|
{ Fundamentals }
|
|
flcStdTypes,
|
|
flcStreams,
|
|
flcUnicodeCodecs;
|
|
|
|
|
|
|
|
{ }
|
|
{ TUnicodeReader }
|
|
{ }
|
|
type
  // Buffered reader that presents the data of an AReader as WideChar
  // characters. Raw bytes are read in blocks and, when a codec is assigned,
  // decoded through it; without a codec the raw bytes are copied as 16-bit
  // units.
  TUnicodeReader = class
  protected
    FReader         : AReader;             // underlying raw data source
    FReaderOwner    : Boolean;             // True: FReader is freed by Destroy
    FReaderStartPos : Int64;               // FReader.Position at construction; restored by Reset
    FCodec          : TCustomUnicodeCodec; // decoder; nil means raw 16-bit data
    FCodecOwner     : Boolean;             // True: FCodec is freed by Destroy
    FBuffer         : UnicodeString;       // decoded character buffer
    FBufPos         : Integer;             // offset of the next unread character in FBuffer
    FBufLen         : Integer;             // number of valid characters in FBuffer
    FRawBuf         : Pointer;             // raw block buffer allocated by Create
    FRawSize        : Integer;             // bytes in FRawBuf not yet decoded

    // Raises EUnicodeReaderReadError.
    procedure ReadError;
    // Tries to make at least Count characters available; returns the number
    // of buffered characters.
    function  BufferChars(const Count: Integer): Integer;
    // True when at least Count characters are (or could be made) available.
    function  GetBuffer(const Count: Integer): Boolean;

  public
    constructor Create(const Reader: AReader;
                const ReaderOwner: Boolean = True;
                const Codec: TCustomUnicodeCodec = nil;
                const CodecOwner: Boolean = True);
    destructor Destroy; override;

    property  Codec: TCustomUnicodeCodec read FCodec;
    property  CodecOwner: Boolean read FCodecOwner write FCodecOwner;

    // Position control
    procedure Reset;
    function  EOF: Boolean;

    // Reading a fixed number of characters
    function  ReadChar: WideChar;
    function  ReadWide(const Buf: PWideChar; const Len: Integer): Integer;
    function  ReadUnicodeStr(const Len: Integer): UnicodeString;
    function  ReadUTF8Str(const Len: Integer): RawByteString;

    // Skipping
    procedure Skip(const Count: Integer);
    function  SkipAll(const CharMatchFunc: TWideCharMatchFunction): Integer;

    // Matching at the current position
    function  MatchChar(const CharMatchFunc: TWideCharMatchFunction;
              const Skip: Boolean): Boolean;
    function  MatchWideChar(const Ch: WideChar; const Skip: Boolean): Boolean;

    function  MatchRawByteStr(const S: RawByteString; const CaseSensitive: Boolean;
              const Skip: Boolean): Boolean;
    function  MatchRawByteStrDelimited(const S: RawByteString;
              const CaseSensitive: Boolean;
              const Delimiter: TWideCharMatchFunction;
              const Skip: Boolean): Boolean;

    // Counting matching characters without consuming them
    function  MatchChars(const CharMatchFunc: TWideCharMatchFunction): Integer;
    function  MatchRawByteChars(const C: ByteCharSet): Integer;

    // Locating a delimiter relative to the current position
    function  LocateRawByteChar(const C: ByteCharSet;
              const Optional: Boolean = False): Integer;
    function  LocateRawByteStr(const S: RawByteString; const CaseSensitive: Boolean;
              const Optional: Boolean = False): Integer;

    // Peeking
    function  PeekChar: WideChar;
    function  SkipAndPeek(out Ch: WideChar): Boolean;
    function  GetPeekBuffer(const Len: Integer; var Buffer: PWideChar): Integer;

    // Reading runs of matching characters
    function  ReadChars(const CharMatchFunc: TWideCharMatchFunction): UnicodeString;
    function  ReadRawByteChars(const C: ByteCharSet): RawByteString;

    // Reading / skipping up to a delimiter character
    function  SkipToRawByteChar(const C: ByteCharSet;
              const SkipDelimiter: Boolean): Integer;
    function  ReadToRawByteChar(const C: ByteCharSet;
              const SkipDelimiter: Boolean = False): UnicodeString;
    function  ReadUTF8StrToRawByteChar(const C: ByteCharSet;
              const SkipDelimiter: Boolean = False): RawByteString;

    // Reading up to a delimiter string
    function  ReadToRawByteStr(const S: RawByteString;
              const CaseSensitive: Boolean = True;
              const SkipDelimiter: Boolean = False): UnicodeString;
    function  ReadUTF8StrToRawByteStr(const S: RawByteString;
              const CaseSensitive: Boolean = True;
              const SkipDelimiter: Boolean = False): RawByteString;
  end;

  // Base class for exceptions raised by TUnicodeReader.
  EUnicodeReader = class(Exception);
  // Raised by ReadError when a required character could not be buffered.
  EUnicodeReaderReadError = class(EUnicodeReader);
|
|
|
|
|
|
|
|
{ }
|
|
{ TUnicodeMemoryReader }
|
|
{ }
|
|
type
  // TUnicodeReader whose data source is a TMemoryReader created over the
  // supplied Data pointer and Size. The reader object created internally is
  // owned by this instance.
  TUnicodeMemoryReader = class(TUnicodeReader)
  public
    constructor Create(const Data: Pointer; const Size: Integer;
                const Codec: TCustomUnicodeCodec = nil;
                const CodecOwner: Boolean = True);
  end;
|
|
|
|
|
|
|
|
{$IFDEF SupportRawByteString}
|
|
{ }
|
|
{ TUnicodeLongStringReader }
|
|
{ }
|
|
type
  // TUnicodeReader whose data source is a TRawByteStringReader created over
  // DataStr. The reader object created internally is owned by this instance.
  TUnicodeLongStringReader = class(TUnicodeReader)
  public
    constructor Create(const DataStr: RawByteString;
                const Codec: TCustomUnicodeCodec = nil;
                const CodecOwner: Boolean = True);
  end;
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
{ }
|
|
{ TUnicodeFileReader }
|
|
{ }
|
|
type
  // TUnicodeReader whose data source is a TFileReader created for FileName.
  // The reader object created internally is owned by this instance.
  TUnicodeFileReader = class(TUnicodeReader)
  public
    constructor Create(const FileName: String;
                const Codec: TCustomUnicodeCodec = nil;
                const CodecOwner: Boolean = True);
  end;
|
|
|
|
|
|
|
|
{ }
|
|
{ Test cases }
|
|
{ }
|
|
{$IFDEF DEBUG}
|
|
{$IFDEF TEST}
|
|
procedure Test;
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
implementation
|
|
|
|
uses
|
|
{ Fundamentals }
|
|
flcUtils,
|
|
flcUTF,
|
|
flcZeroTermStrings;
|
|
|
|
|
|
|
|
resourcestring
|
|
SReadError = 'Read error';
|
|
|
|
|
|
|
|
{ }
|
|
{ TUnicodeReader }
|
|
{ }
|
|
const
|
|
ReaderBlockSize = 65536; // 64K
|
|
|
|
// Creates a reader on top of Reader.
//   Reader       raw data source; must be assigned (checked with Assert).
//   ReaderOwner  when True, Reader is freed by Destroy.
//   Codec        decoder for the raw data; nil selects the raw 16-bit path
//                in BufferChars.
//   CodecOwner   when True, Codec is freed by Destroy.
// The reader's current position is remembered as the position Reset
// returns to.
constructor TUnicodeReader.Create(const Reader: AReader;
    const ReaderOwner: Boolean;
    const Codec: TCustomUnicodeCodec;
    const CodecOwner: Boolean);
begin
  inherited Create;
  Assert(Assigned(Reader));
  // raw data source
  FReader := Reader;
  FReaderOwner := ReaderOwner;
  FReaderStartPos := FReader.Position;
  // decoder
  FCodec := Codec;
  FCodecOwner := CodecOwner;
  // raw block buffer; released in Destroy
  GetMem(FRawBuf, ReaderBlockSize);
end;
|
|
|
|
// Releases the raw block buffer and frees the reader and codec when this
// instance owns them.
destructor TUnicodeReader.Destroy;
begin
  // raw block buffer allocated in Create
  if FRawBuf <> nil then
    FreeMem(FRawBuf);
  // owned reader
  if FReaderOwner then
    FreeAndNil(FReader);
  // owned codec
  if FCodecOwner then
    FreeAndNil(FCodec);
  inherited Destroy;
end;
|
|
|
|
// Raises EUnicodeReaderReadError with the SReadError message.
// Called when a required character cannot be buffered.
procedure TUnicodeReader.ReadError;
begin
  raise EUnicodeReaderReadError.Create(SReadError);
end;
|
|
|
|
// Rewinds the reader to the position it had when this object was created
// and discards all buffered data.
procedure TUnicodeReader.Reset;
begin
  FReader.Position := FReaderStartPos;
  // Discard decoded characters
  FBufPos := 0;
  FBufLen := 0;
  // Discard undecoded raw bytes left over from a previous read; otherwise
  // they would be prepended to the first block read after the rewind
  FRawSize := 0;
  // Free excessively large buffer, keep part of it for re-use
  if Length(FBuffer) > 4 * ReaderBlockSize then
    SetLength(FBuffer, 4 * ReaderBlockSize);
end;
|
|
|
|
// Returns True when no decoded characters remain in the buffer and the
// underlying reader reports EOF.
function TUnicodeReader.EOF: Boolean;
begin
  // unread decoded characters still in buffer
  if FBufPos < FBufLen then
    begin
      Result := False;
      exit;
    end;
  // buffer exhausted; defer to the raw reader
  Result := FReader.EOF;
end;
|
|
|
|
// Attempts to decode at least Count characters
// Returns number of decoded characters available in buffer
//
// The result can be less than Count when the underlying reader returns no
// more data. Buffered, unread characters are preserved, but FBufPos may
// change because the unread data can be moved to the front of FBuffer.
// Raw bytes that could not be decoded yet (incomplete sequence, or an odd
// trailing byte in the raw 16-bit path) are kept at the front of FRawBuf
// for the next read.
function TUnicodeReader.BufferChars(const Count: Integer): Integer;
var I, J, L, M, N: Integer;
    P: PByte;
    Q: PWideChar;
begin
  // Check available characters
  Result := FBufLen - FBufPos;
  if Result >= Count then
    exit;
  L := Length(FBuffer);
  if L > 0 then
    begin
      // Reorganise buffer
      if Result <= 0 then // buffer empty
        begin
          // move pointer to front
          FBufPos := 0;
          FBufLen := 0;
        end
      else
      if (Result <= ReaderBlockSize div 16) or // buffer is nearly empty; or
         (FBufPos >= 4 * ReaderBlockSize) then // buffer has too much unused space at front
        begin
          // Move data to front
          Q := Pointer(FBuffer);
          Inc(Q, FBufPos);
          Move(Q^, Pointer(FBuffer)^, Result * Sizeof(WideChar));
          FBufPos := 0;
          FBufLen := Result;
        end;
    end;
  // Fill unicode buffer
  repeat
    // Fill raw character buffer, appending after any undecoded left-over
    P := FRawBuf;
    Inc(P, FRawSize);
    J := FReader.Read(P^, ReaderBlockSize - FRawSize);
    if J <= 0 then // eof
      exit;
    Inc(FRawSize, J);
    // Decode to unicode buffer
    if Assigned(FCodec) then
      begin
        // Decode raw buffer using codec
        // P/J track the undecoded raw bytes, L the free characters in FBuffer
        P := FRawBuf;
        J := FRawSize;
        L := Length(FBuffer) - FBufLen;
        repeat
          if L < ReaderBlockSize then
            begin
              // grow unicode buffer to fit at least one raw buffer
              L := ReaderBlockSize;
              SetLength(FBuffer, FBufLen + L);
            end;
          Q := Pointer(FBuffer);
          Inc(Q, FBufLen);
          // M is used as the number of raw bytes consumed and N as the
          // number of characters produced
          FCodec.Decode(P, J, Q, L * Sizeof(WideChar), M, N);
          Inc(P, M);
          Dec(J, M);
          Inc(FBufLen, N);
          Dec(L, N);
        until (J <= 0) or (L > 0);
        // I = raw bytes consumed; P points at the undecoded remainder
        I := FRawSize - J;
      end
    else
      begin
        // read raw 16-bit unicode
        I := FRawSize div Sizeof(WideChar);
        L := Length(FBuffer) - FBufLen;
        if L < I then
          begin
            L := I;
            SetLength(FBuffer, FBufLen + L);
          end;
        Q := Pointer(FBuffer);
        Inc(Q, FBufLen);
        Inc(FBufLen, I);
        I := I * Sizeof(WideChar);
        Move(FRawBuf^, Q^, I);
        // Point P at the unconsumed tail (an odd trailing byte) so the move
        // below carries over the correct byte; P previously still referred
        // to the position the last read was appended at
        P := FRawBuf;
        Inc(P, I);
      end;
    // Move undecoded raw data to front of buffer
    if I < FRawSize then
      begin
        Move(P^, FRawBuf^, FRawSize - I);
        Dec(FRawSize, I);
      end
    else
      FRawSize := 0;
    // Check if enough characters have been buffered
    Result := FBufLen - FBufPos;
  until Result >= Count;
end;
|
|
|
|
// Returns True when at least Count characters are available in the buffer,
// buffering more data through BufferChars only when required.
function TUnicodeReader.GetBuffer(const Count: Integer): Boolean;
begin
  if FBufLen - FBufPos >= Count then
    // already buffered
    Result := True
  else
    // try to buffer more
    Result := BufferChars(Count) >= Count;
end;
|
|
|
|
// Copies up to Len characters into Buf and consumes them.
// Returns the number of characters copied, which is less than Len when not
// enough data could be buffered, and 0 when Len <= 0.
function TUnicodeReader.ReadWide(const Buf: PWideChar;
    const Len: Integer): Integer;
var Src: PWideChar;
    Take: Integer;
begin
  Result := 0;
  if Len <= 0 then
    exit;
  // buffer
  Take := FBufLen - FBufPos;
  if Take < Len then
    Take := BufferChars(Len);
  if Take > Len then
    Take := Len;
  // read; source pointer is taken after buffering since BufferChars may
  // move the buffered data
  Src := Pointer(FBuffer);
  Inc(Src, FBufPos);
  Move(Src^, Buf^, Sizeof(WideChar) * Take);
  Inc(FBufPos, Take);
  Result := Take;
end;
|
|
|
|
// Reads up to Len characters and returns them as a UnicodeString.
// The result is shorter than Len when not enough data could be buffered and
// empty when Len <= 0.
function TUnicodeReader.ReadUnicodeStr(const Len: Integer): UnicodeString;
var Take: Integer;
    Src: PWideChar;
begin
  Result := '';
  if Len <= 0 then
    exit;
  // buffer
  Take := FBufLen - FBufPos;
  if Take < Len then
    Take := BufferChars(Len);
  if Take > Len then
    Take := Len;
  // read
  Src := Pointer(FBuffer);
  Inc(Src, FBufPos);
  SetLength(Result, Take);
  Move(Src^, Pointer(Result)^, Sizeof(WideChar) * Take);
  Inc(FBufPos, Take);
end;
|
|
|
|
// Reads up to Len characters and returns them converted with
// WideBufToUTF8String. Len counts characters read from the buffer, not bytes
// of the result. Fewer characters are read when not enough data could be
// buffered; the result is empty when Len <= 0.
function TUnicodeReader.ReadUTF8Str(const Len: Integer): RawByteString;
var Take: Integer;
    Src: PWideChar;
begin
  if Len <= 0 then
    begin
      SetLength(Result, 0);
      exit;
    end;
  // buffer
  Take := FBufLen - FBufPos;
  if Take < Len then
    Take := BufferChars(Len);
  if Take > Len then
    Take := Len;
  // read
  Src := Pointer(FBuffer);
  Inc(Src, FBufPos);
  Result := WideBufToUTF8String(Src, Take);
  Inc(FBufPos, Take);
end;
|
|
|
|
// Consumes Count characters. Does nothing when Count <= 0.
// Calls ReadError (which raises) when fewer than Count characters can be
// buffered.
procedure TUnicodeReader.Skip(const Count: Integer);
begin
  if Count <= 0 then
    exit;
  // buffer; GetBuffer returns immediately when enough is already buffered
  if not GetBuffer(Count) then
    ReadError;
  // skip
  Inc(FBufPos, Count);
end;
|
|
|
|
// Consumes consecutive characters for which CharMatchFunc returns True.
// Stops at the first non-matching character or when no more data can be
// buffered. Returns the number of characters consumed.
function TUnicodeReader.SkipAll(const CharMatchFunc: TWideCharMatchFunction): Integer;
var Cur: PWideChar;
    Avail, Left: Integer;
begin
  Result := 0;
  // buffer
  Avail := FBufLen - FBufPos;
  if Avail <= 0 then
    Avail := BufferChars(1);
  while Avail > 0 do
    begin
      // skip everything that matches in the buffered run
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos);
      Left := Avail;
      while Left > 0 do
        begin
          if not CharMatchFunc(Cur^) then
            exit;
          Inc(Result);
          Inc(FBufPos);
          Inc(Cur);
          Dec(Left);
        end;
      // whole run consumed; buffer more
      Avail := BufferChars(1);
    end;
  // Avail <= 0: eof
end;
|
|
|
|
// Tests the next character with CharMatchFunc.
// Returns False when no character is available. When Skip is True and the
// character matches, it is consumed.
function TUnicodeReader.MatchChar(const CharMatchFunc: TWideCharMatchFunction;
    const Skip: Boolean): Boolean;
var Cur: PWideChar;
begin
  // buffer
  if not GetBuffer(1) then // eof
    begin
      Result := False;
      exit;
    end;
  // match
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := CharMatchFunc(Cur^);
  // skip
  if Result and Skip then
    Inc(FBufPos);
end;
|
|
|
|
// Tests whether the next character equals Ch.
// Returns False when no character is available. When Skip is True and the
// character matches, it is consumed.
function TUnicodeReader.MatchWideChar(const Ch: WideChar;
    const Skip: Boolean): Boolean;
var Cur: PWideChar;
begin
  // buffer
  if not GetBuffer(1) then // eof
    begin
      Result := False;
      exit;
    end;
  // match
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := (Cur^ = Ch);
  // skip
  if Result and Skip then
    Inc(FBufPos);
end;
|
|
|
|
// Tests whether the upcoming characters match S, using
// StrZMatchStrAsciiBW with the given CaseSensitive flag.
// Returns False for an empty S or when fewer than Length(S) characters can
// be buffered. When Skip is True and the string matches, Length(S)
// characters are consumed.
function TUnicodeReader.MatchRawByteStr(const S: RawByteString;
    const CaseSensitive: Boolean; const Skip: Boolean): Boolean;
var Needed: Integer;
    Cur: PWideChar;
begin
  Result := False;
  Needed := Length(S);
  if Needed = 0 then
    exit;
  // buffer
  if not GetBuffer(Needed) then // eof
    exit;
  // match
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := StrZMatchStrAsciiBW(Cur, S, CaseSensitive);
  // skip
  if Result and Skip then
    Inc(FBufPos, Needed);
end;
|
|
|
|
// Tests whether the upcoming characters match S and the character that
// follows satisfies Delimiter.
// Returns False when fewer than Length(S) + 1 characters can be buffered.
// When Skip is True and both tests succeed, Length(S) characters are
// consumed; the delimiter character itself is not consumed.
function TUnicodeReader.MatchRawByteStrDelimited(const S: RawByteString;
    const CaseSensitive: Boolean; const Delimiter: TWideCharMatchFunction;
    const Skip: Boolean): Boolean;
var SLen: Integer;
    Cur: PWideChar;
begin
  SLen := Length(S);
  // buffer the string plus one delimiter character
  if not GetBuffer(SLen + 1) then // eof
    begin
      Result := False;
      exit;
    end;
  // match string
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := StrZMatchStrAsciiBW(Cur, S, CaseSensitive);
  if Result then
    begin
      // match delimiter
      Inc(Cur, SLen);
      Result := Delimiter(Cur^);
      // skip
      if Result and Skip then
        Inc(FBufPos, SLen);
    end;
end;
|
|
|
|
// Counts the consecutive upcoming characters for which CharMatchFunc
// returns True, buffering more data as needed. Nothing is consumed.
function TUnicodeReader.MatchChars(const CharMatchFunc: TWideCharMatchFunction): Integer;
var Cur: PWideChar;
    Avail: Integer;
begin
  Result := 0;
  // buffer
  Avail := FBufLen - FBufPos;
  if Avail <= 0 then
    Avail := BufferChars(1);
  // loop ends when nothing beyond the matched run could be buffered (eof)
  while Avail > Result do
    begin
      // match; pointer is re-derived because BufferChars may move the data
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result < Avail do
        begin
          if not CharMatchFunc(Cur^) then
            exit;
          Inc(Result);
          Inc(Cur);
        end;
      // buffer more
      Avail := BufferChars(Result + 1);
    end;
end;
|
|
|
|
// Counts the consecutive upcoming characters whose code is <= $FF and is a
// member of C, buffering more data as needed. Nothing is consumed.
function TUnicodeReader.MatchRawByteChars(const C: ByteCharSet): Integer;
var Cur: PWideChar;
    Avail: Integer;
begin
  Result := 0;
  // buffer
  Avail := FBufLen - FBufPos;
  if Avail <= 0 then
    Avail := BufferChars(1);
  // loop ends when nothing beyond the matched run could be buffered (eof)
  while Avail > Result do
    begin
      // match; pointer is re-derived because BufferChars may move the data
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result < Avail do
        begin
          if (Ord(Cur^) > $FF) or not (ByteChar(Byte(Cur^)) in C) then
            exit;
          Inc(Result);
          Inc(Cur);
        end;
      // buffer more
      Avail := BufferChars(Result + 1);
    end;
end;
|
|
|
|
// Searches forward from the current position for the first character whose
// code is <= $FF and is a member of C. Nothing is consumed.
// Returns the offset of that character relative to the current position.
// When no such character is found before the data ends, returns the number
// of buffered characters if Optional is True, otherwise -1.
function TUnicodeReader.LocateRawByteChar(const C: ByteCharSet;
    const Optional: Boolean): Integer;
var Cur: PWideChar;
    Avail: Integer;
    Code: Word;
begin
  Result := 0;
  // buffer
  Avail := FBufLen - FBufPos;
  if Avail <= 0 then
    Avail := BufferChars(1);
  while True do
    begin
      if Avail < Result + 1 then
        begin
          // eof
          if Optional then
            Result := Avail
          else
            Result := -1;
          exit;
        end;
      // locate; pointer is re-derived because BufferChars may move the data
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result < Avail do
        begin
          Code := Ord(Cur^);
          if (Code <= $FF) and (ByteChar(Byte(Code)) in C) then
            // found
            exit;
          Inc(Result);
          Inc(Cur);
        end;
      // buffer more
      Avail := BufferChars(Result + 1);
    end;
end;
|
|
|
|
// Searches forward from the current position for the first occurrence of S,
// compared with StrZMatchStrAsciiBW. Nothing is consumed.
// Returns the offset of the match relative to the current position, or 0
// when S is empty. When S is not found before the data ends, returns the
// number of buffered characters if Optional is True, otherwise -1.
function TUnicodeReader.LocateRawByteStr(const S: RawByteString;
    const CaseSensitive: Boolean;
    const Optional: Boolean): Integer;
var Cur: PWideChar;
    SLen, Avail: Integer;
begin
  Result := 0;
  SLen := Length(S);
  if SLen = 0 then
    exit;
  // buffer
  Avail := FBufLen - FBufPos;
  if Avail < SLen then
    Avail := BufferChars(SLen);
  while True do
    begin
      if Avail < Result + SLen then
        begin
          // eof
          if Optional then
            Result := Avail
          else
            Result := -1;
          exit;
        end;
      // try every start offset that leaves room for a full match
      Cur := Pointer(FBuffer);
      Inc(Cur, FBufPos + Result);
      while Result <= Avail - SLen do
        begin
          if StrZMatchStrAsciiBW(Cur, S, CaseSensitive) then
            // found
            exit;
          Inc(Result);
          Inc(Cur);
        end;
      // buffer more characters
      Avail := BufferChars(Result + SLen);
    end;
end;
|
|
|
|
// Returns the next character without consuming it.
// Calls ReadError (which raises) when no character is available.
function TUnicodeReader.PeekChar: WideChar;
var Cur: PWideChar;
begin
  // buffer; GetBuffer returns immediately when a character is buffered
  if not GetBuffer(1) then
    ReadError;
  // peek
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := Cur^;
end;
|
|
|
|
// Gives direct access to the buffered, unread characters.
// Tries to buffer at least Len characters. Result is the number of wide
// characters available at Buffer. Buffer points into the reader's internal
// buffer (nil when Result is 0) and is only valid until the next call to
// the reader. Nothing is consumed.
function TUnicodeReader.GetPeekBuffer(const Len: Integer;
    var Buffer: PWideChar): Integer;
var Cur: PWideChar;
begin
  Result := BufferChars(Len);
  if Result = 0 then
    begin
      Buffer := nil;
      exit;
    end;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Buffer := Cur;
end;
|
|
|
|
// Returns the next character and consumes it.
// Calls ReadError (which raises) when no character is available.
function TUnicodeReader.ReadChar: WideChar;
var Cur: PWideChar;
begin
  // buffer; GetBuffer returns immediately when a character is buffered
  if not GetBuffer(1) then
    ReadError;
  // read; FBufPos is used after buffering because it may have changed
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Result := Cur^;
  Inc(FBufPos);
end;
|
|
|
|
// Consumes the current character and peeks at the one after it.
// Returns True and sets Ch to the new current character when a following
// character exists. Otherwise returns False with Ch = #0; the current
// character, if there was one, is still consumed.
function TUnicodeReader.SkipAndPeek(out Ch: WideChar): Boolean;
var Cur: PWideChar;
begin
  // Skip
  // GetBuffer(2) succeeds without reading when two characters are already
  // buffered, so one call covers both the buffered and unbuffered case
  Result := GetBuffer(2);
  if FBufPos < FBufLen then
    Inc(FBufPos);
  // Peek
  if not Result then
    begin
      Ch := WideChar(#0);
      exit;
    end;
  Cur := Pointer(FBuffer);
  Inc(Cur, FBufPos);
  Ch := Cur^;
end;
|
|
|
|
// Reads and consumes the run of upcoming characters for which CharMatchFunc
// returns True. Returns an empty string when the next character does not
// match or no data is available.
function TUnicodeReader.ReadChars(const CharMatchFunc: TWideCharMatchFunction): UnicodeString;
var Src: PWideChar;
    RunLen: Integer;
begin
  // calculate length
  RunLen := MatchChars(CharMatchFunc);
  if RunLen = 0 then
    begin
      Result := '';
      exit;
    end;
  // read
  SetLength(Result, RunLen);
  Src := Pointer(FBuffer);
  Inc(Src, FBufPos);
  Move(Src^, Pointer(Result)^, Sizeof(WideChar) * RunLen);
  Inc(FBufPos, RunLen);
end;
|
|
|
|
// Reads and consumes the run of upcoming characters that are members of C
// (as determined by MatchRawByteChars) and returns them converted with
// WideBufToRawByteString. Returns an empty string when the next character
// is not in C or no data is available.
function TUnicodeReader.ReadRawByteChars(const C: ByteCharSet): RawByteString;
var P : PWideChar;
    L : Integer;
begin
  // calculate length
  L := MatchRawByteChars(C);
  if L = 0 then
    begin
      SetLength(Result, 0);
      exit;
    end;
  // read
  // Result is assigned directly from the conversion; pre-sizing it first
  // only allocated a string that was discarded straight away
  P := Pointer(FBuffer);
  Inc(P, FBufPos);
  Result := WideBufToRawByteString(P, L);
  Inc(FBufPos, L);
end;
|
|
|
|
// Consumes characters up to the first character that is a member of C.
// Returns the number of characters consumed before the delimiter. When
// SkipDelimiter is True and a delimiter was found, the delimiter is
// consumed as well (not counted in the result). When no delimiter is found,
// all buffered characters are consumed.
function TUnicodeReader.SkipToRawByteChar(const C: ByteCharSet;
    const SkipDelimiter: Boolean): Integer;
var DelimOfs: Integer;
begin
  // locate
  DelimOfs := LocateRawByteChar(C, False);
  if DelimOfs < 0 then
    begin
      // not found: skip everything that is buffered
      Result := FBufLen - FBufPos;
      Inc(FBufPos, Result);
      exit;
    end;
  // skip characters in front of the delimiter (none when DelimOfs = 0)
  Result := DelimOfs;
  Inc(FBufPos, DelimOfs);
  // skip delimiter
  if SkipDelimiter then
    Inc(FBufPos);
end;
|
|
|
|
// Reads and consumes characters up to the first character that is a member
// of C and returns them. When SkipDelimiter is True and a delimiter was
// found, the delimiter is consumed as well (not part of the result). When
// no delimiter is found, all buffered characters are returned.
function TUnicodeReader.ReadToRawByteChar(const C: ByteCharSet;
    const SkipDelimiter: Boolean): UnicodeString;
var DelimOfs: Integer;
begin
  // locate
  DelimOfs := LocateRawByteChar(C, False);
  if DelimOfs < 0 then
    begin
      // not found: read everything that is buffered
      Result := ReadUnicodeStr(FBufLen - FBufPos);
      exit;
    end;
  // read; yields '' when the delimiter is the next character
  Result := ReadUnicodeStr(DelimOfs);
  // skip delimiter
  if SkipDelimiter then
    Inc(FBufPos);
end;
|
|
|
|
// Reads and consumes characters up to the first character that is a member
// of C and returns them through ReadUTF8Str. When SkipDelimiter is True and
// a delimiter was found, the delimiter is consumed as well (not part of the
// result). When no delimiter is found, all buffered characters are
// returned.
function TUnicodeReader.ReadUTF8StrToRawByteChar(const C: ByteCharSet;
    const SkipDelimiter: Boolean): RawByteString;
var DelimOfs: Integer;
begin
  // locate
  DelimOfs := LocateRawByteChar(C, False);
  if DelimOfs < 0 then
    begin
      // not found: read everything that is buffered
      Result := ReadUTF8Str(FBufLen - FBufPos);
      exit;
    end;
  // read; yields an empty string when the delimiter is the next character
  Result := ReadUTF8Str(DelimOfs);
  // skip delimiter
  if SkipDelimiter then
    Inc(FBufPos);
end;
|
|
|
|
// Reads and consumes characters up to the first occurrence of S and returns
// them. When SkipDelimiter is True and S was found, Length(S) further
// characters are consumed (not part of the result). When S is not found,
// all buffered characters are returned.
function TUnicodeReader.ReadToRawByteStr(const S: RawByteString;
    const CaseSensitive: Boolean; const SkipDelimiter: Boolean): UnicodeString;
var DelimOfs: Integer;
begin
  // locate
  DelimOfs := LocateRawByteStr(S, CaseSensitive, False);
  if DelimOfs < 0 then
    begin
      // not found: read everything that is buffered
      Result := ReadUnicodeStr(FBufLen - FBufPos);
      exit;
    end;
  // read; yields '' when the delimiter starts at the current position
  Result := ReadUnicodeStr(DelimOfs);
  // skip delimiter
  if SkipDelimiter then
    Inc(FBufPos, Length(S));
end;
|
|
|
|
// Reads and consumes characters up to the first occurrence of S and returns
// them through ReadUTF8Str. When SkipDelimiter is True and S was found,
// Length(S) further characters are consumed (not part of the result). When
// S is not found, all buffered characters are returned.
function TUnicodeReader.ReadUTF8StrToRawByteStr(const S: RawByteString;
    const CaseSensitive: Boolean; const SkipDelimiter: Boolean): RawByteString;
var DelimOfs: Integer;
begin
  // locate
  DelimOfs := LocateRawByteStr(S, CaseSensitive, False);
  if DelimOfs < 0 then
    begin
      // not found: read everything that is buffered
      Result := ReadUTF8Str(FBufLen - FBufPos);
      exit;
    end;
  // read; yields an empty string when the delimiter starts at the current
  // position
  Result := ReadUTF8Str(DelimOfs);
  // skip delimiter
  if SkipDelimiter then
    Inc(FBufPos, Length(S));
end;
|
|
|
|
|
|
|
|
{ }
|
|
{ TUnicodeMemoryReader }
|
|
{ }
|
|
// Creates a Unicode reader over a TMemoryReader built from Data and Size.
// The memory reader is owned by this instance; Codec and CodecOwner are
// passed through to TUnicodeReader.Create.
constructor TUnicodeMemoryReader.Create(const Data: Pointer; const Size: Integer;
    const Codec: TCustomUnicodeCodec; const CodecOwner: Boolean);
var Source: AReader;
begin
  Source := TMemoryReader.Create(Data, Size);
  inherited Create(Source, True, Codec, CodecOwner);
end;
|
|
|
|
|
|
|
|
{$IFDEF SupportRawByteString}
|
|
{ }
|
|
{ TUnicodeLongStringReader }
|
|
{ }
|
|
// Creates a Unicode reader over a TRawByteStringReader built from DataStr.
// The string reader is owned by this instance; Codec and CodecOwner are
// passed through to TUnicodeReader.Create.
constructor TUnicodeLongStringReader.Create(const DataStr: RawByteString;
    const Codec: TCustomUnicodeCodec; const CodecOwner: Boolean);
var Source: AReader;
begin
  Source := TRawByteStringReader.Create(DataStr);
  inherited Create(Source, True, Codec, CodecOwner);
end;
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
{ }
|
|
{ TUnicodeFileReader }
|
|
{ }
|
|
// Creates a Unicode reader over a TFileReader built from FileName.
// The file reader is owned by this instance; Codec and CodecOwner are
// passed through to TUnicodeReader.Create.
constructor TUnicodeFileReader.Create(const FileName: String;
    const Codec: TCustomUnicodeCodec; const CodecOwner: Boolean);
var Source: AReader;
begin
  Source := TFileReader.Create(FileName);
  inherited Create(Source, True, Codec, CodecOwner);
end;
|
|
|
|
|
|
|
|
{ }
|
|
{ Test cases }
|
|
{ }
|
|
{$IFDEF DEBUG}
|
|
{$IFDEF TEST}
|
|
{$ASSERTIONS ON}
|
|
// Self test: decodes a short UTF-8 byte sequence through TUTF8Codec and
// checks EOF, PeekChar and ReadChar. Only active when SupportRawByteString
// is defined.
procedure Test;
{$IFDEF SupportRawByteString}
var A : TUnicodeReader;
{$ENDIF}
begin
  {$IFDEF SupportRawByteString}
  A := TUnicodeReader.Create(
      TRawByteStringReader.Create(RawByteString(#$41#$E2#$89#$A2#$CE#$91#$2E)),
      True,
      TUTF8Codec.Create,
      True);
  // try..finally so the reader (and the reader/codec it owns) is released
  // even when an assertion fails
  try
    Assert(not A.EOF,            'UnicodeReader.EOF');
    Assert(A.PeekChar = #$0041,  'UnicodeReader.PeekChar');
    Assert(A.ReadChar = #$0041,  'UnicodeReader.ReadChar');
    Assert(A.ReadChar = #$2262,  'UnicodeReader.ReadChar');
    Assert(A.ReadChar = #$0391,  'UnicodeReader.ReadChar');
    Assert(A.ReadChar = #$002E,  'UnicodeReader.ReadChar');
    Assert(A.EOF,                'UnicodeReader.EOF');
  finally
    A.Free;
  end;
  {$ENDIF}
end;
|
|
{$ENDIF}{$ENDIF}
|
|
|
|
|
|
|
|
end.
|
|
|