{******************************************************************************} { } { Library: Fundamentals 5.00 - HTML Parser } { File name: flcHTMLDocBase.pas } { File version: 5.10 } { Description: HTML DOM base classes } { } { Copyright: Copyright (c) 2000-2020, David J Butler } { All rights reserved. } { Redistribution and use in source and binary forms, with } { or without modification, are permitted provided that } { the following conditions are met: } { Redistributions of source code must retain the above } { copyright notice, this list of conditions and the } { following disclaimer. } { THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND } { CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED } { WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED } { WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A } { PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL } { THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, } { INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR } { CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, } { PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF } { USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) } { HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER } { IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING } { NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE } { USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE } { POSSIBILITY OF SUCH DAMAGE. } { } { Github: https://github.com/fundamentalslib } { E-mail: fundamentals.library at gmail.com } { } { Revision history: } { } { 2002/10/21 1.00 Initial version. } { 2002/10/27 1.01 Classes for all HTML 4 element objects. } { 2002/10/28 1.02 Unicode support. } { 2002/11/03 1.03 Optimizations. } { 2002/11/22 1.04 Style sheet support. } { 2002/12/03 1.05 Style sheet changes. } { 2002/12/07 1.06 Small additions. } { 2002/12/08 1.07 Split-up unit cHTMLObjects. } { 2002/12/16 1.08 Improved white-space and line-break handling. } { 2015/04/11 1.09 UnicodeString changes. } { 2019/02/22 5.10 Revise for Fundamentals 5. } { } {******************************************************************************} {$INCLUDE flcHTML.inc} unit flcHTMLDocBase; interface uses { System } SysUtils, { Fundamentals } flcStdTypes, flcUnicodeCodecs, { HTML } flcHTMLElements, flcHTMLStyleProperties, flcHTMLStyleSheet; { } { AhtmlObject } { Base class for HTML document objects. } { } type TRefactorOperation = ( reopRefactorForLayout ); TRefactorOperations = set of TRefactorOperation; AhtmlObject = class protected FUserTag : NativeUInt; FUserObj : TObject; FName : String; FParent : AhtmlObject; FPrev : AhtmlObject; FNext : AhtmlObject; FStyle : ThtmlcssStyleProperties; procedure Init; virtual; function GetName: String; virtual; function GetNameUTF8: RawByteString; function GetContentText: String; virtual; procedure SetContentText(const ContentText: String); virtual; function GetContentTextUTF8: RawByteString; procedure SetContentTextUTF8(const ContentText: RawByteString); function GetHTMLText: String; virtual; function GetHTMLTextUTF8: RawByteString; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; virtual; procedure InitStyleElementInfo(const StyleSheet: ThtmlCSS; const ParentInfo: PhtmlcssElementInfo); virtual; procedure InitStyleInfo(const StyleSheet: ThtmlCSS; const StyleInfo: ThtmlcssStyleProperties); virtual; function Refactor(const Operations: TRefactorOperations): AhtmlObject; virtual; public constructor Create; overload; virtual; property UserTag: NativeUInt read FUserTag write FUserTag; property UserObj: TObject read FUserObj write FUserObj; property Name: String read GetName; property NameUTF8: RawByteString read GetNameUTF8; property Parent: AhtmlObject read FParent; property PrevSibling: AhtmlObject read FPrev; property NextSibling: AhtmlObject read FNext; function DuplicateObject: AhtmlObject; virtual; function GetStructureStr(const Depth: Integer = 0; const Level: Integer = 0): String; virtual; property ContentText: String read GetContentText write SetContentText; property ContentTextUTF8: RawByteString read GetContentTextUTF8 write SetContentTextUTF8; property HTMLTextUTF8: RawByteString read GetHTMLTextUTF8; property HTMLText: String read GetHTMLText; function EncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; property Style: ThtmlcssStyleProperties read FStyle; end; AhtmlObjectClass = class of AhtmlObject; EhtmlObject = class(Exception); { } { AhtmlContainerObject } { Base class for HTML objects that can contain other HTML objects. } { } type AhtmlContainerObject = class(AhtmlObject) protected FFirst : AhtmlObject; FLast : AhtmlObject; procedure SetContentText(const ContentText: String); override; function GetContentText: String; override; // function GetHTMLTextUTF8: RawByteString; override; function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; function GetItem(const Name: String): AhtmlObject; virtual; procedure SetItem(const Name: String; const Item: AhtmlObject); virtual; function GetItemText(const Name: String): String; procedure SetItemText(const Name: String; const Text: String); procedure DeleteChild(const Obj: AhtmlObject); procedure ReplaceChild(const Obj, NewObj: AhtmlObject); procedure InitStyleElementInfo(const StyleSheet: ThtmlCSS; const ParentInfo: PhtmlcssElementInfo); override; procedure InitStyleInfo(const StyleSheet: ThtmlCSS; const StyleInfo: ThtmlcssStyleProperties); override; function Refactor(const Operations: TRefactorOperations): AhtmlObject; override; public destructor Destroy; override; property FirstChild: AhtmlObject read FFirst; property LastChild: AhtmlObject read FLast; procedure ClearItems; function DuplicateObject: AhtmlObject; override; function GetStructureStr(const Depth: Integer = 0; const Level: Integer = 0): String; override; function CreateItem(const ID: Integer; const Name: String): AhtmlObject; virtual; procedure AddItem(const Obj: AhtmlObject); virtual; function FindNext(const Previous: AhtmlObject; const ItemClass: AhtmlObjectClass): AhtmlObject; function FindNextName(const Previous: AhtmlObject; const Name: String): AhtmlObject; function GetItemByClass(const ItemClass: AhtmlObjectClass): AhtmlObject; function RequireItemByClass(const ItemClass: AhtmlObjectClass): AhtmlObject; function GetItemTextByClass(const ItemClass: AhtmlObjectClass): RawByteString; property Item[const Name: String]: AhtmlObject read GetItem write SetItem; function RequireItem(const Name: String; const ItemClass: AhtmlObjectClass = nil): AhtmlObject; property ItemText[const Name: String]: String read GetItemText write SetItemText; end; EhtmlContainerObject = class(EhtmlObject); { } { AhtmlTextContentObject } { Base class for text content objects. } { } type AhtmlTextContentObject = class(AhtmlObject) end; { } { AhtmlTextStringContentObject } { Base class for text content objects that store content as a UnicodeString. } { } type AhtmlTextStringContentObject = class(AhtmlTextContentObject) protected FText : String; function GetContentText: String; override; procedure SetContentText(const ContentText: String); override; public constructor Create(const Text: String); overload; constructor CreateWide(const Text: WideString); function DuplicateObject: AhtmlObject; override; property Text: String read FText write FText; end; { } { ThtmlText } { Text content. } { } type ThtmlText = class(AhtmlTextStringContentObject) protected function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; function IsStartWhiteSpace: Boolean; function IsEndWhiteSpace: Boolean; function Refactor(const Operations: TRefactorOperations): AhtmlObject; override; public function GetStructureStr(const Depth: Integer = 0; const Level: Integer = 0): String; override; end; { } { ThtmlCDATA } { Raw content. } { } type ThtmlCDATA = class(AhtmlTextStringContentObject) protected function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; end; { } { ThtmlLineBreak } { } type ThtmlLineBreak = class(AhtmlTextContentObject) protected function GetContentText: String; override; function GetHTMLText: String; override; function Refactor(const Operations: TRefactorOperations): AhtmlObject; override; end; { } { ThtmlCharRef } { A character reference. } { } type ThtmlCharRef = class(AhtmlTextContentObject) protected FCharVal : LongWord; FHasTrailer : Boolean; function GetContentText: String; override; function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; function GetAsWideChar: WideChar; function Refactor(const Operations: TRefactorOperations): AhtmlObject; override; public constructor Create(const CharVal: LongWord; const HasTrailer: Boolean); overload; function DuplicateObject: AhtmlObject; override; property CharVal: LongWord read FCharVal write FCharVal; property AsWideChar: WideChar read GetAsWideChar; end; { } { ThtmlEntityRef } { Entity reference. } { } type ThtmlEntityRef = class(AhtmlTextContentObject) protected FHasTrailer : Boolean; function GetContentText: String; override; function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; function Refactor(const Operations: TRefactorOperations): AhtmlObject; override; public constructor Create(const EntityName: String; const HasTrailer: Boolean); overload; property EntityName: String read FName write FName; end; { } { ThtmlComment } { A comment tag. } { } type ThtmlComment = class(AhtmlObject) protected FComments: StringArray; function GetHTMLText: String; override; public constructor Create(const Comments: StringArray); overload; function DuplicateObject: AhtmlObject; override; property Comments: StringArray read FComments write FComments; end; ThtmlEmptyComment = class(AhtmlObject) protected function GetHTMLText: String; override; end; { } { ThtmlPI } { A "Processing Information" tag. } { } type ThtmlPI = class(AhtmlObject) protected FTarget, FPI: String; function GetHTMLText: String; override; public constructor Create(const Target, PI: String); overload; function DuplicateObject: AhtmlObject; override; property Target: String read FTarget write FTarget; property PI: String read FPI write FPI; end; { } { ThtmlRawTag } { A raw un-interpreted tag. } { } type ThtmlRawTag = class(AhtmlObject) protected FTag: String; function GetHTMLText: String; override; public constructor Create(const Tag: String); overload; function DuplicateObject: AhtmlObject; override; property Tag: String read FTag write FTag; end; { } { ThtmlElementAttribute } { Element attribute. } { The object's content is the attribute value. } { } type ThtmlElementAttribute = class(AhtmlContainerObject) protected FAttrID: ThtmlAttrID; function GetName: String; override; function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; public constructor Create(const AttrID: ThtmlAttrID; const Name: String); overload; property AttrID: ThtmlAttrID read FAttrID; function DuplicateObject: AhtmlObject; override; end; { } { ThtmlElementAttributes } { Element attribute collection. } { } type ThtmlElementAttributes = class(AhtmlContainerObject) protected function GetContentText: String; override; procedure SetContentText(const ContentText: String); override; function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; function GetAttributeText(const Name: String): String; procedure SetAttributeText(const Name: String; const Value: String); function GetAttributeFlag(const Name: String): Boolean; procedure SetAttributeFlag(const Name: String; const Value: Boolean); public function CreateItem(const ID: Integer; const Name: String): AhtmlObject; override; function HasAttribute(const Name: String): Boolean; property AttributeText[const Name: String]: String read GetAttributeText write SetAttributeText; property AttributeFlag[const Name: String]: Boolean read GetAttributeFlag write SetAttributeFlag; procedure DeleteAttribute(const Name: String); end; { } { AhtmlElement } { Base class for element implementations. } { } type ThtmlElementTagType = (ttContentTags, ttEmptyTag, ttStartTag); AhtmlElement = class(AhtmlContainerObject) protected FTagID : ThtmlTagID; FTagType : ThtmlElementTagType; FAttributes : ThtmlElementAttributes; FStyleAttr : ThtmlcssDeclarations; FStyleInfo : ThtmlcssElementInfo; function GetName: String; override; procedure SetContentText(const ContentText: String); override; function GetHTMLText: String; override; function GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; override; procedure AssignElementInfo(const Source: AhtmlElement); function GetAttributes: ThtmlElementAttributes; function GetAttributeText(const Name: String): String; procedure SetAttributeText(const Name: String; const Text: String); function GetAttributeFlag(const Name: String): Boolean; procedure SetAttributeFlag(const Name: String; const Value: Boolean); procedure InitStyleElementInfo(const StyleSheet: ThtmlCSS; const ParentInfo: PhtmlcssElementInfo); override; procedure InitElementStyleInfo(const StyleSheet: ThtmlCSS; var Info: ThtmlcssStyleProperties; const StyleInfo: ThtmlcssStyleProperties); procedure InitChildrenStyleInfo(const StyleSheet: ThtmlCSS; const StyleInfo: ThtmlcssStyleProperties); procedure InitStyleInfo(const StyleSheet: ThtmlCSS; const StyleInfo: ThtmlcssStyleProperties); override; procedure ApplyHTMLStyleInfo(var StyleInfo: ThtmlcssStyleProperties; const ParentStyle: ThtmlcssStyleProperties); virtual; public constructor Create(const TagID: ThtmlTagID; const Name: String; const TagType: ThtmlElementTagType); overload; destructor Destroy; override; property TagID: ThtmlTagID read FTagID write FTagID; property TagType: ThtmlElementTagType read FTagType write FTagType; function DuplicateObject: AhtmlObject; override; function DuplicateElement: AhtmlElement; virtual; function HasAttribute(const Name: String): Boolean; property Attributes: ThtmlElementAttributes read GetAttributes; property AttributeText[const Name: String]: String read GetAttributeText write SetAttributeText; property AttributeFlag[const Name: String]: Boolean read GetAttributeFlag write SetAttributeFlag; end; ThtmlElement = class(AhtmlElement); ThtmlElementClass = class of AhtmlElement; { } { AhtmlKnownElement } { Base class for HTML elements that can be identified with a TagID. } { } type AhtmlKnownElement = class(AhtmlElement) public constructor Create(const TagID: ThtmlTagID; const TagType: ThtmlElementTagType); overload; end; implementation uses { Fundamentals } flcASCII, flcUtils, flcStrings, flcUTF, { HTML } flcHTMLCharEntity, flcHTMLProperties, flcHTMLDocElements; { } { EncodeHTMLText } { Helper function that encodes HTML text using a specific Unicode Codec. } { } function EncodeHTMLText(const WideHTMLText: PWideChar; const Len: Integer; const Codec: TCustomUnicodeCodec; const CanUseRefs: Boolean; const ReplaceText: WideString): RawByteString; var I, L : Integer; P : PWideChar; T : RawByteString; U : String; begin Assert(Assigned(Codec), 'Assigned(Codec)'); Assert(Codec.ErrorAction = eaStop, 'Codec.ErrorAction = eaStop'); L := Len; if L <= 0 then begin Result := ''; exit; end; // Encode using Codec, optionally replace unencodable characters with // their character references P := WideHTMLText; Result := ''; while L > 0 do begin T := Codec.Encode(P, L, I); if I > 0 then // bytes encoded begin Inc(P, I); Dec(L, I); Result := Result + T; end; if L > 0 then // unencodable character begin if CanUseRefs then begin // replace with encoded character reference U := htmlCharRef(Ord(P^), False); Result := Result + EncodeHTMLText(Pointer(U), Length(U), Codec, False, ''); end else if ReplaceText <> '' then // replace with encoded ReplaceText Result := Result + EncodeHTMLText(Pointer(ReplaceText), Length(ReplaceText), Codec, False, ''); // skip character Inc(P); Dec(L); end; end; end; { } { AhtmlObject } { } constructor AhtmlObject.Create; begin inherited Create; Init; end; procedure AhtmlObject.Init; begin end; function AhtmlObject.DuplicateObject: AhtmlObject; begin Result := AhtmlObject(ClassType.Create); Result.FName := FName; Result.FStyle := FStyle; end; function AhtmlObject.GetName: String; begin Result := FName; end; function AhtmlObject.GetNameUTF8: RawByteString; begin Result := StringToUTF8String(GetName); end; function AhtmlObject.GetStructureStr(const Depth: Integer; const Level: Integer): String; begin Result := GetName; if Result = '' then Result := '{' + StrExclPrefixU(StrExclPrefixU(ClassName, 'T'), 'html', False) + '}'; end; function AhtmlObject.GetContentText: String; begin raise EhtmlObject.Create('Cannot get content'); end; procedure AhtmlObject.SetContentText(const ContentText: String); begin raise EhtmlObject.Create('Cannot set content'); end; function AhtmlObject.GetContentTextUTF8: RawByteString; begin Result := StringToUTF8String(GetContentText); end; procedure AhtmlObject.SetContentTextUTF8(const ContentText: RawByteString); begin SetContentText(UTF8StringToString(ContentText)); end; function AhtmlObject.GetHTMLText: String; begin raise EhtmlObject.Create('Cannot get HTML text'); end; function AhtmlObject.GetHTMLTextUTF8: RawByteString; begin Result := StringToUTF8String(GetHTMLText); end; function AhtmlObject.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var S: String; begin S := GetHTMLText; Result := EncodeHTMLText(Pointer(S), Length(S), Codec, True, ''); end; function AhtmlObject.EncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; begin if not Assigned(Codec) then raise EhtmlObject.Create('No codec'); Codec.ErrorAction := eaStop; Result := GetEncodedHTMLText(Codec); end; procedure AhtmlObject.InitStyleElementInfo(const StyleSheet: ThtmlCSS; const ParentInfo: PhtmlcssElementInfo); begin end; procedure AhtmlObject.InitStyleInfo(const StyleSheet: ThtmlCSS; const StyleInfo: ThtmlcssStyleProperties); begin FStyle := StyleInfo; end; function AhtmlObject.Refactor(const Operations: TRefactorOperations): AhtmlObject; begin Result := self; end; { } { AhtmlContainerObject } { } destructor AhtmlContainerObject.Destroy; begin ClearItems; inherited Destroy; end; procedure AhtmlContainerObject.ClearItems; var I, N: AhtmlObject; begin I := FFirst; while Assigned(I) do begin N := I.FNext; I.Free; if I = FLast then break; I := N; end; FFirst := nil; FLast := nil; end; function AhtmlContainerObject.DuplicateObject: AhtmlObject; var I: AhtmlObject; begin Result := inherited DuplicateObject; I := FFirst; while Assigned(I) do begin AhtmlContainerObject(Result).AddItem(I.DuplicateObject); I := I.FNext; end; end; function AhtmlContainerObject.GetStructureStr(const Depth: Integer; const Level: Integer): String; var I: AhtmlObject; begin if (Depth > 0) and (Level > Depth) then begin Result := ''; exit; end; Result := GetName; if (Depth <= 0) or (Level < Depth) then begin I := FFirst; if Assigned(I) then begin Result := WideCRLF + DupStrU(' ', Level) + Result + '['; while Assigned(I) do begin if I <> FFirst then Result := Result + ','; Result := Result + I.GetStructureStr(Depth, Level + 1); I := I.FNext; end; Result := Result + WideCRLF + DupStrU(' ', Level) + ']'; end else Result := Result + '[]'; end; end; function AhtmlContainerObject.GetContentText: String; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + I.GetContentText; I := I.FNext; end; end; procedure AhtmlContainerObject.SetContentText(const ContentText: String); var T: Boolean; begin T := Assigned(FFirst) and (FFirst = FLast); if T then begin T := FFirst is ThtmlText; if T then ThtmlText(FFirst).ContentText := ContentText; end; if not T then begin ClearItems; AddItem(ThtmlText.Create(ContentText)); end; end; { function AhtmlContainerObject.GetHTMLTextUTF8: RawByteString; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + I.GetHTMLTextUTF8; I := I.FNext; end; end; } function AhtmlContainerObject.GetHTMLText: String; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + I.GetHTMLText; I := I.FNext; end; end; function AhtmlContainerObject.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + I.GetEncodedHTMLText(Codec); I := I.FNext; end; end; procedure AhtmlContainerObject.AddItem(const Obj: AhtmlObject); begin Obj.FParent := self; Obj.FNext := nil; Obj.FPrev := FLast; if Assigned(FLast) then FLast.FNext := Obj else FFirst := Obj; FLast := Obj; end; procedure AhtmlContainerObject.DeleteChild(const Obj: AhtmlObject); begin Obj.FParent := nil; if Assigned(Obj.FPrev) then Obj.FPrev.FNext := Obj.FNext; if Assigned(Obj.FNext) then Obj.FNext.FPrev := Obj.FPrev; if FFirst = Obj then FFirst := Obj.FNext; if FLast = Obj then FLast := Obj.FPrev; end; procedure AhtmlContainerObject.ReplaceChild(const Obj, NewObj: AhtmlObject); begin NewObj.FParent := self; NewObj.FNext := Obj.FNext; NewObj.FPrev := Obj.FPrev; if Assigned(Obj.FNext) then Obj.FNext.FPrev := NewObj; if Assigned(Obj.FPrev) then Obj.FPrev.FNext := NewObj; if FFirst = Obj then FFirst := NewObj; if FLast = Obj then FLast := NewObj; end; function AhtmlContainerObject.FindNext(const Previous: AhtmlObject; const ItemClass: AhtmlObjectClass): AhtmlObject; begin if Assigned(Previous) then Result := Previous.FNext else Result := FFirst; while Assigned(Result) do if Result.InheritsFrom(ItemClass) then exit else Result := Result.FNext; end; function AhtmlContainerObject.FindNextName(const Previous: AhtmlObject; const Name: String): AhtmlObject; begin if Assigned(Previous) then Result := Previous.FNext else Result := FFirst; while Assigned(Result) do if StrEqualNoAsciiCaseU(Name, Result.Name) then exit else Result := Result.FNext; end; function AhtmlContainerObject.GetItemByClass(const ItemClass: AhtmlObjectClass): AhtmlObject; begin Result := FindNext(nil, ItemClass); end; function AhtmlContainerObject.RequireItemByClass(const ItemClass: AhtmlObjectClass): AhtmlObject; begin Result := GetItemByClass(ItemClass); if Assigned(Result) then exit; Result := ItemClass.Create; AddItem(Result); end; function AhtmlContainerObject.GetItemTextByClass(const ItemClass: AhtmlObjectClass): RawByteString; var V: AhtmlObject; begin V := GetItemByClass(ItemClass); if Assigned(V) then Result := V.ContentTextUTF8 else Result := ''; end; function AhtmlContainerObject.CreateItem(const ID: Integer; const Name: String): AhtmlObject; begin if FParent is AhtmlContainerObject then Result := AhtmlContainerObject(FParent).CreateItem(ID, Name) else Result := htmlCreateElement(ThtmlTagID(ID), Name); end; function AhtmlContainerObject.GetItem(const Name: String): AhtmlObject; begin Result := FindNextName(nil, Name); end; procedure AhtmlContainerObject.SetItem(const Name: String; const Item: AhtmlObject); var V: AhtmlObject; begin V := FindNextName(nil, Name); if not Assigned(V) then AddItem(Item) else begin Item.FParent := self; Item.FNext := V.FNext; Item.FPrev := V.FPrev; if Assigned(Item.FPrev) then Item.FPrev.FNext := Item; if Assigned(Item.FNext) then Item.FNext.FPrev := Item; if FFirst = V then FFirst := Item; if FLast = V then FLast := Item; V.Free; end; end; function AhtmlContainerObject.RequireItem(const Name: String; const ItemClass: AhtmlObjectClass): AhtmlObject; begin Result := GetItem(Name); if Assigned(Result) then exit; if not Assigned(ItemClass) then begin // class not specified Result := CreateItem(-1, Name); // request item from container if not Assigned(Result) then raise EhtmlContainerObject.Create('Can not create item'); end else Result := ItemClass.Create; // create specified item class AddItem(Result); end; function AhtmlContainerObject.GetItemText(const Name: String): String; var T: AhtmlObject; begin T := GetItem(Name); if not Assigned(T) then Result := '' else Result := T.ContentText; end; procedure AhtmlContainerObject.SetItemText(const Name: String; const Text: String); begin RequireItem(Name, nil).ContentText := Text; end; procedure AhtmlContainerObject.InitStyleElementInfo(const StyleSheet: ThtmlCSS; const ParentInfo: PhtmlcssElementInfo); var I: AhtmlObject; begin inherited InitStyleElementInfo(StyleSheet, ParentInfo); // Init children's element info I := FFirst; while Assigned(I) do begin Assert(I.FParent = self, 'I.FParent = self'); I.InitStyleElementInfo(StyleSheet, ParentInfo); if I = FLast then break; I := I.FNext; end; end; procedure AhtmlContainerObject.InitStyleInfo(const StyleSheet: ThtmlCSS; const StyleInfo: ThtmlcssStyleProperties); var I: AhtmlObject; begin inherited; // Init children's style info I := FFirst; while Assigned(I) do begin Assert(I.FParent = self, 'I.FParent = self'); I.InitStyleInfo(StyleSheet, StyleInfo); if I = FLast then break; I := I.FNext; end; end; function AhtmlContainerObject.Refactor(const Operations: TRefactorOperations): AhtmlObject; var I, J, N : AhtmlObject; R : Boolean; begin Result := self; // Refactor children I := FFirst; while Assigned(I) do begin Assert(I.FParent = self, 'I.FParent = self'); J := I.Refactor(Operations); R := I = FLast; N := I.FNext; if not Assigned(J) then // Delete child begin DeleteChild(I); I.Free; end else if J <> I then // Replace child begin ReplaceChild(I, J); I.Free; end; if R then break; I := N; end; end; { } { AhtmlTextStringContentObject } { } constructor AhtmlTextStringContentObject.Create(const Text: String); begin inherited Create; FText := Text; end; constructor AhtmlTextStringContentObject.CreateWide(const Text: WideString); begin inherited Create; SetContentText(Text); end; function AhtmlTextStringContentObject.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; AhtmlTextStringContentObject(Result).FText := FText; end; function AhtmlTextStringContentObject.GetContentText: String; begin Result := FText; end; procedure AhtmlTextStringContentObject.SetContentText(const ContentText: String); begin FText := ContentText; end; { } { ThtmlText } { } procedure htmlSafeText(var S: String); begin S := StrReplaceCharStrU('&', '&', S); S := StrReplaceCharStrU('<', '<', S); S := StrReplaceCharStrU('>', '>', S); S := StrReplaceCharStrU('"', '"', S); S := StrReplaceCharStrU(#39, ''', S); end; function ThtmlText.GetHTMLText: String; begin Result := FText; htmlSafeText(Result); end; function ThtmlText.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var S: WideString; begin S := GetHTMLText; Result := EncodeHTMLText(Pointer(S), Length(S), Codec, True, ''); end; function ThtmlText.GetStructureStr(const Depth: Integer; const Level: Integer): String; begin Result := '"' + StrReplaceCharU([#0..#31], WideChar('#'), CopyLeftEllipsedU(GetContentText, 15)) + '"'; end; function ThtmlText.IsStartWhiteSpace: Boolean; var P: PAnsiChar; begin Result := False; if FText = '' then exit; P := Pointer(FText); Result := P^ in [#0..#32]; end; function ThtmlText.IsEndWhiteSpace: Boolean; var P: PAnsiChar; begin Result := False; if FText = '' then exit; P := Pointer(FText); Inc(P, Length(FText) - 1); Result := P^ in [#0..#32]; end; function ThtmlText.Refactor(const Operations: TRefactorOperations): AhtmlObject; begin Result := self; if reopRefactorForLayout in Operations then begin // prepare whitespace FText := StrReplaceCharU([#0..#31], WideChar(#32), FText); if FStyle.WhiteSpace = wsPre then exit; // Not pre-formatted style // Remove consecutive whitespaces FText := StrRemoveDupU(FText, #32); // Trim preceding white space if previous object ends with a white space if IsStartWhiteSpace and Assigned(FPrev) and ((FPrev is ThtmlLineBreak) or ((FPrev is ThtmlText) and ThtmlText(FPrev).IsEndWhiteSpace)) then StrTrimLeftInPlaceU(FText, [#0..#32]); end; end; { } { ThtmlLineBreak } { } function ThtmlLineBreak.GetContentText: String; begin Result := WideCRLF; end; function ThtmlLineBreak.GetHTMLText: String; begin Result := WideCRLF; end; function ThtmlLineBreak.Refactor(const Operations: TRefactorOperations): AhtmlObject; begin Result := self; if reopRefactorForLayout in Operations then begin if FStyle.WhiteSpace = wsPre then exit; // Not pre-formatted style // Delete line-break if its preceded by white-space if Assigned(FPrev) and ((FPrev is ThtmlLineBreak) or ((FPrev is ThtmlText) and ThtmlText(FPrev).IsEndWhiteSpace)) then begin Result := nil; exit; end; end; end; { } { ThtmlCDATA } { } function ThtmlCDATA.GetHTMLText: String; begin Result := ''; end; function ThtmlCDATA.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var S: String; begin S := GetHTMLText; Result := EncodeHTMLText(Pointer(S), Length(S), Codec, False, ''); end; { } { ThtmlCharRef } { } constructor ThtmlCharRef.Create(const CharVal: LongWord; const HasTrailer: Boolean); begin inherited Create; FCharVal := CharVal; FHasTrailer := HasTrailer; end; function ThtmlCharRef.GetAsWideChar: WideChar; begin if FCharVal <= $FFFF then Result := WideChar(FCharVal) else Result := WideChar(#$FFFD); // Unicode replacement character end; function ThtmlCharRef.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; ThtmlCharRef(Result).FCharVal := FCharVal; end; function ThtmlCharRef.GetContentText: String; begin SetLength(Result, 1); PWideChar(Pointer(Result))^ := GetAsWideChar; end; function ThtmlCharRef.GetHTMLText: String; begin Result := '&#' + Word32ToStrU(FCharVal) + ';'; end; function ThtmlCharRef.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var S: WideString; begin S := GetHTMLText; Result := EncodeHTMLText(Pointer(S), Length(S), Codec, False, ''); end; function ThtmlCharRef.Refactor(const Operations: TRefactorOperations): AhtmlObject; begin if reopRefactorForLayout in Operations then // Replace with resolved text object Result := ThtmlText.Create(GetContentText) else Result := self; end; { } { ThtmlEntityRef } { } constructor ThtmlEntityRef.Create(const EntityName: String; const HasTrailer: Boolean); begin inherited Create; FName := EntityName; FHasTrailer := HasTrailer; end; function ThtmlEntityRef.GetContentText: String; var C: Word; begin C := htmlDecodeCharEntity(FName); if C > 0 then begin SetLength(Result, 1); PWideChar(Pointer(Result))^ := WideChar(C); end else begin // unresolved Result := '&' + GetName; if FHasTrailer then Result := Result + ';'; end; end; function ThtmlEntityRef.GetHTMLText: String; begin Result := '&' + FName + ';'; end; function ThtmlEntityRef.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var S: WideString; begin S := GetHTMLText; Result := EncodeHTMLText(Pointer(S), Length(S), Codec, False, ''); end; function ThtmlEntityRef.Refactor(const Operations: TRefactorOperations): AhtmlObject; var S: String; begin Result := self; if reopRefactorForLayout in Operations then begin // Replace with text object S := GetContentText; if S = '' then Result := ThtmlText.Create(GetHTMLText) else Result := ThtmlText.Create(S); end; end; { } { ThtmlComment } { } constructor ThtmlComment.Create(const Comments: StringArray); begin inherited Create; FComments := Comments; end; function ThtmlComment.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; ThtmlComment(Result).FComments := Comments; end; function ThtmlComment.GetHTMLText: String; var I: Integer; begin Result := ''; end; { } { ThtmlEmptyComment } { } function ThtmlEmptyComment.GetHTMLText: String; begin Result := ''; end; { } { ThtmlPI } { } constructor ThtmlPI.Create(const Target, PI: String); begin inherited Create; FTarget := Target; FPI := PI; end; function ThtmlPI.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; ThtmlPI(Result).FTarget := FTarget; ThtmlPI(Result).FPI := FPI; end; function ThtmlPI.GetHTMLText: String; begin Result := ''; end; { } { ThtmlRawTag } { } constructor ThtmlRawTag.Create(const Tag: String); begin inherited Create; FTag := Tag; end; function ThtmlRawTag.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; ThtmlRawTag(Result).FTag := FTag; end; function ThtmlRawTag.GetHTMLText: String; begin Result := '<' + FTag + '>'; end; { } { ThtmlElementAttribute } { } constructor ThtmlElementAttribute.Create(const AttrID: ThtmlAttrID; const Name: String); begin inherited Create; FAttrID := AttrID; FName := Name; end; function ThtmlElementAttribute.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; ThtmlElementAttribute(Result).FAttrID := FAttrID; end; function ThtmlElementAttribute.GetName: String; begin if (FName = '') and (FAttrID <> HTML_ATTR_None) then FName := htmlGetAttrName(FAttrID); Result := FName; end; { function ThtmlElementAttribute.GetHTMLTextUTF8: RawByteString; begin Result := NameUTF8; if Assigned(FFirst) then Result := Result + '="' + inherited GetHTMLTextUTF8 + '"'; end; } function ThtmlElementAttribute.GetHTMLText: String; begin Result := Name; if Assigned(FFirst) then Result := Result + '="' + inherited GetHTMLText + '"'; end; function ThtmlElementAttribute.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var S: String; begin S := Name; Result := EncodeHTMLText(Pointer(S), Length(S), Codec, False, ''); if Assigned(FFirst) then Result := Result + Codec.EncodeStr('="') + inherited GetEncodedHTMLText(Codec) + Codec.EncodeStr('"'); end; { } { ThtmlElementAttributes } { } function ThtmlElementAttributes.GetContentText: String; begin raise EhtmlContainerObject.Create('No content'); end; procedure ThtmlElementAttributes.SetContentText(const ContentText: String); begin raise EhtmlContainerObject.Create('Can not set content'); end; function ThtmlElementAttributes.CreateItem(const ID: Integer; const Name: String): AhtmlObject; begin Result := ThtmlElementAttribute.Create(ThtmlAttrID(ID), Name); end; { function ThtmlElementAttributes.GetHTMLTextUTF8: RawByteString; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + ' ' + I.GetHTMLTextUTF8; I := I.FNext; end; end; } function ThtmlElementAttributes.GetHTMLText: String; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + ' ' + I.GetHTMLText; I := I.FNext; end; end; function ThtmlElementAttributes.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var I: AhtmlObject; begin Result := ''; I := FFirst; while Assigned(I) do begin Result := Result + Codec.EncodeStr(' ') + I.GetEncodedHTMLText(Codec); I := I.FNext; end; end; function ThtmlElementAttributes.HasAttribute(const Name: String): Boolean; begin Result := Assigned(Item[Name]); end; function ThtmlElementAttributes.GetAttributeText(const Name: String): String; begin Result := ItemText[Name]; end; procedure ThtmlElementAttributes.SetAttributeText(const Name: String; const Value: String); begin ItemText[Name] := Value; end; function ThtmlElementAttributes.GetAttributeFlag(const Name: String): Boolean; begin Result := HasAttribute(Name); end; procedure ThtmlElementAttributes.SetAttributeFlag(const Name: String; const Value: Boolean); begin if GetAttributeFlag(Name) = Value then exit; if Value then AddItem(CreateItem(-1, Name)) else DeleteAttribute(Name); end; procedure ThtmlElementAttributes.DeleteAttribute(const Name: String); var I: AhtmlObject; begin I := FindNextName(nil, Name); if Assigned(I) then DeleteChild(I); end; { } { AhtmlElement } { } constructor AhtmlElement.Create(const TagID: ThtmlTagID; const Name: String; const TagType: ThtmlElementTagType); begin inherited Create; FName := Name; FTagID := TagID; FTagType := TagType; end; destructor AhtmlElement.Destroy; begin FreeAndNil(FAttributes); FreeAndNil(FStyleAttr); inherited Destroy; end; function AhtmlElement.GetName: String; begin Result := FName; if (Result = '') and (FTagID <> HTML_TAG_None) then begin FName := htmlGetTagName(FTagID); Result := FName; end; end; procedure AhtmlElement.AssignElementInfo(const Source: AhtmlElement); begin FName := Source.FName; FTagType := Source.FTagType; FTagID := Source.FTagID; if Assigned(Source.FAttributes) then FAttributes := ThtmlElementAttributes(Source.FAttributes.DuplicateObject); end; function AhtmlElement.DuplicateObject: AhtmlObject; begin Result := inherited DuplicateObject; AhtmlElement(Result).AssignElementInfo(self); end; function AhtmlElement.DuplicateElement: AhtmlElement; begin Result := AhtmlElement(ClassType.Create); Result.AssignElementInfo(self); end; function AhtmlElement.GetAttributes: ThtmlElementAttributes; begin if not Assigned(FAttributes) then FAttributes := ThtmlElementAttributes.Create; Result := FAttributes; end; procedure AhtmlElement.SetContentText(const ContentText: String); begin if FTagType <> ttContentTags then raise EhtmlContainerObject.Create('Tag(' + Name + ') cannot have content'); inherited; end; { function AhtmlElement.GetHTMLTextUTF8: RawByteString; begin Result := '<' + NameUTF8; if Assigned(FAttributes) then Result := Result + FAttributes.GetHTMLTextUTF8; case FTagType of ttEmptyTag : Result := Result + '/>'; ttStartTag : Result := Result + '>'; else Result := Result + '>' + inherited GetHTMLTextUTF8 + ''; end; end; } function AhtmlElement.GetHTMLText: String; var N: String; begin N := Name; Result := '<' + N; if Assigned(FAttributes) then Result := Result + FAttributes.GetHTMLText; case FTagType of ttEmptyTag : Result := Result + '/>'; ttStartTag : Result := Result + '>'; else Result := Result + '>' + inherited GetHTMLText + ''; end; end; function AhtmlElement.GetEncodedHTMLText(const Codec: TCustomUnicodeCodec): RawByteString; var N, S: RawByteString; begin N := Codec.EncodeStr(UTF8StringToString(NameUTF8)); Result := Codec.EncodeStr('<') + N; if Assigned(FAttributes) then Result := Result + FAttributes.GetEncodedHTMLText(Codec); case FTagType of ttEmptyTag : Result := Result + Codec.EncodeStr('/>'); ttStartTag : Result := Result + Codec.EncodeStr('>'); else begin S := Codec.EncodeStr('>'); Result := Result + S + inherited GetEncodedHTMLText(Codec) + Codec.EncodeStr(' HTML_TAG_None, 'TagID <> HTML_TAG_None'); inherited Create(TagID, '', TagType); end; end.