source upload

This commit is contained in:
Razor12911
2022-01-17 22:16:47 +02:00
parent 12936d065b
commit 098e8c48de
1778 changed files with 1206749 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
{ Compiler settings for the HTML units: first pull in the include file one }
{ directory up, then derive the HTML-specific conditional symbols from it. }
{$INCLUDE ..\flcInclude.inc}
{ HTML_DEBUG is defined whenever DEBUG is defined. }
{$IFDEF DEBUG}
{$DEFINE HTML_DEBUG}
{ HTML_TEST (which enables the units' Test procedures) requires both DEBUG }
{ and TEST, because this check is nested inside the DEBUG block. }
{$IFDEF TEST}
{$DEFINE HTML_TEST}
{$ENDIF}
{$ENDIF}

View File

@@ -0,0 +1,317 @@
{******************************************************************************}
{ }
{ Library: Fundamentals 5.00 - HTML Parser }
{ File name: flcHTMLCharEntity.pas }
{ File version: 5.04 }
{ Description: HTML named character entities }
{ }
{ Copyright: Copyright (c) 2000-2020, David J Butler }
{ All rights reserved. }
{ Redistribution and use in source and binary forms, with }
{ or without modification, are permitted provided that }
{ the following conditions are met: }
{ Redistributions of source code must retain the above }
{ copyright notice, this list of conditions and the }
{ following disclaimer. }
{ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
{ CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
{ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
{ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
{ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
{ THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
{ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
{ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
{ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
{ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
{ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
{ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
{ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
{ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
{ POSSIBILITY OF SUCH DAMAGE. }
{ }
{ Github: https://github.com/fundamentalslib }
{ E-mail: fundamentals.library at gmail.com }
{ }
{ Revision history: }
{ }
{ 2002/11/03 1.00 Part of cHTMLUtils. }
{ 2002/12/08 1.01 Part of cHTMLConsts. }
{ 2015/04/04 1.02 RawByteString changes. }
{ 2015/04/11 1.03 UnicodeString changes. }
{ 2019/02/22 5.04 Revised for Fundamentals 5. }
{ }
{******************************************************************************}
{$INCLUDE flcHTML.inc}
unit flcHTMLCharEntity;
interface
{ }
{ HTML Named Character Entities }
{ }
// Looks up a named character entity and returns its character code.
//   Entity  - the bare entity name as it appears in the entity table,
//             e.g. 'amp' or 'pi' (no '&' or ';' delimiters).
//   Result  - the character code, or 0 when the name is not in the table.
// The name comparison is case-sensitive ('quot' is found, 'QUOT' is not).
function htmlDecodeCharEntity(const Entity: String): Word;
{ }
{ htmlCharRef }
{ }
// Formats CharVal as a numeric character reference body.
//   UseHex = False : '#' + decimal value + ';'
//   UseHex = True  : '#x' + hexadecimal value + ';', where 2, 4 or 6 is
//                    passed to Word32toHex for values up to $FF, up to $FFFF
//                    and above (presumably the digit count - confirm in flcUtils).
// Note that the returned text does not include a leading '&'.
function htmlCharRef(const CharVal: LongWord; const UseHex: Boolean): String;
{ }
{ Tests }
{ }
// Test is only declared when HTML_TEST is defined.
{$IFDEF HTML_TEST}
procedure Test;
{$ENDIF}
implementation
uses
flcStdTypes,
flcUtils,
flcDynArrays;
{ }
{ HTML Named Character Entities }
{ }
type
// One row of the named entity table: an entity name and its character code.
THtmlEntity = record
Name : String; // entity name without '&' and ';', compared case-sensitively
Char : Word; // character code returned for this name
end;
const
// HTML 4 character entity references
// HtmlEntities is the number of rows in HtmlEntity; the array bounds are
// declared from it, so it must be kept in step with the rows below.
// The rows are grouped by origin and are not sorted by name; lookups go
// through the hash index built by InitHTMLEntityHash.
HtmlEntities = 253;
HtmlEntity: array[0..HtmlEntities - 1] of ThtmlEntity = (
(* Additional *)
(Name:'apos'; Char:39),
(* HTMLspecial.ent *)
(Name:'quot'; Char:34), (Name:'amp'; Char:38),
(Name:'lt'; Char:60), (Name:'gt'; Char:62),
{ Latin Extended-A }
(Name:'OElig'; Char:338), (Name:'oelig'; Char:339),
(Name:'Scaron'; Char:352), (Name:'scaron'; Char:353),
(Name:'Yuml'; Char:376),
{ Spacing Modifier Letters }
(Name:'circ'; Char:710), (Name:'tilde'; Char:732),
{ General Punctuation }
(Name:'ensp'; Char:8194), (Name:'emsp'; Char:8195),
(Name:'thinsp'; Char:8201), (Name:'zwnj'; Char:8204),
(Name:'zwj'; Char:8205), (Name:'lrm'; Char:8206),
(Name:'rlm'; Char:8207), (Name:'ndash'; Char:8211),
(Name:'mdash'; Char:8212), (Name:'lsquo'; Char:8216),
(Name:'rsquo'; Char:8217), (Name:'sbquo'; Char:8218),
(Name:'ldquo'; Char:8220), (Name:'rdquo'; Char:8221),
(Name:'bdquo'; Char:8222), (Name:'dagger'; Char:8224),
(Name:'Dagger'; Char:8225), (Name:'permil'; Char:8240),
(Name:'lsaquo'; Char:8249), (Name:'rsaquo'; Char:8250),
(Name:'euro'; Char:8364),
(* HTMLsymbol.ent *)
{ Latin Extended-B }
(Name:'fnof'; Char:402),
{ Greek }
(Name:'Alpha'; Char:913), (Name:'Beta'; Char:914),
(Name:'Gamma'; Char:915), (Name:'Delta'; Char:916),
(Name:'Epsilon'; Char:917), (Name:'Zeta'; Char:918),
(Name:'Eta'; Char:919), (Name:'Theta'; Char:920),
(Name:'Iota'; Char:921), (Name:'Kappa'; Char:922),
(Name:'Lambda'; Char:923), (Name:'Mu'; Char:924),
(Name:'Nu'; Char:925), (Name:'Xi'; Char:926),
(Name:'Omicron'; Char:927), (Name:'Pi'; Char:928),
(Name:'Rho'; Char:929), (Name:'Sigma'; Char:931),
(Name:'Tau'; Char:932), (Name:'Upsilon'; Char:933),
(Name:'Phi'; Char:934), (Name:'Chi'; Char:935),
(Name:'Psi'; Char:936), (Name:'Omega'; Char:937),
(Name:'alpha'; Char:945), (Name:'beta'; Char:946),
(Name:'gamma'; Char:947), (Name:'delta'; Char:948),
(Name:'epsilon'; Char:949), (Name:'zeta'; Char:950),
(Name:'eta'; Char:951), (Name:'theta'; Char:952),
(Name:'iota'; Char:953), (Name:'kappa'; Char:954),
(Name:'lambda'; Char:955), (Name:'mu'; Char:956),
(Name:'nu'; Char:957), (Name:'xi'; Char:958),
(Name:'omicron'; Char:959), (Name:'pi'; Char:960),
(Name:'rho'; Char:961), (Name:'sigmaf'; Char:962),
(Name:'sigma'; Char:963), (Name:'tau'; Char:964),
(Name:'upsilon'; Char:965), (Name:'phi'; Char:966),
(Name:'chi'; Char:967), (Name:'psi'; Char:968),
(Name:'omega'; Char:969), (Name:'thetasym'; Char:977),
(Name:'upsih'; Char:978), (Name:'piv'; Char:982),
{ General Punctuation }
(Name:'bull'; Char:8226), (Name:'hellip'; Char:8230),
(Name:'prime'; Char:8242), (Name:'Prime'; Char:8243),
(Name:'oline'; Char:8254), (Name:'frasl'; Char:8260),
{ Letterlike Symbols }
(Name:'weierp'; Char:8472), (Name:'image'; Char:8465),
(Name:'real'; Char:8476), (Name:'trade'; Char:8482),
(Name:'alefsym'; Char:8501),
{ Arrows }
(Name:'larr'; Char:8592), (Name:'uarr'; Char:8593),
(Name:'rarr'; Char:8594), (Name:'darr'; Char:8595),
(Name:'harr'; Char:8596), (Name:'crarr'; Char:8629),
(Name:'lArr'; Char:8656), (Name:'uArr'; Char:8657),
(Name:'rArr'; Char:8658), (Name:'dArr'; Char:8659),
(Name:'hArr'; Char:8660),
{ Mathematical Operators }
(Name:'forall'; Char:8704), (Name:'part'; Char:8706),
(Name:'exist'; Char:8707), (Name:'empty'; Char:8709),
(Name:'nabla'; Char:8711), (Name:'isin'; Char:8712),
(Name:'notin'; Char:8713), (Name:'ni'; Char:8715),
(Name:'prod'; Char:8719), (Name:'sum'; Char:8721),
(Name:'minus'; Char:8722), (Name:'lowast'; Char:8727),
(Name:'radic'; Char:8730), (Name:'prop'; Char:8733),
(Name:'infin'; Char:8734), (Name:'ang'; Char:8736),
(Name:'and'; Char:8743), (Name:'or'; Char:8744),
(Name:'cap'; Char:8745), (Name:'cup'; Char:8746),
(Name:'int'; Char:8747), (Name:'there4'; Char:8756),
(Name:'sim'; Char:8764), (Name:'cong'; Char:8773),
(Name:'asymp'; Char:8776), (Name:'ne'; Char:8800),
(Name:'equiv'; Char:8801), (Name:'le'; Char:8804),
(Name:'ge'; Char:8805), (Name:'sub'; Char:8834),
(Name:'sup'; Char:8835), (Name:'nsub'; Char:8836),
(Name:'sube'; Char:8838), (Name:'supe'; Char:8839),
(Name:'oplus'; Char:8853), (Name:'otimes'; Char:8855),
(Name:'perp'; Char:8869), (Name:'sdot'; Char:8901),
{ Miscellaneous Technical }
(Name:'lceil'; Char:8968), (Name:'rceil'; Char:8969),
(Name:'lfloor'; Char:8970), (Name:'rfloor'; Char:8971),
(Name:'lang'; Char:9001), (Name:'rang'; Char:9002),
(Name:'loz'; Char:9674),
{ Miscellaneous Symbols }
(Name:'spades'; Char:9824), (Name:'clubs'; Char:9827),
(Name:'hearts'; Char:9829), (Name:'diams'; Char:9830),
(* HTMLlat1.ent *)
(Name:'nbsp'; Char:160), (Name:'iexcl'; Char:161),
(Name:'cent'; Char:162), (Name:'pound'; Char:163),
(Name:'curren'; Char:164), (Name:'yen'; Char:165),
(Name:'brvbar'; Char:166), (Name:'sect'; Char:167),
(Name:'uml'; Char:168), (Name:'copy'; Char:169),
(Name:'ordf'; Char:170), (Name:'laquo'; Char:171),
(Name:'not'; Char:172), (Name:'shy'; Char:173),
(Name:'reg'; Char:174), (Name:'macr'; Char:175),
(Name:'deg'; Char:176), (Name:'plusmn'; Char:177),
(Name:'sup2'; Char:178), (Name:'sup3'; Char:179),
(Name:'acute'; Char:180), (Name:'micro'; Char:181),
(Name:'para'; Char:182), (Name:'middot'; Char:183),
(Name:'cedil'; Char:184), (Name:'sup1'; Char:185),
(Name:'ordm'; Char:186), (Name:'raquo'; Char:187),
(Name:'frac14'; Char:188), (Name:'frac12'; Char:189),
(Name:'frac34'; Char:190), (Name:'iquest'; Char:191),
(Name:'Agrave'; Char:192), (Name:'Aacute'; Char:193),
(Name:'Acirc'; Char:194), (Name:'Atilde'; Char:195),
(Name:'Auml'; Char:196), (Name:'Aring'; Char:197),
(Name:'AElig'; Char:198), (Name:'Ccedil'; Char:199),
(Name:'Egrave'; Char:200), (Name:'Eacute'; Char:201),
(Name:'Ecirc'; Char:202), (Name:'Euml'; Char:203),
(Name:'Igrave'; Char:204), (Name:'Iacute'; Char:205),
(Name:'Icirc'; Char:206), (Name:'Iuml'; Char:207),
(Name:'ETH'; Char:208), (Name:'Ntilde'; Char:209),
(Name:'Ograve'; Char:210), (Name:'Oacute'; Char:211),
(Name:'Ocirc'; Char:212), (Name:'Otilde'; Char:213),
(Name:'Ouml'; Char:214), (Name:'times'; Char:215),
(Name:'Oslash'; Char:216), (Name:'Ugrave'; Char:217),
(Name:'Uacute'; Char:218), (Name:'Ucirc'; Char:219),
(Name:'Uuml'; Char:220), (Name:'Yacute'; Char:221),
(Name:'THORN'; Char:222), (Name:'szlig'; Char:223),
(Name:'agrave'; Char:224), (Name:'aacute'; Char:225),
(Name:'acirc'; Char:226), (Name:'atilde'; Char:227),
(Name:'auml'; Char:228), (Name:'aring'; Char:229),
(Name:'aelig'; Char:230), (Name:'ccedil'; Char:231),
(Name:'egrave'; Char:232), (Name:'eacute'; Char:233),
(Name:'ecirc'; Char:234), (Name:'euml'; Char:235),
(Name:'igrave'; Char:236), (Name:'iacute'; Char:237),
(Name:'icirc'; Char:238), (Name:'iuml'; Char:239),
(Name:'eth'; Char:240), (Name:'ntilde'; Char:241),
(Name:'ograve'; Char:242), (Name:'oacute'; Char:243),
(Name:'ocirc'; Char:244), (Name:'otilde'; Char:245),
(Name:'ouml'; Char:246), (Name:'divide'; Char:247),
(Name:'oslash'; Char:248), (Name:'ugrave'; Char:249),
(Name:'uacute'; Char:250), (Name:'ucirc'; Char:251),
(Name:'uuml'; Char:252), (Name:'yacute'; Char:253),
(Name:'thorn'; Char:254), (Name:'yuml'; Char:255)
);
const
// Number of hash slots; one slot per table entry.
HtmlEntityHashSize = HtmlEntities;
var
// HtmlEntityHashIndex[slot] lists the HtmlEntity indexes whose names hash
// to that slot. It is filled by InitHTMLEntityHash on first lookup.
HtmlEntityHashIndex : array of LongIntArray;
// Set to True once InitHTMLEntityHash has completed.
// NOTE(review): this lazy initialisation is not guarded by any lock.
HtmlEntityHashInit : Boolean = False;
// Builds the name hash index over HtmlEntity and marks it as initialised.
procedure InitHTMLEntityHash;
var
  EntityIdx : Integer;
  Slot      : Integer;
begin
  // start from an empty index with one (initially empty) bucket per slot
  HtmlEntityHashIndex := nil;
  SetLength(HtmlEntityHashIndex, HtmlEntityHashSize);
  // file every entity index under the hash slot of its name
  for EntityIdx := Low(HtmlEntity) to High(HtmlEntity) do
    begin
      Slot := HashStr(HtmlEntity[EntityIdx].Name, 1, -1, True, HtmlEntityHashSize);
      DynArrayAppend(HtmlEntityHashIndex[Slot], EntityIdx);
    end;
  HtmlEntityHashInit := True;
end;
// Returns the character code for the named entity, or 0 if the name is
// not in the entity table. Entity is the bare name (e.g. 'amp') and is
// matched case-sensitively.
function htmlDecodeCharEntity(const Entity: String): Word;
var
  Slot      : Integer;
  BucketPos : Integer;
  EntityIdx : Integer;
begin
  // the hash index is built lazily on the first lookup
  if not HtmlEntityHashInit then
    InitHTMLEntityHash;
  Result := 0; // value reported when no entry matches
  Slot := HashStr(Entity, 1, -1, True, HtmlEntityHashSize);
  // only the entries that share the name's hash slot need comparing
  for BucketPos := 0 to High(HtmlEntityHashIndex[Slot]) do
    begin
      EntityIdx := HtmlEntityHashIndex[Slot][BucketPos];
      if HtmlEntity[EntityIdx].Name = Entity then // case-sensitive
        begin
          Result := HtmlEntity[EntityIdx].Char;
          break;
        end;
    end;
end;
{ }
{ htmlCharRef }
{ }
// Formats CharVal as a numeric character reference body: '#<decimal>;'
// or, when UseHex is set, '#x<hex>;'. No leading '&' is produced.
function htmlCharRef(const CharVal: LongWord; const UseHex: Boolean): String;
var
  HexDigits : Integer;
begin
  if not UseHex then
    begin
      Result := '#' + Word32ToStr(CharVal) + ';';
      exit;
    end;
  // pick the width argument for Word32toHex from the magnitude of the value
  if CharVal <= $FF then
    HexDigits := 2
  else
  if CharVal <= $FFFF then
    HexDigits := 4
  else
    HexDigits := 6;
  Result := '#x' + Word32toHex(CharVal, HexDigits) + ';';
end;
{ }
{ Tests }
{ }
{$IFDEF HTML_TEST}
{$ASSERTIONS ON}
// Self-test for htmlDecodeCharEntity (compiled only when HTML_TEST is defined).
procedure Test;
const
  TestName = 'htmlDecodeCharEntity';
begin
  // known names decode to their character codes
  Assert(htmlDecodeCharEntity('quot') = 34, TestName);
  // lookup is case-sensitive, so the upper-case spelling is unknown
  Assert(htmlDecodeCharEntity('QUOT') = 0, TestName);
  Assert(htmlDecodeCharEntity('pi') = 960, TestName);
  // unknown names yield 0
  Assert(htmlDecodeCharEntity('xyz') = 0, TestName);
end;
{$ENDIF}
end.

View File

@@ -0,0 +1,220 @@
{******************************************************************************}
{ }
{ Library: Fundamentals 5.00 - HTML Parser }
{ File name: flcHTMLDoc.pas }
{ File version: 5.03 }
{ Description: HTML document }
{ }
{ Copyright: Copyright (c) 2000-2020, David J Butler }
{ All rights reserved. }
{ Redistribution and use in source and binary forms, with }
{ or without modification, are permitted provided that }
{ the following conditions are met: }
{ Redistributions of source code must retain the above }
{ copyright notice, this list of conditions and the }
{ following disclaimer. }
{ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
{ CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
{ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
{ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
{ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
{ THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
{ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
{ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
{ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
{ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
{ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
{ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
{ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
{ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
{ POSSIBILITY OF SUCH DAMAGE. }
{ }
{ Github: https://github.com/fundamentalslib }
{ E-mail: fundamentals.library at gmail.com }
{ }
{ Revision history: }
{ }
{ 2002/10/21 1.00 Initial version in cHTMLObjects }
{ 2002/12/08 1.01 Created unit cHTMLDoc. }
{ 2015/04/11 1.02 UnicodeString changes. }
{ 2019/02/22 5.03 Revise for Fundamentals 5. }
{ }
{******************************************************************************}
{$INCLUDE flcHTML.inc}
unit flcHTMLDoc;
interface
uses
{ Fundamentals }
flcStdTypes,
{ HTML }
flcHTMLStyleSheet,
flcHTMLDocBase,
flcHTMLDocElements;
{ }
{ ThtmlDocument }
{ }
type
// Preparation stage of a document. The stages are set by ThtmlDocument:
//   htdocInit                - set by the constructor
//   htdocStructurePrepared   - set by PrepareStructure
//   htdocPreparingStyle      - set by InitDocStyle
//   htdocStylePrepared       - set by SetContentStyle
//   htdocRefactoredForLayout - set by RefactorForLayout
ThtmlDocumentState = (
htdocInit,
htdocStructurePrepared,
htdocPreparingStyle,
htdocStylePrepared,
htdocRefactoredForLayout);
// Root container of an HTML document. It owns a style sheet object and
// tracks how far the document has been prepared (see ThtmlDocumentState).
ThtmlDocument = class(AhtmlContainerObject)
protected
FStyleSheet : ThtmlCSS; // created in Create, freed in Destroy
FState : ThtmlDocumentState;
function GetHTML: ThtmlHTML;
public
constructor Create; override;
destructor Destroy; override;
{ Document objects }
// Creates a ThtmlHTML for the HTML_TAG_HTML tag id; any other id is
// passed on to the inherited CreateItem.
function CreateItem(const ID: Integer; const Name: String): AhtmlObject; override;
// The document's ThtmlHTML item, obtained through RequireItemByClass.
property HTML: ThtmlHTML read GetHTML;
property StyleSheet: ThtmlCSS read FStyleSheet;
{ Document state }
property State: ThtmlDocumentState read FState;
{ Initial preparation of the document structure }
procedure PrepareStructure;
{ Parse and apply style sheet information }
// Initialises the style sheet from ReaderStyle plus the document's own
// style text and style references; ExternalStyles receives the result
// of the style sheet's GetRequiredImports.
procedure InitDocStyle(const ReaderStyle: String;
var ExternalStyles: StringArray);
// Forwards Source, State and StyleText to the style sheet's
// SetImportedStyleState. Asserts that InitDocStyle has been called.
procedure SetExternalStyleState(const Source: String;
const State: ThtmlcssRuleSetState;
const StyleText: String);
procedure SetContentStyle;
{ Layout preparation }
// True in every state except htdocInit.
function ReadyForLayout: Boolean;
procedure RefactorForLayout;
end;
implementation
uses
{ System }
SysUtils,
{ Fundamentals }
flcDynArrays,
{ HTML }
flcHTMLElements,
flcHTMLStyleProperties;
{ }
{ ThtmlDocument }
{ }
// Creates an empty document in the htdocInit state with its own style sheet.
constructor ThtmlDocument.Create;
begin
  inherited;
  FState := htdocInit;
  FStyleSheet := ThtmlCSS.Create;
end;
// Releases the owned style sheet, then runs the inherited destructor.
destructor ThtmlDocument.Destroy;
begin
  // the field is cleared as part of the release
  FreeAndNil(FStyleSheet);
  inherited;
end;
// Item factory: the HTML_TAG_HTML tag id yields a ThtmlHTML object, every
// other id is delegated to the inherited implementation.
function ThtmlDocument.CreateItem(const ID: Integer; const Name: String): AhtmlObject;
begin
  if ThtmlTagID(ID) <> HTML_TAG_HTML then
    begin
      Result := inherited CreateItem(ID, Name);
      exit;
    end;
  Result := ThtmlHTML.Create;
end;
// Returns the document's ThtmlHTML item as supplied by RequireItemByClass.
function ThtmlDocument.GetHTML: ThtmlHTML;
var
  Item : TObject;
begin
  Item := RequireItemByClass(ThtmlHTML);
  // unchecked cast, as the item was requested by this exact class
  Result := ThtmlHTML(Item);
end;
// Prepares the structure of the HTML item and moves the document to the
// htdocStructurePrepared state. May be called again while in that state.
procedure ThtmlDocument.PrepareStructure;
const
  ValidStates = [htdocInit, htdocStructurePrepared];
begin
  Assert(FState in ValidStates,
      'FState in [htdocInit, htdocStructurePrepared]');
  HTML.PrepareStructure;
  FState := htdocStructurePrepared;
end;
// Starts style preparation: initialises the style sheet from ReaderStyle,
// the head's style text and the style references of head and body, then
// reports the imports the style sheet still requires in ExternalStyles.
procedure ThtmlDocument.InitDocStyle(
    const ReaderStyle: String;
    var ExternalStyles: StringArray);
const
  ValidStates = [htdocStructurePrepared, htdocPreparingStyle];
var
  CombinedRefs : StringArray;
  BodyRefs     : StringArray;
begin
  Assert(FState in ValidStates,
      'FState in [htdocStructurePrepared, htdocPreparingStyle]');
  FState := htdocPreparingStyle;
  // main style: head references first, body references appended after them
  CombinedRefs := HTML.Head.StyleRefs;
  BodyRefs := HTML.Body.StyleRefs;
  DynArrayAppendStringArray(CombinedRefs, BodyRefs);
  FStyleSheet.InitStyle(ReaderStyle, HTML.Head.StyleText, CombinedRefs);
  // external style sources the caller has to supply
  ExternalStyles := FStyleSheet.GetRequiredImports;
end;
// Hands the state and text of one external style source to the style sheet.
// Only valid while the document is in the htdocPreparingStyle state.
procedure ThtmlDocument.SetExternalStyleState(
    const Source: String;
    const State: ThtmlcssRuleSetState;
    const StyleText: String);
begin
  Assert(FState = htdocPreparingStyle, 'FState = htdocPreparingStyle');
  // State here is the parameter, not the document's State property
  FStyleSheet.SetImportedStyleState(Source, State, StyleText);
end;
// Applies the style sheet to the document content and moves the document
// to the htdocStylePrepared state. Valid in any state except htdocInit.
procedure ThtmlDocument.SetContentStyle;
var
  DefaultProps : ThtmlcssStyleProperties;
begin
  Assert(FState in [htdocStructurePrepared..htdocRefactoredForLayout],
      'FState in [htdocStructurePrepared, htdocPreparingStyle, htdocStylePrepared, htdocRefactoredForLayout]');
  // 1. information the style sheet selectors rely on
  InitStyleElementInfo(FStyleSheet, nil);
  // 2. document-wide default style properties
  InitDefaultStyleProperties(DefaultProps);
  // 3. actual style information for the child objects
  InitStyleInfo(FStyleSheet, DefaultProps);
  FState := htdocStylePrepared;
end;
// True once the document has left the htdocInit state.
function ThtmlDocument.ReadyForLayout: Boolean;
begin
  // the range covers every state declared after htdocInit
  Result := FState in [htdocStructurePrepared..htdocRefactoredForLayout];
end;
// Refactors the child objects for layout and moves the document to the
// htdocRefactoredForLayout state. Must not be called in htdocInit or after
// the document has already been refactored.
procedure ThtmlDocument.RefactorForLayout;
begin
  Assert(FState in [htdocStructurePrepared..htdocStylePrepared],
      'FState in [htdocStructurePrepared, htdocPreparingStyle, htdocStylePrepared]');
  Refactor([reopRefactorForLayout]);
  FState := htdocRefactoredForLayout;
end;
end.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,192 @@
{******************************************************************************}
{ }
{ Library: Fundamentals 5.00 - HTML Parser }
{ File name: flcHTMLReader.pas }
{ File version: 5.02 }
{ Description: HTML reader utilities }
{ }
{ Copyright: Copyright (c) 2000-2020, David J Butler }
{ All rights reserved. }
{ Redistribution and use in source and binary forms, with }
{ or without modification, are permitted provided that }
{ the following conditions are met: }
{ Redistributions of source code must retain the above }
{ copyright notice, this list of conditions and the }
{ following disclaimer. }
{ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
{ CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
{ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
{ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
{ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
{ THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
{ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
{ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
{ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
{ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
{ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
{ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
{ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
{ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
{ POSSIBILITY OF SUCH DAMAGE. }
{ }
{ Github: https://github.com/fundamentalslib }
{ E-mail: fundamentals.library at gmail.com }
{ }
{ Revision history: }
{ }
{ 2001/04/13 1.01 Part of cHTML unit. }
{ 2019/02/21 5.02 Part of flcHTMLReader unit. }
{ }
{******************************************************************************}
{$INCLUDE flcHTML.inc}
unit flcHTMLReader;
interface
uses
flcStreams,
flcUnicodeCodecs,
flcUnicodeReader;
{ Encoding detection functions }
// htmlGetUnicodeCodec: codec class for an encoding name (looked up with
//   GetCodecClassByAliasA); nil when Encoding is empty.
// htmlDetectEncoding: encoding name guessed from the start of a document:
//   a UTF-16 byte order mark, else a 'text/html; charset=' value, else any
//   'charset=' value; empty when nothing is found.
// htmlDetectDocumentCodec: htmlGetUnicodeCodec applied to htmlDetectEncoding.
// htmlGetDocumentCodec: the codec for Encoding if there is one, else the
//   detected codec, else TISO8859_1Codec.
function htmlGetUnicodeCodec(const Encoding: RawByteString): TUnicodeCodecClass;
function htmlDetectEncoding(const DocumentTop: RawByteString): RawByteString;
function htmlDetectDocumentCodec(const DocumentTop: RawByteString): TUnicodeCodecClass;
function htmlGetDocumentCodec(const Encoding, DocumentTop: RawByteString): TUnicodeCodecClass;
{ Unicode Document Reader constructors }
// Each constructor returns a new TUnicodeReader. When Encoding is empty (or
// gives no codec) the codec is detected from the document, and
// TISO8859_1Codec is used if detection finds nothing.
function htmlGetDocumentReader(
const Reader: AReaderEx; const ReaderOwner: Boolean = True;
const Encoding: RawByteString = ''): TUnicodeReader;
function htmlGetDocumentReaderForRawString(
const Document: RawByteString;
const Encoding: RawByteString = ''): TUnicodeReader;
function htmlGetDocumentReaderForFile(
const FileName: String;
const Encoding: RawByteString = ''): TUnicodeReader;
implementation
uses
flcUTF,
flcStrings;
{ Encoding detection functions }
// Maps an encoding name to a codec class via GetCodecClassByAliasA.
// Returns nil for an empty name.
function htmlGetUnicodeCodec(const Encoding: RawByteString): TUnicodeCodecClass;
begin
  Result := nil;
  if Encoding <> '' then
    Result := GetCodecClassByAliasA(Encoding);
end;
// Guesses the encoding name from the first part of a document.
// Checks, in order: a UTF-16 byte order mark, the value that follows
// 'text/html; charset=', and the value that follows any 'charset='.
// Returns an empty string when DocumentTop is empty or nothing is found.
function htmlDetectEncoding(const DocumentTop: RawByteString): RawByteString;
var
  Buf     : PAnsiChar;
  DocLen  : Integer;
  BomFlag : Boolean;
begin
  Result := '';
  DocLen := Length(DocumentTop);
  if DocLen = 0 then
    exit;
  Buf := Pointer(DocumentTop);
  // UTF-16 byte order mark
  // NOTE(review): BomFlag is the third result of DetectUTF16BOM, presumably
  // a byte order / swap indicator - confirm the name mapping against flcUTF
  if DetectUTF16BOM(Buf, DocLen, BomFlag) then
    begin
      if BomFlag then
        Result := 'utf16le'
      else
        Result := 'utf16';
      exit;
    end;
  // charset given in a html meta content type
  Result := StrBetweenB(DocumentTop, 'text/html; charset=', [#0..#32, '"', '''', '>', ';'],
      False, True, False);
  // otherwise fall back to any charset indicator
  if Result = '' then
    Result := StrTrimB(StrBetweenB(DocumentTop, 'charset=', ['<', '>', ';', ']'],
        False, True, False), [#0..#32, '"', '''']);
end;
// Detects the encoding name of DocumentTop and maps it to a codec class.
// Returns nil when no encoding name is detected.
function htmlDetectDocumentCodec(const DocumentTop: RawByteString): TUnicodeCodecClass;
var
  Detected : RawByteString;
begin
  Detected := htmlDetectEncoding(DocumentTop);
  Result := htmlGetUnicodeCodec(Detected);
end;
// Chooses the codec class for a document: the codec for the specified
// Encoding if there is one, else the codec detected from DocumentTop,
// else the HTML default ISO-8859-1 (Latin1).
function htmlGetDocumentCodec(const Encoding, DocumentTop: RawByteString): TUnicodeCodecClass;
begin
  Result := htmlGetUnicodeCodec(Encoding);
  if not Assigned(Result) then
    begin
      // nothing usable was specified: look at the document itself
      Result := htmlDetectDocumentCodec(DocumentTop);
      if not Assigned(Result) then
        Result := TISO8859_1Codec;
    end;
end;
{ Unicode Document Reader constructors }
// Wraps Reader in a TUnicodeReader that decodes the document text.
//   Reader      - source of the raw document bytes
//   ReaderOwner - forwarded to TUnicodeReader.Create together with Reader
//   Encoding    - encoding name; when empty (or when it gives no codec) the
//                 codec is detected from the first DocumentSampleSize bytes
// If no codec can be determined, TISO8859_1Codec is used.
// Detection reads ahead and then restores the reader position, so the
// returned reader starts where Reader was positioned on entry.
function htmlGetDocumentReader(
    const Reader: AReaderEx; const ReaderOwner: Boolean;
    const Encoding: RawByteString): TUnicodeReader;
const
  DocumentSampleSize = 4096;
var
  C : TUnicodeCodecClass;
  P : Int64; // 64-bit, so positions beyond 2 GiB are restored exactly
  T : RawByteString;
begin
  C := htmlGetUnicodeCodec(Encoding);
  if not Assigned(C) then
    begin
      // detect from document top
      P := Reader.Position;
      T := Reader.ReadStrB(DocumentSampleSize);
      Reader.Position := P;
      C := htmlDetectDocumentCodec(T);
    end;
  if not Assigned(C) then
    C := TISO8859_1Codec; // default codec
  // NOTE(review): the trailing True presumably makes the reader own the
  // new codec instance - confirm against TUnicodeReader.Create
  Result := TUnicodeReader.Create(Reader, ReaderOwner, C.Create, True);
end;
// Creates a unicode reader over an in-memory document, using the codec
// chosen by htmlGetDocumentCodec for Encoding and the document text.
// NOTE(review): the reader is given a pointer to the data of Document;
// whether that data is copied is not visible here, so callers presumably
// have to keep the string alive while the reader is in use - confirm.
function htmlGetDocumentReaderForRawString(const Document: RawByteString;
    const Encoding: RawByteString): TUnicodeReader;
var
  CodecClass : TUnicodeCodecClass;
begin
  CodecClass := htmlGetDocumentCodec(Encoding, Document);
  Result := TUnicodeMemoryReader.Create(
      Pointer(Document), Length(Document),
      CodecClass.Create, True);
end;
// Opens FileName with a TFileReader and wraps it in a unicode reader.
// The file reader is passed with ReaderOwner = True.
function htmlGetDocumentReaderForFile(const FileName: String;
    const Encoding: RawByteString): TUnicodeReader;
var
  FileReader : TFileReader;
begin
  FileReader := TFileReader.Create(FileName);
  Result := htmlGetDocumentReader(FileReader, True, Encoding);
end;
end.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,278 @@
{******************************************************************************}
{ }
{ Library: Fundamentals 5.00 - HTML Parser }
{ File name: flcHTMLStyleTypes.pas }
{ File version: 5.06 }
{ Description: HTML style types }
{ }
{ Copyright: Copyright (c) 2000-2020, David J Butler }
{ All rights reserved. }
{ Redistribution and use in source and binary forms, with }
{ or without modification, are permitted provided that }
{ the following conditions are met: }
{ Redistributions of source code must retain the above }
{ copyright notice, this list of conditions and the }
{ following disclaimer. }
{ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
{ CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
{ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
{ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
{ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
{ THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
{ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
{ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
{ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
{ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
{ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
{ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
{ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
{ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
{ POSSIBILITY OF SUCH DAMAGE. }
{ }
{ Github: https://github.com/fundamentalslib }
{ E-mail: fundamentals.library at gmail.com }
{ }
{ Revision history: }
{ }
{ 2002/11/03 1.00 Part of cHTMLUtils. }
{ 2002/11/04 1.01 ThtmlcssPropertyID. }
{ 2002/12/08 1.02 Part of cHTMLConsts. }
{ 2012/12/16 1.03 HTML 5.1 tags. }
{ 2015/04/04 1.04 RawByteString changes. }
{ 2015/04/11 1.05 UnicodeString changes. }
{ 2019/02/22 5.06 Revised for Fundamentals 5. }
{ }
{******************************************************************************}
{$INCLUDE flcHTML.inc}
unit flcHTMLStyleTypes;
interface
{ }
{ HTML CSS Properties }
{ }
type
// Identifiers for the CSS properties known to the parser.
// HTML_CSS_PROP_None is the "no property" value returned for unknown names.
// The order of the values must match htmlcssPropertyTable in the
// implementation section, which is indexed by this type.
ThtmlcssPropertyID = (
HTML_CSS_PROP_None,
{ Font properties }
HTML_CSS_PROP_font_family,
HTML_CSS_PROP_font_style,
HTML_CSS_PROP_font_variant,
HTML_CSS_PROP_font_weight,
HTML_CSS_PROP_font_size,
HTML_CSS_PROP_font,
{ Color and background properties }
HTML_CSS_PROP_color,
HTML_CSS_PROP_background_color,
HTML_CSS_PROP_background_image,
HTML_CSS_PROP_background_repeat,
HTML_CSS_PROP_background_attachment,
HTML_CSS_PROP_background_position,
HTML_CSS_PROP_background,
{ Text properties }
HTML_CSS_PROP_word_spacing,
HTML_CSS_PROP_letter_spacing,
HTML_CSS_PROP_text_decoration,
HTML_CSS_PROP_vertical_align,
HTML_CSS_PROP_text_transform,
HTML_CSS_PROP_text_align,
HTML_CSS_PROP_text_indent,
HTML_CSS_PROP_line_height,
{ Box properties }
HTML_CSS_PROP_margin_top,
HTML_CSS_PROP_margin_right,
HTML_CSS_PROP_margin_bottom,
HTML_CSS_PROP_margin_left,
HTML_CSS_PROP_margin,
HTML_CSS_PROP_padding_top,
HTML_CSS_PROP_padding_right,
HTML_CSS_PROP_padding_bottom,
HTML_CSS_PROP_padding_left,
HTML_CSS_PROP_padding,
HTML_CSS_PROP_border_top_width,
HTML_CSS_PROP_border_right_width,
HTML_CSS_PROP_border_bottom_width,
HTML_CSS_PROP_border_left_width,
HTML_CSS_PROP_border_width,
HTML_CSS_PROP_border_color,
HTML_CSS_PROP_border_style,
HTML_CSS_PROP_border_top,
HTML_CSS_PROP_border_right,
HTML_CSS_PROP_border_bottom,
HTML_CSS_PROP_border_left,
HTML_CSS_PROP_border,
HTML_CSS_PROP_width,
HTML_CSS_PROP_height,
HTML_CSS_PROP_float,
HTML_CSS_PROP_clear,
{ Classification properties }
HTML_CSS_PROP_display,
HTML_CSS_PROP_white_space,
HTML_CSS_PROP_list_style_type,
HTML_CSS_PROP_list_style_image,
HTML_CSS_PROP_list_style_position,
HTML_CSS_PROP_list_style
);
const
// First and last real property IDs (HTML_CSS_PROP_None is excluded).
HTML_CSS_PROP_FirstID = HTML_CSS_PROP_font_family;
HTML_CSS_PROP_LastID = HTML_CSS_PROP_list_style;
// Returns the ID of the property whose name equals the NameLen characters
// at Name, ignoring ASCII case; HTML_CSS_PROP_None if there is no match
// or NameLen is not positive.
function htmlcssGetPropertyIDPtrW(const Name: PWideChar; const NameLen: Integer): ThtmlcssPropertyID;
// Returns the CSS name of PropID (e.g. 'font-family'); an empty string for
// HTML_CSS_PROP_None or an out-of-range value.
function htmlcssGetPropertyName(const PropID: ThtmlcssPropertyID): String;
{ }
{ HTML CSS Pseudo Properties }
{ }
type
// Identifiers for the supported CSS pseudo classes and pseudo elements.
// HTML_CSS_PP_None is the "no pseudo property" value. The order must match
// htmlcssPseudoClassTable in the implementation section.
ThtmlcssPseudoPropertyID = (
HTML_CSS_PP_None,
{ Anchor pseudo classes }
HTML_CSS_PP_Link,
HTML_CSS_PP_Visited,
HTML_CSS_PP_Hover,
HTML_CSS_PP_Active,
HTML_CSS_PP_Focus,
{ Typographical pseudo-elements }
HTML_CSS_PP_First_Line,
HTML_CSS_PP_First_Letter
);
ThtmlcssPseudoPropertyIDSet = Set of ThtmlcssPseudoPropertyID;
// Combined anchor states: link or visited, each either plain or with one of
// hover / active / focus. HTML_CSS_PP_Properties_AnchorState maps each
// state to the pseudo properties that apply to it.
ThtmlcssAnchorPseudoPropertyState = (
anchorLink,
anchorLink_Hover,
anchorLink_Active,
anchorLink_Focus,
anchorVisited,
anchorVisited_Hover,
anchorVisited_Active,
anchorVisited_Focus
);
const
// First and last real pseudo property IDs (HTML_CSS_PP_None is excluded).
HTML_CSS_PP_FirstID = HTML_CSS_PP_Link;
HTML_CSS_PP_LastID = HTML_CSS_PP_First_Letter;
// Pseudo properties that apply to anchors. The range Link..Focus holds the
// same five members as HTML_CSS_PP_PseudoClasses below.
HTML_CSS_PP_Properties_Anchor : ThtmlcssPseudoPropertyIDSet =
[HTML_CSS_PP_Link..HTML_CSS_PP_Focus];
HTML_CSS_PP_PseudoClasses : ThtmlcssPseudoPropertyIDSet =
[HTML_CSS_PP_Link, HTML_CSS_PP_Visited, HTML_CSS_PP_Hover,
HTML_CSS_PP_Active, HTML_CSS_PP_Focus];
HTML_CSS_PP_PseudoElements : ThtmlcssPseudoPropertyIDSet =
[HTML_CSS_PP_First_Line, HTML_CSS_PP_First_Letter];
// Pseudo properties in effect for each anchor state; the rows are in the
// declaration order of ThtmlcssAnchorPseudoPropertyState.
HTML_CSS_PP_Properties_AnchorState : Array[ThtmlcssAnchorPseudoPropertyState]
of ThtmlcssPseudoPropertyIDSet = (
[HTML_CSS_PP_Link],
[HTML_CSS_PP_Link, HTML_CSS_PP_Hover],
[HTML_CSS_PP_Link, HTML_CSS_PP_Active],
[HTML_CSS_PP_Link, HTML_CSS_PP_Focus],
[HTML_CSS_PP_Visited],
[HTML_CSS_PP_Visited, HTML_CSS_PP_Hover],
[HTML_CSS_PP_Visited, HTML_CSS_PP_Active],
[HTML_CSS_PP_Visited, HTML_CSS_PP_Focus]);
// Returns the ID of the pseudo property whose name equals the NameLen
// characters at Name, ignoring ASCII case; HTML_CSS_PP_None if there is no
// match or NameLen is not positive.
function htmlcssGetPseudoPropIDPtrW(const Name: PWideChar;
const NameLen: Integer): ThtmlcssPseudoPropertyID;
implementation
uses
flcStrings;
{ }
{ HTML CSS Properties }
{ }
const
// CSS property names indexed by ThtmlcssPropertyID. The first entry (for
// HTML_CSS_PROP_None) is empty; the rest follow the enumeration order, so
// both must be changed together.
htmlcssPropertyTable: array[ThtmlcssPropertyID] of String = ('',
'font-family', 'font-style',
'font-variant', 'font-weight',
'font-size', 'font',
'color', 'background-color',
'background-image', 'background-repeat',
'background-attachment', 'background-position',
'background', 'word-spacing',
'letter-spacing', 'text-decoration',
'vertical-align', 'text-transform',
'text-align', 'text-indent',
'line-height', 'margin-top',
'margin-right', 'margin-bottom',
'margin-left', 'margin',
'padding-top', 'padding-right',
'padding-bottom', 'padding-left',
'padding', 'border-top-width',
'border-right-width', 'border-bottom-width',
'border-left-width', 'border-width',
'border-color', 'border-style',
'border-top', 'border-right',
'border-bottom', 'border-left',
'border', 'width',
'height', 'float',
'clear', 'display',
'white-space', 'list-style-type',
'list-style-image', 'list-style-position',
'list-style');
// Finds the CSS property whose name equals the NameLen characters at Name
// (ASCII case is ignored). Returns HTML_CSS_PROP_None when NameLen is not
// positive or no property name matches.
function htmlcssGetPropertyIDPtrW(const Name: PWideChar; const NameLen: Integer): ThtmlcssPropertyID;
var
  PropID : ThtmlcssPropertyID;
begin
  Result := HTML_CSS_PROP_None;
  if NameLen <= 0 then
    exit;
  for PropID := HTML_CSS_PROP_FirstID to HTML_CSS_PROP_LastID do
    // cheap length test first; the text is compared only on equal length
    if Length(htmlcssPropertyTable[PropID]) = NameLen then
      if StrPMatchNoAsciiCase(Name, Pointer(htmlcssPropertyTable[PropID]), NameLen) then
        begin
          Result := PropID;
          exit;
        end;
end;
var
  // Per-property cache of name strings handed out by htmlcssGetPropertyName;
  // an entry is filled the first time its property is requested.
  PropNameRef: Array[ThtmlcssPropertyID] of String;

// Returns the CSS name for PropID, or an empty string when PropID lies
// outside HTML_CSS_PROP_FirstID..HTML_CSS_PROP_LastID.
function htmlcssGetPropertyName(const PropID: ThtmlcssPropertyID): String;
begin
  Result := '';
  if (PropID >= HTML_CSS_PROP_FirstID) and (PropID <= HTML_CSS_PROP_LastID) then
    begin
      // first request for this property: take the name from the table
      if PropNameRef[PropID] = '' then
        PropNameRef[PropID] := htmlcssPropertyTable[PropID];
      Result := PropNameRef[PropID];
    end;
end;
const
// Pseudo property names indexed by ThtmlcssPseudoPropertyID. The first entry
// (for HTML_CSS_PP_None) is empty; the rest follow the enumeration order.
// Lookups ignore ASCII case, so the capitalisation here is not significant
// for matching.
htmlcssPseudoClassTable: Array[ThtmlcssPseudoPropertyID] of String = ('',
'Link', 'Visited', 'Hover', 'Active', 'Focus',
'First-Line', 'First-Letter');
// Finds the pseudo property whose name equals the NameLen characters at
// Name (ASCII case is ignored). Returns HTML_CSS_PP_None when NameLen is
// not positive or no name matches.
function htmlcssGetPseudoPropIDPtrW(const Name: PWideChar; const NameLen: Integer): ThtmlcssPseudoPropertyID;
var
  PseudoID : ThtmlcssPseudoPropertyID;
begin
  Result := HTML_CSS_PP_None;
  if NameLen <= 0 then
    exit;
  for PseudoID := HTML_CSS_PP_FirstID to HTML_CSS_PP_LastID do
    // cheap length test first; the text is compared only on equal length
    if Length(htmlcssPseudoClassTable[PseudoID]) = NameLen then
      if StrPMatchNoAsciiCase(Name, Pointer(htmlcssPseudoClassTable[PseudoID]), NameLen) then
        begin
          Result := PseudoID;
          exit;
        end;
end;
end.

View File

@@ -0,0 +1,28 @@
unit flcHTMLTest;
interface
procedure Test;
implementation
uses
flcHTMLCharEntity,
flcHTMLProperties,
flcHTMLElements,
flcHTMLLexer,
flcHTMLParser;
// Runs the self-tests of the HTML units in sequence. The calls are
// unit-qualified because every unit names its test procedure Test.
// NOTE(review): flcHTMLCharEntity declares Test only when HTML_TEST is
// defined, so this unit presumably compiles only in builds that define it -
// confirm for the other units as well.
procedure Test;
begin
flcHTMLCharEntity.Test;
flcHTMLProperties.Test;
flcHTMLElements.Test;
flcHTMLLexer.Test;
flcHTMLParser.Test;
end;
end.