95a2f54683
git-svn-id: https://svn.code.sf.net/p/apophysis7x/svn/trunk@1 a5d1c0f9-a0e9-45c6-87dd-9d276e40c949
634 lines
15 KiB
ObjectPascal
634 lines
15 KiB
ObjectPascal
{ unit sdStringTable
|
|
|
|
Author: Nils Haeck M.Sc. (n.haeck@simdesign.nl)
|
|
Original Date: 28 May 2007
|
|
Version: 1.1
|
|
Copyright (c) 2007 - 2010 Simdesign BV
|
|
|
|
It is NOT allowed under ANY circumstances to publish or copy this code
|
|
without accepting the license conditions in accompanying LICENSE.txt
|
|
first!
|
|
|
|
This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF
|
|
ANY KIND, either express or implied.
|
|
|
|
Please visit http://www.simdesign.nl/xml.html for more information.
|
|
}
|
|
unit sdStringTable;
|
|
|
|
interface
|
|
|
|
{$i NativeXml.inc}
|
|
|
|
uses
|
|
Classes, SysUtils, Contnrs;
|
|
|
|
type
|
|
|
|
// A record describing a string by its first position and length (Count)
|
|
TsdStringRec = record
|
|
First: Pbyte;
|
|
Count: integer;
|
|
end;
|
|
|
|
// A string reference item used in string reference lists (do not use directly)
|
|
TsdRefString = class
|
|
private
|
|
FID: integer;
|
|
FFrequency: integer;
|
|
FFirst: Pbyte;
|
|
FCharCount: integer;
|
|
protected
|
|
procedure SetString(const SR: TsdStringRec);
|
|
function CompareToSR(const SR: TsdStringRec): integer;
|
|
function StringRec: TsdStringRec;
|
|
public
|
|
destructor Destroy; override;
|
|
function AsString: UTF8String;
|
|
property CharCount: integer read FCharCount;
|
|
property Frequency: integer read FFrequency;
|
|
end;
|
|
|
|
// A list of string reference items (do not use directly)
|
|
TsdRefStringList = class(TObjectList)
|
|
private
|
|
function GetItems(Index: integer): TsdRefString;
|
|
protected
|
|
// Assumes list is sorted by StringID
|
|
function IndexOfID(AID: integer; var Index: integer): boolean;
|
|
// Assumes list is sorted by string rec
|
|
function IndexOfSR(const AStringRec: TsdStringRec; var Index: integer): boolean;
|
|
public
|
|
property Items[Index: integer]: TsdRefString read GetItems; default;
|
|
end;
|
|
|
|
// A string table, holding a collection of unique strings, sorted in 2 ways
|
|
// for fast access. Strings can be added with AddString or AddStringRec,
|
|
// and should be updated with SetString. When a string is added or updated,
|
|
// an ID is returned which the application can use to retrieve the string,
|
|
// using GetString.
|
|
TsdStringTable = class(TPersistent)
|
|
private
|
|
FByID: TsdRefStringList;
|
|
FBySR: TsdRefStringList;
|
|
protected
|
|
procedure DecFrequency(AItem: TsdRefString; ByIdIndex: integer);
|
|
function NextUniqueID: integer;
|
|
public
|
|
constructor Create;
|
|
destructor Destroy; override;
|
|
procedure Clear;
|
|
// Add a new string rec, return fresh ID or ID of existing item, and increase
|
|
// the existing item's ref count
|
|
function AddStringRec(const SR: TsdStringRec): integer;
|
|
// Add a new string S to the table, the function returns its ID.
|
|
function AddString(const S: UTF8String): integer;
|
|
// Get the refstring by ID
|
|
function ById(index: integer): TsdRefString;
|
|
// Delete refstring by ID
|
|
procedure Delete(ByIdIndex: integer);
|
|
// determine if the stringrec exists
|
|
function ExistStringRec(const SR: TsdStringRec): boolean;
|
|
// Get the string of refstring with ID
|
|
function GetString(ID: integer): UTF8String;
|
|
// Set the string value of refstring with ID.
|
|
procedure SetString(var ID: integer; const S: UTF8String);
|
|
// Number of refstrings
|
|
function StringCount: integer;
|
|
procedure SaveToFile(const AFileName: string);
|
|
procedure SaveToStream(S: TStream);
|
|
end;
|
|
|
|
{utility functions}
|
|
|
|
// convert a string into a string rec
|
|
function sdStringToSR(const S: Utf8String): TsdStringRec;
|
|
|
|
// convert a string rec into a string
|
|
function sdSRToString(const SR: TsdStringRec): Utf8String;
|
|
|
|
// compare two string recs. This is NOT an alphabetic compare. SRs are first
|
|
// compared by length, then by first byte, then last byte then second, then
|
|
// N-1, until all bytes are compared.
|
|
function sdCompareSR(const SR1, SR2: TsdStringRec): integer;
|
|
|
|
// compare 2 bytes
|
|
function sdCompareByte(Byte1, Byte2: byte): integer;
|
|
|
|
// compare 2 integers
|
|
function sdCompareInteger(Int1, Int2: integer): integer;
|
|
|
|
function sdUtf16ToUtf8Mem(Src: Pword; Dst: Pbyte; Count: integer): integer;
|
|
function sdUtf8ToUtf16Mem(var Src: Pbyte; Dst: Pword; Count: integer): integer;
|
|
procedure sdStreamWrite(S: TStream; const AString: AnsiString);
|
|
procedure sdStreamWriteStringRec(S: TStream; const AStringRec: TsdStringRec);
|
|
procedure sdStreamWriteRefString(S: TStream; ARefString: TsdRefString);
|
|
|
|
implementation
|
|
|
|
{ TsdRefString }
|
|
|
|
function TsdRefString.AsString: UTF8String;
|
|
begin
|
|
Result := sdSRToString(StringRec);
|
|
end;
|
|
|
|
function TsdRefString.CompareToSR(const SR: TsdStringRec): integer;
|
|
begin
|
|
if SR.Count = 0 then
|
|
begin
|
|
// shortcut
|
|
Result := 1;
|
|
exit;
|
|
end;
|
|
Result := sdCompareSR(StringRec, SR);
|
|
end;
|
|
|
|
destructor TsdRefString.Destroy;
|
|
begin
|
|
FreeMem(FFirst);
|
|
inherited;
|
|
end;
|
|
|
|
procedure TsdRefString.SetString(const SR: TsdStringRec);
|
|
begin
|
|
FCharCount := SR.Count;
|
|
ReallocMem(FFirst, FCharCount);
|
|
Move(SR.First^, FFirst^, FCharCount);
|
|
end;
|
|
|
|
function TsdRefString.StringRec: TsdStringRec;
|
|
begin
|
|
Result.First := FFirst;
|
|
Result.Count := FCharCount;
|
|
end;
|
|
|
|
{ TsdRefStringList }
|
|
|
|
function TsdRefStringList.GetItems(Index: integer): TsdRefString;
|
|
begin
|
|
Result := Get(Index);
|
|
end;
|
|
|
|
function TsdRefStringList.IndexOfID(AID: integer; var Index: integer): boolean;
|
|
var
|
|
Min, Max: integer;
|
|
begin
|
|
Result := False;
|
|
|
|
// Find position - binary method
|
|
Index := 0;
|
|
Min := 0;
|
|
Max := Count;
|
|
while Min < Max do
|
|
begin
|
|
Index := (Min + Max) div 2;
|
|
case sdCompareInteger(Items[Index].FID, AID) of
|
|
-1: Min := Index + 1;
|
|
0: begin
|
|
Result := True;
|
|
exit;
|
|
end;
|
|
1: Max := Index;
|
|
end;
|
|
end;
|
|
|
|
Index := Min;
|
|
end;
|
|
|
|
function TsdRefStringList.IndexOfSR(const AStringRec: TsdStringRec; var Index: integer): boolean;
|
|
var
|
|
Min, Max: integer;
|
|
SR: TsdStringRec;
|
|
begin
|
|
Result := False;
|
|
|
|
// Find position - binary method
|
|
Index := 0;
|
|
Min := 0;
|
|
Max := Count;
|
|
while Min < Max do
|
|
begin
|
|
Index := (Min + Max) div 2;
|
|
SR := TsdRefString(Get(Index)).StringRec;
|
|
case sdCompareSR(SR, AStringRec) of
|
|
-1: Min := Index + 1;
|
|
0: begin
|
|
Result := True;
|
|
exit;
|
|
end;
|
|
1: Max := Index;
|
|
end;
|
|
end;
|
|
|
|
Index := Min;
|
|
end;
|
|
|
|
{ TsdStringTable }
|
|
|
|
function TsdStringTable.AddString(const S: UTF8String): integer;
|
|
var
|
|
SR: TsdStringRec;
|
|
begin
|
|
SR := sdStringToSR(S);
|
|
Result := AddStringRec(SR);
|
|
end;
|
|
|
|
function TsdStringTable.AddStringRec(const SR: TsdStringRec): integer;
|
|
var
|
|
BySRIndex: integer;
|
|
Item: TsdRefString;
|
|
NewSR: TsdStringRec;
|
|
Res: boolean;
|
|
begin
|
|
// zero-length string
|
|
if SR.Count = 0 then
|
|
begin
|
|
Result := 0;
|
|
exit;
|
|
end;
|
|
|
|
// Try to find the new string
|
|
if FBySR.IndexOfSR(SR, BySRIndex) then
|
|
begin
|
|
Item := FBySR.Items[BySRIndex];
|
|
inc(Item.FFrequency);
|
|
Result := Item.FID;
|
|
exit;
|
|
end;
|
|
|
|
// Not found.. must make new item
|
|
Item := TsdRefString.Create;
|
|
Item.SetString(SR);
|
|
NewSR := Item.StringRec;
|
|
Item.FID := NextUniqueID;
|
|
FById.Add(Item);
|
|
Item.FFrequency := 1;
|
|
|
|
// debug:
|
|
//SetLength(Item.FValue, Item.FCount);
|
|
//Move(Item.FirstPtr(FBase)^, Item.FValue[1], Item.FCount);
|
|
|
|
// Insert in BySR lists
|
|
Res := FBySR.IndexOfSR(NewSR, BySRIndex);
|
|
assert(Res = False);
|
|
FBySR.Insert(BySRIndex, Item);
|
|
Result := Item.FID;
|
|
end;
|
|
|
|
function TsdStringTable.ById(index: integer): TsdRefString;
|
|
begin
|
|
Result := FById[Index];
|
|
end;
|
|
|
|
procedure TsdStringTable.Clear;
|
|
begin
|
|
FByID.Clear;
|
|
FBySR.Clear;
|
|
end;
|
|
|
|
constructor TsdStringTable.Create;
|
|
begin
|
|
inherited Create;
|
|
FByID := TsdRefStringList.Create(False);
|
|
FBySR := TsdRefStringList.Create(True);
|
|
end;
|
|
|
|
procedure TsdStringTable.DecFrequency(AItem: TsdRefString; ByIdIndex: integer);
|
|
var
|
|
BySRIndex: integer;
|
|
Res: boolean;
|
|
begin
|
|
dec(AItem.FFrequency);
|
|
assert(AItem.FFrequency >= 0);
|
|
|
|
if AItem.FFrequency = 0 then
|
|
begin
|
|
// We must remove it
|
|
FById.Delete(ByIdIndex);
|
|
Res := FBySR.IndexOfSR(AItem.StringRec, BySRIndex);
|
|
assert(Res = True);
|
|
FBySR.Delete(BySRIndex);
|
|
end;
|
|
end;
|
|
|
|
procedure TsdStringTable.Delete(ByIdIndex: integer);
|
|
var
|
|
Item: TsdRefString;
|
|
BySRIndex: integer;
|
|
Res: boolean;
|
|
begin
|
|
Item := FById[ByIdIndex];
|
|
if Item = nil then
|
|
exit;
|
|
FById.Delete(ByIdIndex);
|
|
Res := FBySR.IndexOfSR(Item.StringRec, BySRIndex);
|
|
assert(Res = True);
|
|
FBySR.Delete(BySRIndex);
|
|
end;
|
|
|
|
destructor TsdStringTable.Destroy;
|
|
begin
|
|
FreeAndNil(FByID);
|
|
FreeAndNil(FBySR);
|
|
inherited;
|
|
end;
|
|
|
|
function TsdStringTable.ExistStringRec(const SR: TsdStringRec): boolean;
|
|
var
|
|
BySRIndex: integer;
|
|
begin
|
|
// zero-length string
|
|
if SR.Count = 0 then
|
|
begin
|
|
Result := False;
|
|
exit;
|
|
end;
|
|
|
|
// Try to find the new string
|
|
Result := FBySR.IndexOfSR(SR, BySRIndex);
|
|
|
|
end;
|
|
|
|
function TsdStringTable.GetString(ID: integer): UTF8String;
|
|
var
|
|
Index, Count: integer;
|
|
Item: TsdRefString;
|
|
begin
|
|
if ID = 0 then
|
|
begin
|
|
Result := '';
|
|
exit;
|
|
end;
|
|
|
|
// Find the ID
|
|
if FByID.IndexOfID(ID, Index) then
|
|
begin
|
|
Item := FById[Index];
|
|
Count := Item.FCharCount;
|
|
SetLength(Result, Count);
|
|
Move(Item.FFirst^, Result[1], Count);
|
|
exit;
|
|
end;
|
|
|
|
Result := '';
|
|
end;
|
|
|
|
function TsdStringTable.NextUniqueID: integer;
|
|
begin
|
|
if FById.Count = 0 then
|
|
Result := 1
|
|
else
|
|
Result := FByID[FByID.Count - 1].FID + 1;
|
|
end;
|
|
|
|
procedure TsdStringTable.SaveToFile(const AFileName: string);
|
|
var
|
|
F: TFileStream;
|
|
begin
|
|
F := TFileStream.Create(AFileName, fmCreate);
|
|
try
|
|
SaveToStream(F);
|
|
finally
|
|
F.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure TsdStringTable.SaveToStream(S: TStream);
|
|
var
|
|
i: integer;
|
|
R: UTF8String;
|
|
begin
|
|
for i := 0 to FBySR.Count - 1 do
|
|
begin
|
|
R := FBySR[i].AsString + #13#10;
|
|
S.Write(R[1], length(R));
|
|
end;
|
|
end;
|
|
|
|
procedure TsdStringTable.SetString(var ID: integer; const S: UTF8String);
|
|
var
|
|
ByIdIndex: integer;
|
|
Item: TsdRefString;
|
|
SR: TsdStringRec;
|
|
begin
|
|
// Make temp string record
|
|
SR := sdStringtoSR(S);
|
|
|
|
// Do we have a ref string with this ID?
|
|
if (ID > 0) and FByID.IndexOfID(ID, ByIdIndex) then
|
|
begin
|
|
// Is the string still the same?
|
|
Item := FById[ByIdIndex];
|
|
if Item.CompareToSR(SR) = 0 then
|
|
exit;
|
|
// The string changed..
|
|
DecFrequency(Item, ByIdIndex);
|
|
end;
|
|
|
|
ID := AddStringRec(SR);
|
|
end;
|
|
|
|
{utility functions}
|
|
|
|
function TsdStringTable.StringCount: integer;
|
|
begin
|
|
Result := FBySR.Count;
|
|
end;
|
|
|
|
function sdStringToSR(const S: UTF8String): TsdStringRec;
|
|
begin
|
|
Result.Count := length(S);
|
|
if Result.Count = 0 then
|
|
Result.First := nil
|
|
else
|
|
Result.First := @S[1];
|
|
end;
|
|
|
|
function sdSRToString(const SR: TsdStringRec): UTF8String;
|
|
begin
|
|
SetLength(Result, SR.Count);
|
|
if SR.Count > 0 then
|
|
Move(SR.First^, Result[1], SR.Count);
|
|
end;
|
|
|
|
function sdCompareByte(Byte1, Byte2: byte): integer;
|
|
begin
|
|
if Byte1 < Byte2 then
|
|
Result := -1
|
|
else
|
|
if Byte1 > Byte2 then
|
|
Result := 1
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
function sdCompareInteger(Int1, Int2: integer): integer;
|
|
begin
|
|
if Int1 < Int2 then
|
|
Result := -1
|
|
else
|
|
if Int1 > Int2 then
|
|
Result := 1
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
function sdCompareSR(const SR1, SR2: TsdStringRec): integer;
|
|
var
|
|
Count: integer;
|
|
First1, First2, Last1, Last2: Pbyte;
|
|
begin
|
|
// Compare string length first
|
|
Result := sdCompareInteger(SR1.Count, SR2.Count);
|
|
if Result <> 0 then
|
|
exit;
|
|
|
|
// Compare first
|
|
Result := sdCompareByte(SR1.First^, SR2.First^);
|
|
if Result <> 0 then
|
|
exit;
|
|
Count := SR1.Count;
|
|
|
|
// Setup First & Last pointers
|
|
First1 := SR1.First;
|
|
First2 := SR2.First;
|
|
Last1 := First1; inc(Last1, Count);
|
|
Last2 := First2; inc(Last2, Count);
|
|
|
|
// Compare each time last ptrs then first ptrs, until they meet in the middle
|
|
repeat
|
|
dec(Last1);
|
|
dec(Last2);
|
|
if First1 = Last1 then
|
|
exit;
|
|
Result := sdCompareByte(Last1^, Last2^);
|
|
if Result <> 0 then
|
|
exit;
|
|
inc(First1); inc(First2);
|
|
if First1 = Last1 then
|
|
exit;
|
|
Result := sdCompareByte(First1^, First2^);
|
|
if Result <> 0 then
|
|
exit;
|
|
until False;
|
|
end;
|
|
|
|
function sdUtf16ToUtf8Mem(Src: Pword; Dst: Pbyte; Count: integer): integer;
|
|
// Convert an Unicode (UTF16 LE) memory block to UTF8. This routine will process
|
|
// Count wide characters (2 bytes size) to Count UTF8 characters (1-3 bytes).
|
|
// Therefore, the block at Dst must be at least 1.5 the size of the source block.
|
|
// The function returns the number of *bytes* written.
|
|
var
|
|
W: word;
|
|
DStart: Pbyte;
|
|
begin
|
|
DStart := Dst;
|
|
while Count > 0 do
|
|
begin
|
|
W := Src^;
|
|
inc(Src);
|
|
if W <= $7F then
|
|
begin
|
|
Dst^ := byte(W);
|
|
inc(Dst);
|
|
end else
|
|
begin
|
|
if W > $7FF then
|
|
begin
|
|
Dst^ := byte($E0 or (W shr 12));
|
|
inc(Dst);
|
|
Dst^ := byte($80 or ((W shr 6) and $3F));
|
|
inc(Dst);
|
|
Dst^ := byte($80 or (W and $3F));
|
|
inc(Dst);
|
|
end else
|
|
begin // $7F < W <= $7FF
|
|
Dst^ := byte($C0 or (W shr 6));
|
|
inc(Dst);
|
|
Dst^ := byte($80 or (W and $3F));
|
|
inc(Dst);
|
|
end;
|
|
end;
|
|
dec(Count);
|
|
end;
|
|
Result := integer(Dst) - integer(DStart);
|
|
end;
|
|
|
|
function sdUtf8ToUtf16Mem(var Src: Pbyte; Dst: Pword; Count: integer): integer;
|
|
// Convert an UTF8 memory block to Unicode (UTF16 LE). This routine will process
|
|
// Count *bytes* of UTF8 (each character 1-3 bytes) into UTF16 (each char 2 bytes).
|
|
// Therefore, the block at Dst must be at least 2 times the size of Count, since
|
|
// many UTF8 characters consist of just one byte, and are mapped to 2 bytes. The
|
|
// function returns the number of *wide chars* written. Note that the Src block must
|
|
// have an exact number of UTF8 characters in it, if Count doesn't match then
|
|
// the last character will be converted anyway (going past the block boundary!)
|
|
var
|
|
W: word;
|
|
C: byte;
|
|
DStart: Pword;
|
|
SClose: Pbyte;
|
|
begin
|
|
DStart := Dst;
|
|
SClose := Src;
|
|
inc(SClose, Count);
|
|
while integer(Src) < integer(SClose) do
|
|
begin
|
|
// 1st byte
|
|
W := Src^;
|
|
inc(Src);
|
|
if W and $80 <> 0 then
|
|
begin
|
|
W := W and $3F;
|
|
if W and $20 <> 0 then
|
|
begin
|
|
// 2nd byte
|
|
C := Src^;
|
|
inc(Src);
|
|
if C and $C0 <> $80 then
|
|
// malformed trail byte or out of range char
|
|
Continue;
|
|
W := (W shl 6) or (C and $3F);
|
|
end;
|
|
// 2nd or 3rd byte
|
|
C := Src^;
|
|
inc(Src);
|
|
if C and $C0 <> $80 then
|
|
// malformed trail byte
|
|
Continue;
|
|
Dst^ := (W shl 6) or (C and $3F);
|
|
inc(Dst);
|
|
end else
|
|
begin
|
|
Dst^ := W;
|
|
inc(Dst);
|
|
end;
|
|
end;
|
|
Result := (integer(Dst) - integer(DStart)) div 2;
|
|
end;
|
|
|
|
procedure sdStreamWrite(S: TStream; const AString: AnsiString);
|
|
var
|
|
L: integer;
|
|
begin
|
|
L := Length(AString);
|
|
if L > 0 then
|
|
begin
|
|
S.Write(AString[1], L);
|
|
end;
|
|
end;
|
|
|
|
procedure sdStreamWriteStringRec(S: TStream; const AStringRec: TsdStringRec);
|
|
begin
|
|
S.Write(PAnsiChar(AStringRec.First)^, AStringRec.Count);
|
|
end;
|
|
|
|
procedure sdStreamWriteRefString(S: TStream; ARefString: TsdRefString);
|
|
begin
|
|
if ARefString = nil then
|
|
exit;
|
|
S.Write(PAnsiChar(ARefString.FFirst)^, ARefString.FCharCount);
|
|
end;
|
|
|
|
end.
|