{**************************************************************************************************} { } { Perl Regular Expressions VCL component } { } { The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); } { you may not use this file except in compliance with the License. You may obtain a copy of the } { License at http://www.mozilla.org/MPL/ } { } { Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF } { ANY KIND, either express or implied. See the License for the specific language governing rights } { and limitations under the License. } { } { The Original Code is PerlRegEx.pas. } { } { The Initial Developer of the Original Code is Jan Goyvaerts. } { Portions created by Jan Goyvaerts are Copyright (C) 1999, 2005, 2008 Jan Goyvaerts. } { All rights reserved. } { } { Design & implementation, by Jan Goyvaerts, 1999, 2005, 2008 } { } { TPerlRegEx is available at http://www.regular-expressions.info/delphi.html } { } {**************************************************************************************************} unit PerlRegEx; interface uses Windows, Messages, SysUtils, Classes, pcre; type TPerlRegExOptions = set of ( preCaseLess, // /i -> Case insensitive preMultiLine, // /m -> ^ and $ also match before/after a newline, not just at the beginning and the end of the PCREString preSingleLine, // /s -> Dot matches any character, including \n (newline). Otherwise, it matches anything except \n preExtended, // /x -> Allow regex to contain extra whitespace, newlines and Perl-style comments, all of which will be filtered out preAnchored, // /A -> Successful match can only occur at the start of the subject or right after the previous match preUnGreedy, // Repeat operators (+, *, ?) are not greedy by default (i.e. they try to match the minimum number of characters instead of the maximum) preNoAutoCapture // (group) is a non-capturing group; only named groups capture ); type TPerlRegExState = set of ( preNotBOL, // Not Beginning Of Line: ^ does not match at the start of Subject preNotEOL, // Not End Of Line: $ does not match at the end of Subject preNotEmpty // Empty matches not allowed ); const // Maximum number of subexpressions (backreferences) // Subexpressions are created by placing round brackets in the regex, and are referenced by \1, \2, ... // In Perl, they are available as $1, $2, ... after the regex matched; with TPerlRegEx, use the Subexpressions property // You can also insert \1, \2, ... in the Replacement PCREString; \0 is the complete matched expression MAX_SUBEXPRESSIONS = 99; {$IFDEF UNICODE} // All implicit string casts have been verified to be correct {$WARN IMPLICIT_STRING_CAST OFF} // Use UTF-8 in Delphi 2009 and later, so Unicode strings are handled correctly. // PCRE does not support UTF-16 type PCREString = UTF8String; {$ELSE UNICODE} // Use AnsiString in Delphi 2007 and earlier type PCREString = AnsiString; {$ENDIF UNICODE} type TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: PCREString) of object; type TPerlRegEx = class(TComponent) private // *** Property storage, getters and setters FCompiled, FStudied: Boolean; FOptions: TPerlRegExOptions; FState: TPerlRegExState; FRegEx, FReplacement, FSubject: PCREString; FStart, FStop: Integer; FOnMatch: TNotifyEvent; FOnReplace: TPerlRegExReplaceEvent; function GetMatchedExpression: PCREString; function GetMatchedExpressionLength: Integer; function GetMatchedExpressionOffset: Integer; procedure SetOptions(Value: TPerlRegExOptions); procedure SetRegEx(const Value: PCREString); function GetSubExpressionCount: Integer; function GetSubExpressions(Index: Integer): PCREString; function GetSubExpressionLengths(Index: Integer): Integer; function GetSubExpressionOffsets(Index: Integer): Integer; procedure SetSubject(const Value: PCREString); procedure SetStart(const Value: Integer); procedure SetStop(const Value: Integer); function GetFoundMatch: Boolean; private // *** Variables used by pcrelib.dll Offsets: array[0..(MAX_SUBEXPRESSIONS+1)*3] of Integer; OffsetCount: Integer; pcreOptions: Integer; pattern, hints, chartable: Pointer; FSubjectPChar: PAnsiChar; FHasStoredSubExpressions: Boolean; FStoredSubExpressions: array of PCREString; function GetSubjectLeft: PCREString; function GetSubjectRight: PCREString; protected procedure CleanUp; // Dispose off whatever we created, so we can start over. Called automatically when needed, so it is not made public procedure ClearStoredSubExpressions; public constructor Create(AOwner: TComponent); override; // Come to life destructor Destroy; override; // Clean up after ourselves class function EscapeRegExChars(const S: string): string; // Escapes regex characters in S so that the regex engine can be used to match S as plain text procedure Compile; // Compile the regex. Called automatically by Match procedure Study; // Study the regex. Studying takes time, but will make the execution of the regex a lot faster. // Call study if you will be using the same regex many times function Match: Boolean; // Attempt to match the regex function MatchAgain: Boolean; // Attempt to match the regex to the remainder of the string after the previous match // To avoid problems (when using ^ in the regex), call MatchAgain only after a succesful Match() function Replace: PCREString; // Replace matched expression in Subject with ComputeReplacement. Returns the actual replacement text from ComputeReplacement function ReplaceAll: Boolean; // Repeat MatchAgain and Replace until you drop. Returns True if anything was replaced at all. function ComputeReplacement: PCREString; // Returns Replacement with backreferences filled in procedure StoreSubExpressions; // Stores duplicates of SubExpressions[] so they and ComputeReplacement will still return the proper strings // even if FSubject is changed or cleared function NamedSubExpression(const SEName: PCREString): Integer; // Returns the index of the named group SEName procedure Split(Strings: TStrings; Limit: Integer); // Split Subject along regex matches. Items are appended to PCREStrings. property Compiled: Boolean read FCompiled; // True if the RegEx has already been compiled. property FoundMatch: Boolean read GetFoundMatch; // Returns True when MatchedExpression* and SubExpression* indicate a match property Studied: Boolean read FStudied; // True if the RegEx has already been studied property MatchedExpression: PCREString read GetMatchedExpression; // The matched PCREString property MatchedExpressionLength: Integer read GetMatchedExpressionLength; // Length of the matched PCREString property MatchedExpressionOffset: Integer read GetMatchedExpressionOffset; // Character offset in the Subject PCREString at which the matched subPCREString starts property Start: Integer read FStart write SetStart; // Starting position in Subject from which MatchAgain begins property Stop: Integer read FStop write SetStop; // Last character in Subject that Match and MatchAgain search through property State: TPerlRegExState read FState write FState; // State of Subject property SubExpressionCount: Integer read GetSubExpressionCount; // Number of matched subexpressions property SubExpressions[Index: Integer]: PCREString read GetSubExpressions; // Matched subexpressions after a regex has been matched property SubExpressionLengths[Index: Integer]: Integer read GetSubExpressionLengths; // Lengths of the subexpressions property SubExpressionOffsets[Index: Integer]: Integer read GetSubExpressionOffsets; // Character offsets in the Subject PCREString of the subexpressions property Subject: PCREString read FSubject write SetSubject; // The PCREString on which Match() will try to match RegEx property SubjectLeft: PCREString read GetSubjectLeft; // Part of the subject to the left of the match property SubjectRight: PCREString read GetSubjectRight; // Part of the subject to the right of the match published property Options: TPerlRegExOptions read FOptions write SetOptions; // Options property RegEx: PCREString read FRegEx write SetRegEx; // The regular expression to be matched property Replacement: PCREString read FReplacement write FReplacement; // PCREString to replace matched expression with. \number backreferences will be substituted with SubExpressions // TPerlRegEx supports the "JGsoft" replacement text flavor as explained at http://www.regular-expressions.info/refreplace.html property OnMatch: TNotifyEvent read FOnMatch write FOnMatch; // Triggered by Match and MatchAgain after a successful match property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace; // Triggered by Replace and ReplaceAll just before the replacement is done, allowing you to determine the new PCREString end; { You can add TPerlRegEx components to a TPerlRegExList to match them all together on the same subject, as if they were one regex regex1|regex2|regex3|... TPerlRegExList does not own the TPerlRegEx components, just like a TList If a TPerlRegEx has been added to a TPerlRegExList, it should not be used in any other situation until it is removed from the list } type TPerlRegExList = class private FList: TList; FSubject: PCREString; FMatchedRegEx: TPerlRegEx; FStart, FStop: Integer; function GetRegEx(Index: Integer): TPerlRegEx; procedure SetRegEx(Index: Integer; Value: TPerlRegEx); procedure SetSubject(const Value: PCREString); procedure SetStart(const Value: Integer); procedure SetStop(const Value: Integer); function GetCount: Integer; protected procedure UpdateRegEx(ARegEx: TPerlRegEx); public constructor Create; destructor Destroy; override; public function Add(ARegEx: TPerlRegEx): Integer; procedure Clear; procedure Delete(Index: Integer); function IndexOf(ARegEx: TPerlRegEx): Integer; procedure Insert(Index: Integer; ARegEx: TPerlRegEx); public function Match: Boolean; function MatchAgain: Boolean; property RegEx[Index: Integer]: TPerlRegEx read GetRegEx write SetRegEx; property Count: Integer read GetCount; property Subject: PCREString read FSubject write SetSubject; property Start: Integer read FStart write SetStart; property Stop: Integer read FStop write SetStop; property MatchedRegEx: TPerlRegEx read FMatchedRegEx; end; procedure Register; implementation { ********* Unit support routines ********* } procedure Register; begin RegisterComponents('JGsoft', [TPerlRegEx]); end; function FirstCap(const S: string): string; begin if S = '' then Result := '' else begin Result := AnsiLowerCase(S); {$IFDEF UNICODE} CharUpperBuffW(@Result[1], 1); {$ELSE} CharUpperBuffA(@Result[1], 1); {$ENDIF} end end; function InitialCaps(const S: string): string; var I: Integer; Up: Boolean; begin Result := AnsiLowerCase(S); Up := True; {$IFDEF UNICODE} for I := 1 to Length(Result) do begin case Result[I] of #0..'&', '(', '*', '+', ',', '-', '.', '?', '<', '[', '{', #$00B7: Up := True else if Up and (Result[I] <> '''') then begin CharUpperBuffW(@Result[I], 1); Up := False end end; end; {$ELSE UNICODE} if SysLocale.FarEast then begin I := 1; while I <= Length(Result) do begin if Result[I] in LeadBytes then begin Inc(I, 2) end else begin if Result[I] in [#0..'&', '('..'.', '?', '<', '[', '{'] then Up := True else if Up and (Result[I] <> '''') then begin CharUpperBuffA(@Result[I], 1); Result[I] := UpperCase(Result[I])[1]; Up := False end; Inc(I) end end end else for I := 1 to Length(Result) do begin if Result[I] in [#0..'&', '('..'.', '?', '<', '[', '{', #$B7] then Up := True else if Up and (Result[I] <> '''') then begin CharUpperBuffA(@Result[I], 1); Result[I] := AnsiUpperCase(Result[I])[1]; Up := False end end; {$ENDIF UNICODE} end; { ********* TPerlRegEx component ********* } procedure TPerlRegEx.CleanUp; begin FCompiled := False; FStudied := False; pcre_dispose(pattern, hints, nil); pattern := nil; hints := nil; ClearStoredSubExpressions; OffsetCount := 0; end; procedure TPerlRegEx.ClearStoredSubExpressions; begin FHasStoredSubExpressions := False; FStoredSubExpressions := nil; end; procedure TPerlRegEx.Compile; var Error: PAnsiChar; ErrorOffset: Integer; begin if FRegEx = '' then raise Exception.Create('TPerlRegEx.Compile() - Please specify a regular expression in RegEx first'); CleanUp; Pattern := pcre_compile(PAnsiChar(FRegEx), pcreOptions, @Error, @ErrorOffset, chartable); if Pattern = nil then raise Exception.Create(Format('TPerlRegEx.Compile() - Error in regex at offset %d: %s', [ErrorOffset, AnsiString(Error)])); FCompiled := True end; (* Backreference overview: Assume there are 13 backreferences: Text TPerlRegex .NET Java ECMAScript $17 $1 + "7" "$17" $1 + "7" $1 + "7" $017 $1 + "7" "$017" $1 + "7" $1 + "7" $12 $12 $12 $12 $12 $012 $1 + "2" $12 $12 $1 + "2" ${1}2 $1 + "2" $1 + "2" error "${1}2" $$ "$" "$" error "$" \$ "$" "\$" "$" "\$" *) function TPerlRegEx.ComputeReplacement: PCREString; var Mode: AnsiChar; S: PCREString; I, J, N: Integer; procedure ReplaceBackreference(Number: Integer); var Backreference: PCREString; begin Delete(S, I, J-I); if Number <= SubExpressionCount then begin Backreference := SubExpressions[Number]; if Backreference <> '' then begin // Ignore warnings; converting to UTF-8 does not cause data loss case Mode of 'L', 'l': Backreference := AnsiLowerCase(Backreference); 'U', 'u': Backreference := AnsiUpperCase(Backreference); 'F', 'f': Backreference := FirstCap(Backreference); 'I', 'i': Backreference := InitialCaps(Backreference); end; if S <> '' then begin Insert(Backreference, S, I); I := I + Length(Backreference); end else begin S := Backreference; I := MaxInt; end end; end end; procedure ProcessBackreference(NumberOnly, Dollar: Boolean); var Number, Number2: Integer; Group: PCREString; begin Number := -1; if (J <= Length(S)) and (S[J] in ['0'..'9']) then begin // Get the number of the backreference Number := Ord(S[J]) - Ord('0'); Inc(J); if (J <= Length(S)) and (S[J] in ['0'..'9']) then begin // Expand it to two digits only if that would lead to a valid backreference Number2 := Number*10 + Ord(S[J]) - Ord('0'); if Number2 <= SubExpressionCount then begin Number := Number2; Inc(J) end; end; end else if not NumberOnly then begin if Dollar and (J < Length(S)) and (S[J] = '{') then begin // Number or name in curly braces Inc(J); case S[J] of '0'..'9': begin Number := Ord(S[J]) - Ord('0'); Inc(J); while (J <= Length(S)) and (S[J] in ['0'..'9']) do begin Number := Number*10 + Ord(S[J]) - Ord('0'); Inc(J) end; end; 'A'..'Z', 'a'..'z', '_': begin Inc(J); while (J <= Length(S)) and (S[J] in ['A'..'Z', 'a'..'z', '0'..'9', '_']) do Inc(J); if (J <= Length(S)) and (S[J] = '}') then begin Group := Copy(S, I+2, J-I-2); Number := NamedSubExpression(Group); end end; end; if (J > Length(S)) or (S[J] <> '}') then Number := -1 else Inc(J) end else if Dollar and (S[J] = '_') then begin // $_ (whole subject) Delete(S, I, J+1-I); Insert(Subject, S, I); I := I + Length(Subject); Exit; end else case S[J] of '&': begin // \& or $& (whole regex match) Number := 0; Inc(J); end; '+': begin // \+ or $+ (highest-numbered participating group) Number := SubExpressionCount; Inc(J); end; '`': begin // \` or $` (backtick; subject to the left of the match) Delete(S, I, J+1-I); Insert(SubjectLeft, S, I); I := I + Offsets[0] - 1; Exit; end; '''': begin // \' or $' (straight quote; subject to the right of the match) Delete(S, I, J+1-I); Insert(SubjectRight, S, I); I := I + Length(Subject) - Offsets[1]; Exit; end end; end; if Number >= 0 then ReplaceBackreference(Number) else Inc(I) end; begin S := FReplacement; I := 1; while I < Length(S) do begin case S[I] of '\': begin J := I + 1; Assert(J <= Length(S), 'CHECK: We let I stop one character before the end, so J cannot point beyond the end of the PCREString here'); case S[J] of '$', '\': begin Delete(S, I, 1); Inc(I); end; 'g': begin if (J < Length(S)-1) and (S[J+1] = '<') and (S[J+2] in ['A'..'Z', 'a'..'z', '_']) then begin // Python-style named group reference \g J := J+3; while (J <= Length(S)) and (S[J] in ['0'..'9', 'A'..'Z', 'a'..'z', '_']) do Inc(J); if (J <= Length(S)) and (S[J] = '>') then begin N := NamedSubExpression(Copy(S, I+3, J-I-3)); Inc(J); Mode := #0; if N > 0 then ReplaceBackreference(N) else Delete(S, I, J-I) end else I := J end else I := I+2; end; 'l', 'L', 'u', 'U', 'f', 'F', 'i', 'I': begin Mode := S[J]; Inc(J); ProcessBackreference(True, False); end; else begin Mode := #0; ProcessBackreference(False, False); end; end; end; '$': begin J := I + 1; Assert(J <= Length(S), 'CHECK: We let I stop one character before the end, so J cannot point beyond the end of the PCREString here'); if S[J] = '$' then begin Delete(S, J, 1); Inc(I); end else begin Mode := #0; ProcessBackreference(False, True); end end; else Inc(I) end end; Result := S end; constructor TPerlRegEx.Create(AOwner: TComponent); begin inherited Create(AOwner); FState := [preNotEmpty]; chartable := pcre_maketables; {$IFDEF UNICODE} pcreOptions := PCRE_UTF8 or PCRE_NEWLINE_ANY; {$ELSE} pcreOptions := PCRE_NEWLINE_ANY; {$ENDIF} end; destructor TPerlRegEx.Destroy; begin pcre_dispose(pattern, hints, chartable); inherited Destroy; end; class function TPerlRegEx.EscapeRegExChars(const S: string): string; var I: Integer; begin Result := S; I := Length(Result); while I > 0 do begin case Result[I] of '.', '[', ']', '(', ')', '?', '*', '+', '{', '}', '^', '$', '|', '\': Insert('\', Result, I); #0: begin Result[I] := '0'; Insert('\', Result, I); end; end; Dec(I); end; end; function TPerlRegEx.GetFoundMatch: Boolean; begin Result := OffsetCount > 0; end; function TPerlRegEx.GetMatchedExpression: PCREString; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); Result := GetSubExpressions(0); end; function TPerlRegEx.GetMatchedExpressionLength: Integer; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); Result := GetSubExpressionLengths(0) end; function TPerlRegEx.GetMatchedExpressionOffset: Integer; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); Result := GetSubExpressionOffsets(0) end; function TPerlRegEx.GetSubExpressionCount: Integer; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); Result := OffsetCount-1 end; function TPerlRegEx.GetSubExpressionLengths(Index: Integer): Integer; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); Assert((Index >= 0) and (Index <= SubExpressionCount), 'REQUIRE: Index <= SubExpressionCount'); Result := Offsets[Index*2+1]-Offsets[Index*2] end; function TPerlRegEx.GetSubExpressionOffsets(Index: Integer): Integer; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); Assert((Index >= 0) and (Index <= SubExpressionCount), 'REQUIRE: Index <= SubExpressionCount'); Result := Offsets[Index*2] end; function TPerlRegEx.GetSubExpressions(Index: Integer): PCREString; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); if Index > SubExpressionCount then Result := '' else if FHasStoredSubExpressions then Result := FStoredSubExpressions[Index] else Result := Copy(FSubject, Offsets[Index*2], Offsets[Index*2+1]-Offsets[Index*2]); end; function TPerlRegEx.GetSubjectLeft: PCREString; begin Result := Copy(Subject, 1, Offsets[0]-1); end; function TPerlRegEx.GetSubjectRight: PCREString; begin Result := Copy(Subject, Offsets[1], MaxInt); end; function TPerlRegEx.Match: Boolean; var I, Opts: Integer; begin ClearStoredSubExpressions; if not Compiled then Compile; if preNotBOL in State then Opts := PCRE_NOTBOL else Opts := 0; if preNotEOL in State then Opts := Opts or PCRE_NOTEOL; if preNotEmpty in State then Opts := Opts or PCRE_NOTEMPTY; OffsetCount := pcre_exec(Pattern, Hints, FSubjectPChar, FStop, 0, Opts, @Offsets[0], High(Offsets)); Result := OffsetCount > 0; // Convert offsets into PCREString indices if Result then begin for I := 0 to OffsetCount*2-1 do Inc(Offsets[I]); FStart := Offsets[1]; if Offsets[0] = Offsets[1] then Inc(FStart); // Make sure we don't get stuck at the same position if Assigned(OnMatch) then OnMatch(Self) end; end; function TPerlRegEx.MatchAgain: Boolean; var I, Opts: Integer; begin ClearStoredSubExpressions; if not Compiled then Compile; if preNotBOL in State then Opts := PCRE_NOTBOL else Opts := 0; if preNotEOL in State then Opts := Opts or PCRE_NOTEOL; if preNotEmpty in State then Opts := Opts or PCRE_NOTEMPTY; if FStart-1 > FStop then OffsetCount := -1 else OffsetCount := pcre_exec(Pattern, Hints, FSubjectPChar, FStop, FStart-1, Opts, @Offsets[0], High(Offsets)); Result := OffsetCount > 0; // Convert offsets into PCREString indices if Result then begin for I := 0 to OffsetCount*2-1 do Inc(Offsets[I]); FStart := Offsets[1]; if Offsets[0] = Offsets[1] then Inc(FStart); // Make sure we don't get stuck at the same position if Assigned(OnMatch) then OnMatch(Self) end; end; function TPerlRegEx.NamedSubExpression(const SEName: PCREString): Integer; begin Result := pcre_get_stringnumber(Pattern, PAnsiChar(SEName)); end; function TPerlRegEx.Replace: PCREString; begin Assert(FoundMatch, 'REQUIRE: There must be a successful match first'); // Substitute backreferences Result := ComputeReplacement; // Allow for just-in-time substitution determination if Assigned(OnReplace) then OnReplace(Self, Result); // Perform substitution Delete(FSubject, MatchedExpressionOffset, MatchedExpressionLength); if Result <> '' then Insert(Result, FSubject, MatchedExpressionOffset); FSubjectPChar := PAnsiChar(FSubject); // Position to continue search FStart := FStart - MatchedExpressionLength + Length(Result); FStop := FStop - MatchedExpressionLength + Length(Result); // Replacement no longer matches regex, we assume ClearStoredSubExpressions; OffsetCount := 0; end; function TPerlRegEx.ReplaceAll: Boolean; begin if Match then begin Result := True; repeat Replace until not MatchAgain; end else Result := False; end; procedure TPerlRegEx.SetOptions(Value: TPerlRegExOptions); begin if (FOptions <> Value) then begin FOptions := Value; {$IFDEF UNICODE} pcreOptions := PCRE_UTF8 or PCRE_NEWLINE_ANY; {$ELSE} pcreOptions := PCRE_NEWLINE_ANY; {$ENDIF} if (preCaseLess in Value) then pcreOptions := pcreOptions or PCRE_CASELESS; if (preMultiLine in Value) then pcreOptions := pcreOptions or PCRE_MULTILINE; if (preSingleLine in Value) then pcreOptions := pcreOptions or PCRE_DOTALL; if (preExtended in Value) then pcreOptions := pcreOptions or PCRE_EXTENDED; if (preAnchored in Value) then pcreOptions := pcreOptions or PCRE_ANCHORED; if (preUnGreedy in Value) then pcreOptions := pcreOptions or PCRE_UNGREEDY; if (preNoAutoCapture in Value) then pcreOptions := pcreOptions or PCRE_NO_AUTO_CAPTURE; CleanUp end end; procedure TPerlRegEx.SetRegEx(const Value: PCREString); begin if FRegEx <> Value then begin FRegEx := Value; CleanUp end end; procedure TPerlRegEx.SetStart(const Value: Integer); begin if Value < 1 then FStart := 1 else FStart := Value; // If FStart > Length(Subject), MatchAgain() will simply return False end; procedure TPerlRegEx.SetStop(const Value: Integer); begin if Value > Length(Subject) then FStop := Length(Subject) else FStop := Value; end; procedure TPerlRegEx.SetSubject(const Value: PCREString); begin FSubject := Value; FSubjectPChar := PAnsiChar(Value); FStart := 1; FStop := Length(Subject); if not FHasStoredSubExpressions then OffsetCount := 0; end; procedure TPerlRegEx.Split(Strings: TStrings; Limit: Integer); var Offset, Count: Integer; begin Assert(Strings <> nil, 'REQUIRE: Strings'); if (Limit = 1) or not Match then Strings.Add(Subject) else begin Offset := 1; Count := 1; repeat Strings.Add(Copy(Subject, Offset, MatchedExpressionOffset - Offset)); Inc(Count); Offset := MatchedExpressionOffset + MatchedExpressionLength; until ((Limit > 1) and (Count >= Limit)) or not MatchAgain; Strings.Add(Copy(Subject, Offset, MaxInt)); end end; procedure TPerlRegEx.StoreSubExpressions; var I: Integer; begin if OffsetCount > 0 then begin ClearStoredSubExpressions; SetLength(FStoredSubExpressions, SubExpressionCount+1); for I := SubExpressionCount downto 0 do FStoredSubExpressions[I] := SubExpressions[I]; FHasStoredSubExpressions := True; end end; procedure TPerlRegEx.Study; var Error: PAnsiChar; begin if not FCompiled then Compile; Hints := pcre_study(Pattern, 0, @Error); if Error <> nil then raise Exception.Create('TPerlRegEx.Study() - Error studying the regex: ' + AnsiString(Error)); FStudied := True end; { TPerlRegExList } function TPerlRegExList.Add(ARegEx: TPerlRegEx): Integer; begin Result := FList.Add(ARegEx); UpdateRegEx(ARegEx); end; procedure TPerlRegExList.Clear; begin FList.Clear; end; constructor TPerlRegExList.Create; begin inherited Create; FList := TList.Create; end; procedure TPerlRegExList.Delete(Index: Integer); begin FList.Delete(Index); end; destructor TPerlRegExList.Destroy; begin FList.Free; inherited end; function TPerlRegExList.GetCount: Integer; begin Result := FList.Count; end; function TPerlRegExList.GetRegEx(Index: Integer): TPerlRegEx; begin Result := TPerlRegEx(Pointer(FList[Index])); end; function TPerlRegExList.IndexOf(ARegEx: TPerlRegEx): Integer; begin Result := FList.IndexOf(ARegEx); end; procedure TPerlRegExList.Insert(Index: Integer; ARegEx: TPerlRegEx); begin FList.Insert(Index, ARegEx); UpdateRegEx(ARegEx); end; function TPerlRegExList.Match: Boolean; begin SetStart(1); FMatchedRegEx := nil; Result := MatchAgain; end; function TPerlRegExList.MatchAgain: Boolean; var I, MatchStart, MatchPos: Integer; ARegEx: TPerlRegEx; begin if FMatchedRegEx <> nil then MatchStart := FMatchedRegEx.MatchedExpressionOffset + FMatchedRegEx.MatchedExpressionLength else MatchStart := FStart; FMatchedRegEx := nil; MatchPos := MaxInt; for I := 0 to Count-1 do begin ARegEx := RegEx[I]; if (not ARegEx.FoundMatch) or (ARegEx.MatchedExpressionOffset < MatchStart) then begin ARegEx.Start := MatchStart; ARegEx.MatchAgain; end; if ARegEx.FoundMatch and (ARegEx.MatchedExpressionOffset < MatchPos) then begin MatchPos := ARegEx.MatchedExpressionOffset; FMatchedRegEx := ARegEx; end; if MatchPos = MatchStart then Break; end; Result := MatchPos < MaxInt; end; procedure TPerlRegExList.SetRegEx(Index: Integer; Value: TPerlRegEx); begin FList[Index] := Value; UpdateRegEx(Value); end; procedure TPerlRegExList.SetStart(const Value: Integer); var I: Integer; begin if FStart <> Value then begin FStart := Value; for I := Count-1 downto 0 do RegEx[I].Start := Value; FMatchedRegEx := nil; end; end; procedure TPerlRegExList.SetStop(const Value: Integer); var I: Integer; begin if FStop <> Value then begin FStop := Value; for I := Count-1 downto 0 do RegEx[I].Stop := Value; FMatchedRegEx := nil; end; end; procedure TPerlRegExList.SetSubject(const Value: PCREString); var I: Integer; begin if FSubject <> Value then begin FSubject := Value; for I := Count-1 downto 0 do RegEx[I].Subject := Value; FMatchedRegEx := nil; end; end; procedure TPerlRegExList.UpdateRegEx(ARegEx: TPerlRegEx); begin ARegEx.Subject := FSubject; ARegEx.Start := FStart; end; end.