FastStrings.pas
Code: |
//================================================== //All code herein is copyrighted by //Peter Morris //----- //Do not alter / remove this copyright notice //Email me at : Этот адрес электронной почты защищён от спам-ботов. У вас должен быть включен JavaScript для просмотра. // //The homepage for this library is https://www.droopyeyes.com // // CURRENT VERSION V3.2 // //(Check out www.HowToDoThings.com for Delphi articles !) //(Check out www.stuckindoors.com if you need a free events page on your site !) //==================================================
unit FastStrings;
interface
uses {$IFNDEF LINUX} Windows, {$ENDIF} SysUtils;
//This TYPE declaration will become apparent later type TBMJumpTable = array[0..255] of Integer; TFastPosProc = function (const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer; TFastPosIndexProc = function (const aSourceString, aFindString: string; const aSourceLen, aFindLen, StartPos: Integer; var JumpTable: TBMJumpTable): Integer; TFastTagReplaceProc = procedure (var Tag: string; const UserData: Integer);
//Boyer-Moore routines procedure MakeBMTable(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable); procedure MakeBMTableNoCase(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable); function BMPos(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer; function BMPosNoCase(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer;
function FastAnsiReplace(const S, OldPattern, NewPattern: string; Flags: TReplaceFlags): string; procedure FastCharMove(const Source; var Dest; Count : Integer); function FastCharPos(const aSource : string; const C: Char; StartPos : Integer): Integer; function FastCharPosNoCase(const aSource : string; C: Char; StartPos : Integer): Integer; function FastPos(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; function FastPosNoCase(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; function FastPosBack(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; function FastPosBackNoCase(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; function FastReplace(const aSourceString : string; const aFindString, aReplaceString : string; CaseSensitive : Boolean = False) : string; function FastTagReplace(const SourceString, TagStart, TagEnd: string; FastTagReplaceProc: TFastTagReplaceProc; const UserData: Integer): string; function SmartPos(const SearchStr,SourceStr : string; const CaseSensitive : Boolean = TRUE; const StartPos : Integer = 1; const ForwardSearch : Boolean = TRUE) : Integer;
implementation
const cDeltaSize = 1.5;
var GUpcaseTable : array[0..255] of char; GUpcaseLUT: Pointer;
//MakeBMJumpTable takes a FindString and makes a JumpTable procedure MakeBMTable(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable); begin if BufferLen = 0thenraise Exception.Create('BufferLen is 0'); asm push EDI push ESI mov EDI, JumpTable mov EAX, BufferLen mov ECX, $100 REPNE STOSD mov ECX, BufferLen mov EDI, JumpTable mov ESI, Buffer dec ECX xor EAX, EAX @@loop: mov AL, [ESI] lea ESI, ESI + 1 mov [EDI + EAX * 4], ECX dec ECX jg @@loop
pop ESI pop EDI end; end;
procedure MakeBMTableNoCase(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable); begin if BufferLen = 0thenraise Exception.Create('BufferLen is 0'); asm push EDI push ESI
mov EDI, JumpTable mov EAX, BufferLen mov ECX, $100 REPNE STOSD
mov EDX, GUpcaseLUT mov ECX, BufferLen mov EDI, JumpTable mov ESI, Buffer dec ECX xor EAX, EAX @@loop: mov AL, [ESI] lea ESI, ESI + 1 mov AL, [EDX + EAX] mov [EDI + EAX * 4], ECX dec ECX jg @@loop pop ESI pop EDI end; end;
function BMPos(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer; var LastPos: Pointer; begin LastPos := Pointer(Integer(aSource) + aSourceLen - 1); asm push ESI push EDI push EBX
mov EAX, aFindLen mov ESI, aSource lea ESI, ESI + EAX - 1 std mov EBX, JumpTable
@@comparetext: cmp ESI, LastPos jg @@NotFound mov EAX, aFindLen mov EDI, aFind mov ECX, EAX push ESI //Remember where we are lea EDI, EDI + EAX - 1 xor EAX, EAX @@CompareNext: mov al, [ESI] cmp al, [EDI] jne @@LookAhead lea ESI, ESI - 1 lea EDI, EDI - 1 dec ECX jz @@Found jmp @@CompareNext
@@LookAhead: //Look up the char in our Jump Table pop ESI mov al, [ESI] mov EAX, [EBX + EAX * 4] lea ESI, ESI + EAX jmp @@CompareText
@@NotFound: mov Result, 0 jmp @@TheEnd @@Found: pop EDI //We are just popping, we don't need the value inc ESI mov Result, ESI @@TheEnd: cld pop EBX pop EDI pop ESI end; end;
function BMPosNoCase(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer; var LastPos: Pointer; begin LastPos := Pointer(Integer(aSource) + aSourceLen - 1); asm push ESI push EDI push EBX
mov EAX, aFindLen mov ESI, aSource lea ESI, ESI + EAX - 1 std mov EDX, GUpcaseLUT
@@comparetext: cmp ESI, LastPos jg @@NotFound mov EAX, aFindLen mov EDI, aFind push ESI //Remember where we are mov ECX, EAX lea EDI, EDI + EAX - 1 xor EAX, EAX @@CompareNext: mov al, [ESI] mov bl, [EDX + EAX] mov al, [EDI] cmp bl, [EDX + EAX] jne @@LookAhead lea ESI, ESI - 1 lea EDI, EDI - 1 dec ECX jz @@Found jmp @@CompareNext
@@LookAhead: //Look up the char in our Jump Table pop ESI mov EBX, JumpTable mov al, [ESI] mov al, [EDX + EAX] mov EAX, [EBX + EAX * 4] lea ESI, ESI + EAX jmp @@CompareText
@@NotFound: mov Result, 0 jmp @@TheEnd @@Found: pop EDI //We are just popping, we don't need the value inc ESI mov Result, ESI @@TheEnd: cld pop EBX pop EDI pop ESI end; end;
//NOTE : FastCharPos and FastCharPosNoCase do not require you to pass the length // of the string, this was only done in FastPos and FastPosNoCase because // they are used by FastReplace many times over, thus saving a LENGTH() // operation each time. I can't see you using these two routines for the // same purposes so I didn't do that this time ! function FastCharPos(const aSource : string; const C: Char; StartPos : Integer) : Integer; var L : Integer; begin //If this assert failed, it is because you passed 0 for StartPos, lowest value is 1 !! Assert(StartPos > 0);
Result := 0; L := Length(aSource); if L = 0then exit; if StartPos > L then exit; Dec(StartPos); asm PUSH EDI //Preserve this register
mov EDI, aSource //Point EDI at aSource add EDI, StartPos mov ECX, L //Make a note of how many chars to search through sub ECX, StartPos mov AL, C //and which char we want @Loop: cmp Al, [EDI] //compare it against the SourceString jz @Found inc EDI dec ECX jnz @Loop jmp @NotFound @Found: sub EDI, aSource //EDI has been incremented, so EDI-OrigAdress = Char pos ! inc EDI mov Result, EDI @NotFound:
POP EDI end; end;
function FastCharPosNoCase(const aSource : string; C: Char; StartPos : Integer) : Integer; var L : Integer; begin Result := 0; L := Length(aSource); if L = 0then exit; if StartPos > L then exit; Dec(StartPos); if StartPos < 0then StartPos := 0;
asm PUSH EDI //Preserve this register PUSH EBX mov EDX, GUpcaseLUT
mov EDI, aSource //Point EDI at aSource add EDI, StartPos mov ECX, L //Make a note of how many chars to search through sub ECX, StartPos
xor EBX, EBX mov BL, C mov AL, [EDX+EBX] @Loop: mov BL, [EDI] inc EDI cmp Al, [EDX+EBX] jz @Found dec ECX jnz @Loop jmp @NotFound @Found: sub EDI, aSource //EDI has been incremented, so EDI-OrigAdress = Char pos ! mov Result, EDI @NotFound:
POP EBX POP EDI end; end;
//The first thing to note here is that I am passing the SourceLength and FindLength //As neither Source or Find will alter at any point during FastReplace there is //no need to call the LENGTH subroutine each time ! function FastPos(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; var JumpTable: TBMJumpTable; begin //If this assert failed, it is because you passed 0 for StartPos, lowest value is 1 !! Assert(StartPos > 0); if aFindLen < 1thenbegin Result := 0; exit; end; if aFindLen > aSourceLen thenbegin Result := 0; exit; end;
MakeBMTable(PChar(aFindString), aFindLen, JumpTable); Result := Integer(BMPos(PChar(aSourceString) + (StartPos - 1), PChar(aFindString),aSourceLen - (StartPos-1), aFindLen, JumpTable)); if Result > 0then Result := Result - Integer(@aSourceString[1]) +1; end;
function FastPosNoCase(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; var JumpTable: TBMJumpTable; begin //If this assert failed, it is because you passed 0 for StartPos, lowest value is 1 !! Assert(StartPos > 0); if aFindLen < 1thenbegin Result := 0; exit; end; if aFindLen > aSourceLen thenbegin Result := 0; exit; end;
MakeBMTableNoCase(PChar(AFindString), aFindLen, JumpTable); Result := Integer(BMPosNoCase(PChar(aSourceString) + (StartPos - 1), PChar(aFindString),aSourceLen - (StartPos-1), aFindLen, JumpTable)); if Result > 0then Result := Result - Integer(@aSourceString[1]) +1; end;
function FastPosBack(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; var SourceLen : Integer; begin if aFindLen < 1thenbegin Result := 0; exit; end; if aFindLen > aSourceLen thenbegin Result := 0; exit; end;
if (StartPos = 0) or (StartPos + aFindLen > aSourceLen) then SourceLen := aSourceLen - (aFindLen-1) else SourceLen := StartPos;
asm push ESI push EDI push EBX
mov EDI, aSourceString add EDI, SourceLen Dec EDI
mov ESI, aFindString mov ECX, SourceLen Mov Al, [ESI]
@ScaSB: cmp Al, [EDI] jne @NextChar
@CompareStrings: mov EBX, aFindLen dec EBX jz @FullMatch
@CompareNext: mov Ah, [ESI+EBX] cmp Ah, [EDI+EBX] Jnz @NextChar
@Matches: Dec EBX Jnz @CompareNext
@FullMatch: mov EAX, EDI sub EAX, aSourceString inc EAX mov Result, EAX jmp @TheEnd @NextChar: dec EDI dec ECX jnz @ScaSB
mov Result,0
@TheEnd: pop EBX pop EDI pop ESI end; end;
function FastPosBackNoCase(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer; var SourceLen : Integer; begin if aFindLen < 1thenbegin Result := 0; exit; end; if aFindLen > aSourceLen thenbegin Result := 0; exit; end;
if (StartPos = 0) or (StartPos + aFindLen > aSourceLen) then SourceLen := aSourceLen - (aFindLen-1) else SourceLen := StartPos;
asm push ESI push EDI push EBX
mov EDI, aSourceString add EDI, SourceLen Dec EDI
mov ESI, aFindString mov ECX, SourceLen
mov EDX, GUpcaseLUT xor EBX, EBX
mov Bl, [ESI] mov Al, [EDX+EBX]
@ScaSB: mov Bl, [EDI] cmp Al, [EDX+EBX] jne @NextChar
@CompareStrings: PUSH ECX mov ECX, aFindLen dec ECX jz @FullMatch
@CompareNext: mov Bl, [ESI+ECX] mov Ah, [EDX+EBX] mov Bl, [EDI+ECX] cmp Ah, [EDX+EBX] Jz @Matches
//Go back to findind the first char POP ECX Jmp @NextChar
@Matches: Dec ECX Jnz @CompareNext
@FullMatch: POP ECX
mov EAX, EDI sub EAX, aSourceString inc EAX mov Result, EAX jmp @TheEnd @NextChar: dec EDI dec ECX jnz @ScaSB
mov Result,0
@TheEnd: pop EBX pop EDI pop ESI end; end;
//My move is not as fast as MOVE when source and destination are both //DWord aligned, but certainly faster when they are not. //As we are moving characters in a string, it is not very likely at all that //both source and destination are DWord aligned, so moving bytes avoids the //cycle penality of reading/writing DWords across physical boundaries procedure FastCharMove(const Source; var Dest; Count : Integer); asm //Note: When this function is called, delphi passes the parameters as follows //ECX = Count //EAX = Const Source //EDX = Var Dest
//If no bytes to copy, just quit altogether, no point pushing registers cmp ECX,0 Je @JustQuit
//Preserve the critical delphi registers push ESI push EDI
//move Source into ESI (generally the SOURCE register) //move Dest into EDI (generally the DEST register for string commands) //This may not actually be neccessary, as I am not using MOVsb etc //I may be able just to use EAX and EDX, there may be a penalty for //not using ESI, EDI but I doubt it, this is another thing worth trying ! mov ESI, EAX mov EDI, EDX
//The following loop is the same as repNZ MovSB, but oddly quicker ! @Loop: //Get the source byte Mov AL, [ESI] //Point to next byte Inc ESI //Put it into the Dest mov [EDI], AL //Point dest to next position Inc EDI //Dec ECX to note how many we have left to copy Dec ECX //If ECX <> 0 then loop Jnz @Loop
//Another optimization note. //Many people like to do this
//Mov AL, [ESI] //Mov [EDI], Al //Inc ESI //Inc ESI
//There is a hidden problem here, I wont go into too much detail, but //the pentium can continue processing instructions while it is still //working out the result of INC ESI or INC EDI //(almost like a multithreaded CPU) //if, however, you go to use them while they are still being calculated //the processor will stop until they are calculated (a penalty) //Therefore I alter ESI and EDI as far in advance as possible of using them
//Pop the critical Delphi registers that we have altered pop EDI pop ESI @JustQuit: end;
function FastAnsiReplace(const S, OldPattern, NewPattern: string; Flags: TReplaceFlags): string; var BufferSize, BytesWritten: Integer; SourceString, FindString: string; ResultPChar: PChar; FindPChar, ReplacePChar: PChar; SPChar, SourceStringPChar, PrevSourceStringPChar: PChar; FinalSourceMarker: PChar; SourceLength, FindLength, ReplaceLength, CopySize: Integer; FinalSourcePosition: Integer; begin //Set up string lengths BytesWritten := 0; SourceLength := Length(S); FindLength := Length(OldPattern); ReplaceLength := Length(NewPattern); //Quick exit if (SourceLength = 0) or (FindLength = 0) or (FindLength > SourceLength) then begin Result := S; Exit; end;
//Set up the source string and find string if rfIgnoreCase in Flags then begin SourceString := AnsiUpperCase(S); FindString := AnsiUpperCase(OldPattern); endelse begin SourceString := S; FindString := OldPattern; end;
//Set up the result buffer size and pointers try if ReplaceLength <= FindLength then //Result cannot be larger, only same size or smaller BufferSize := SourceLength else //Assume a source string made entired of the sub string BufferSize := (SourceLength * ReplaceLength) div FindLength;
//10 times is okay for starters. We don't want to //go allocating much more than we need. if BufferSize > (SourceLength * 10) then BufferSize := SourceLength * 10; except //Oops, integer overflow! Better start with a string //of the same size as the source. BufferSize := SourceLength; end; SetLength(Result, BufferSize); ResultPChar := @Result[1];
//Set up the pointers to S and SourceString SPChar := @S[1]; SourceStringPChar := @SourceString[1]; PrevSourceStringPChar := SourceStringPChar; FinalSourceMarker := @SourceString[SourceLength - (FindLength - 1)];
//Set up the pointer to FindString FindPChar := @FindString[1];
//Set the pointer to ReplaceString if ReplaceLength > 0then ReplacePChar := @NewPattern[1] else ReplacePChar := nil;
//Replace routine repeat //Find the sub string SourceStringPChar := AnsiStrPos(PrevSourceStringPChar, FindPChar); if SourceStringPChar = nilthen Break; //How many characters do we need to copy before //the string occurs CopySize := SourceStringPChar - PrevSourceStringPChar;
//Check we have enough space in our Result buffer if CopySize + ReplaceLength > BufferSize - BytesWritten then begin BufferSize := Trunc((BytesWritten + CopySize + ReplaceLength) * cDeltaSize); SetLength(Result, BufferSize); ResultPChar := @Result[BytesWritten + 1]; end;
//Copy the preceeding characters to our result buffer Move(SPChar^, ResultPChar^, CopySize); Inc(BytesWritten, CopySize); //Advance the copy position of S Inc(SPChar, CopySize + FindLength); //Advance the Result pointer Inc(ResultPChar, CopySize); //Copy the replace string into the Result buffer if Assigned(ReplacePChar) then begin Move(ReplacePChar^, ResultPChar^, ReplaceLength); Inc(ResultPChar, ReplaceLength); Inc(BytesWritten, ReplaceLength); end;
//Fake delete the start of the source string PrevSourceStringPChar := SourceStringPChar + FindLength; until (PrevSourceStringPChar > FinalSourceMarker) or not (rfReplaceAll in Flags);
FinalSourcePosition := Integer(SPChar - @S[1]); CopySize := SourceLength - FinalSourcePosition; SetLength(Result, BytesWritten + CopySize); if CopySize > 0then Move(SPChar^, Result[BytesWritten + 1], CopySize); end;
function FastReplace(const aSourceString : string; const aFindString, aReplaceString : string; CaseSensitive : Boolean = False) : string; var PResult : PChar; PReplace : PChar; PSource : PChar; PFind : PChar; PPosition : PChar; CurrentPos, BytesUsed, lResult, lReplace, lSource, lFind : Integer; Find : TFastPosProc; CopySize : Integer; JumpTable : TBMJumpTable; begin LSource := Length(aSourceString); if LSource = 0thenbegin Result := aSourceString; exit; end; PSource := @aSourceString[1];
LFind := Length(aFindString); if LFind = 0then exit; PFind := @aFindString[1];
LReplace := Length(aReplaceString);
//Here we may get an Integer Overflow, or OutOfMemory, if so, we use a Delta try if LReplace <= LFind then SetLength(Result,lSource) else SetLength(Result, (LSource *LReplace) div LFind); except SetLength(Result,0); end;
LResult := Length(Result); if LResult = 0thenbegin LResult := Trunc((LSource + LReplace) * cDeltaSize); SetLength(Result, LResult); end;
PResult := @Result[1];
if CaseSensitive then begin MakeBMTable(PChar(AFindString), lFind, JumpTable); Find := BMPos; endelse begin MakeBMTableNoCase(PChar(AFindString), lFind, JumpTable); Find := BMPosNoCase; end;
BytesUsed := 0; if LReplace > 0thenbegin PReplace := @aReplaceString[1]; repeat PPosition := Find(PSource,PFind,lSource, lFind, JumpTable); if PPosition = nilthen break;
CopySize := PPosition - PSource; Inc(BytesUsed, CopySize + LReplace);
if BytesUsed >= LResult thenbegin //We have run out of space CurrentPos := Integer(PResult) - Integer(@Result[1]) +1; LResult := Trunc(LResult * cDeltaSize); SetLength(Result,LResult); PResult := @Result[CurrentPos]; end;
FastCharMove(PSource^,PResult^,CopySize); Dec(lSource,CopySize + LFind); Inc(PSource,CopySize + LFind); Inc(PResult,CopySize);
FastCharMove(PReplace^,PResult^,LReplace); Inc(PResult,LReplace);
until lSource < lFind; endelsebegin repeat PPosition := Find(PSource,PFind,lSource, lFind, JumpTable); if PPosition = nilthen break;
CopySize := PPosition - PSource; FastCharMove(PSource^,PResult^,CopySize); Dec(lSource,CopySize + LFind); Inc(PSource,CopySize + LFind); Inc(PResult,CopySize); Inc(BytesUsed, CopySize); until lSource < lFind; end;
SetLength(Result, (PResult+LSource) - @Result[1]); if LSource > 0then FastCharMove(PSource^, Result[BytesUsed + 1], LSource); end;
function FastTagReplace(const SourceString, TagStart, TagEnd: string; FastTagReplaceProc: TFastTagReplaceProc; const UserData: Integer): string; var TagStartPChar: PChar; TagEndPChar: PChar; SourceStringPChar: PChar; TagStartFindPos: PChar; TagEndFindPos: PChar; TagStartLength: Integer; TagEndLength: Integer; DestPChar: PChar; FinalSourceMarkerStart: PChar; FinalSourceMarkerEnd: PChar; BytesWritten: Integer; BufferSize: Integer; CopySize: Integer; ReplaceString: string;
procedure AddBuffer(const Buffer: Pointer; Size: Integer); begin if BytesWritten + Size > BufferSize then begin BufferSize := Trunc(BufferSize * cDeltaSize); if BufferSize <= (BytesWritten + Size) then BufferSize := Trunc((BytesWritten + Size) * cDeltaSize); SetLength(Result, BufferSize); DestPChar := @Result[BytesWritten + 1]; end; Inc(BytesWritten, Size); FastCharMove(Buffer^, DestPChar^, Size); DestPChar := DestPChar + Size; end;
begin Assert(Assigned(@FastTagReplaceProc)); TagStartPChar := PChar(TagStart); TagEndPChar := PChar(TagEnd); if (SourceString = '') or (TagStart = '') or (TagEnd = '') then begin Result := SourceString; Exit; end;
SourceStringPChar := PChar(SourceString); TagStartLength := Length(TagStart); TagEndLength := Length(TagEnd); FinalSourceMarkerEnd := SourceStringPChar + Length(SourceString) - TagEndLength; FinalSourceMarkerStart := FinalSourceMarkerEnd - TagStartLength;
BytesWritten := 0; BufferSize := Length(SourceString); SetLength(Result, BufferSize); DestPChar := @Result[1];
repeat TagStartFindPos := AnsiStrPos(SourceStringPChar, TagStartPChar); if (TagStartFindPos = nil) or (TagStartFindPos > FinalSourceMarkerStart) then Break; TagEndFindPos := AnsiStrPos(TagStartFindPos + TagStartLength, TagEndPChar); if (TagEndFindPos = nil) or (TagEndFindPos > FinalSourceMarkerEnd) then Break; CopySize := TagStartFindPos - SourceStringPChar; AddBuffer(SourceStringPChar, CopySize); CopySize := TagEndFindPos - (TagStartFindPos + TagStartLength); SetLength(ReplaceString, CopySize); if CopySize > 0then Move((TagStartFindPos + TagStartLength)^, ReplaceString[1], CopySize); FastTagReplaceProc(ReplaceString, UserData); if Length(ReplaceString) > 0then AddBuffer(@ReplaceString[1], Length(ReplaceString)); SourceStringPChar := TagEndFindPos + TagEndLength; until SourceStringPChar > FinalSourceMarkerStart; CopySize := PChar(@SourceString[Length(SourceString)]) - (SourceStringPChar - 1); if CopySize > 0then AddBuffer(SourceStringPChar, CopySize); SetLength(Result, BytesWritten); end;
function SmartPos(const SearchStr,SourceStr : string; const CaseSensitive : Boolean = TRUE; const StartPos : Integer = 1; const ForwardSearch : Boolean = TRUE) : Integer; begin // NOTE: When using StartPos, the returned value is absolute! if (CaseSensitive) then if (ForwardSearch) then Result:= FastPos(SourceStr,SearchStr,Length(SourceStr),Length(SearchStr),StartPos) else Result:= FastPosBack(SourceStr,SearchStr,Length(SourceStr),Length(SearchStr),StartPos) else if (ForwardSearch) then Result:= FastPosNoCase(SourceStr,SearchStr,Length(SourceStr),Length(SearchStr),StartPos) else Result:= FastPosBackNoCase(SourceStr,SearchStr,Length(SourceStr),Length(SearchStr),StartPos) end;
var I: Integer; initialization {$IFNDEF LINUX} for I:=0to255do GUpcaseTable[I] := Chr(I); CharUpperBuff(@GUpcaseTable[0], 256); {$ELSE} for I:=0to255do GUpcaseTable[I] := UpCase(Chr(I)); {$ENDIF} GUpcaseLUT := @GUpcaseTable[0]; end.
FastStringFuncs.pas
//================================================== //All code herein is copyrighted by //Peter Morris //----- //Do not alter / remove this copyright notice //Email me at : Этот адрес электронной почты защищён от спам-ботов. У вас должен быть включен JavaScript для просмотра. // //The homepage for this library is https://www.droopyeyes.com // //(Check out www.HowToDoThings.com for Delphi articles !) //(Check out www.stuckindoors.com if you need a free events page on your site !)
unit FastStringFuncs;
interface
uses {$IFDEF LINUX} QGraphics, {$ELSE} Graphics, {$ENDIF} FastStrings, Sysutils, Classes;
const cHexChars = '0123456789ABCDEF'; cSoundexTable: array[65..122] of Byte = ({A}0, {B}1, {C}2, {D}3, {E}0, {F}1, {G}2, {H}0, {I}0, {J}2, {K}2, {L}4, {M}5, {N}5, {O}0, {P}1, {Q}2, {R}6, {S}2, {T}3, {U}0, {V}1, {W}0, {X}2, {Y}0, {Z}2, 0, 0, 0, 0, 0, 0, {a}0, {b}1, {c}2, {d}3, {e}0, {f}1, {g}2, {h}0, {i}0, {j}2, {k}2, {l}4, {m}5, {n}5, {o}0, {p}1, {q}2, {r}6, {s}2, {t}3, {u}0, {v}1, {w}0, {x}2, {y}0, {z}2);
function Base64Encode(const Source: AnsiString): AnsiString; function Base64Decode(const Source: string): string; function CopyStr(const aSourceString : string; aStart, aLength : Integer) : string; function Decrypt(const S: string; Key: Word): string; function Encrypt(const S: string; Key: Word): string; function ExtractHTML(S : string) : string; function ExtractNonHTML(S : string) : string; function HexToInt(aHex : string) : int64; function LeftStr(const aSourceString : string; Size : Integer) : string; function StringMatches(Value, Pattern : string) : Boolean; function MissingText(Pattern, Source : string; SearchText : string = '?') : string; function RandomFileName(aFilename : string) : string; function RandomStr(aLength : Longint) : string; function ReverseStr(const aSourceString: string): string; function RightStr(const aSourceString : string; Size : Integer) : string; function RGBToColor(aRGB : string) : TColor; function StringCount(const aSourceString, aFindString : string; Const CaseSensitive : Boolean = TRUE) : Integer; function SoundEx(const aSourceString: string): Integer; function UniqueFilename(aFilename : string) : string; function URLToText(aValue : string) : string; function WordAt(Text : string; Position : Integer) : string;
procedure Split(aValue : string; aDelimiter : Char; var Result : TStrings);
implementation const cKey1 = 52845; cKey2 = 22719; Base64_Table : shortstring = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
function StripHTMLorNonHTML(const S : string; WantHTML : Boolean) : string; forward;
//Encode to Base64 function Base64Encode(const Source: AnsiString): AnsiString; var NewLength: Integer; begin NewLength := ((2 + Length(Source)) div3) * 4; SetLength( Result, NewLength);
asm Push ESI Push EDI Push EBX Lea EBX, Base64_Table Inc EBX // Move past String Size (ShortString) Mov EDI, Result Mov EDI, [EDI] Mov ESI, Source Mov EDX, [ESI-4] //Length of Input String @WriteFirst2: CMP EDX, 0 JLE @Done MOV AL, [ESI] SHR AL, 2 {$IFDEF VER140}// Changes to BASM in D6 XLATB {$ELSE} XLAT {$ENDIF} MOV [EDI], AL INC EDI MOV AL, [ESI + 1] MOV AH, [ESI] SHR AX, 4 AND AL, 63 {$IFDEF VER140}// Changes to BASM in D6 XLATB {$ELSE} XLAT {$ENDIF} MOV [EDI], AL INC EDI CMP EDX, 1 JNE @Write3 MOV AL, 61// Add == MOV [EDI], AL INC EDI MOV [EDI], AL INC EDI JMP @Done @Write3: MOV AL, [ESI + 2] MOV AH, [ESI + 1] SHR AX, 6 AND AL, 63 {$IFDEF VER140}// Changes to BASM in D6 XLATB {$ELSE} XLAT {$ENDIF} MOV [EDI], AL INC EDI CMP EDX, 2 JNE @Write4 MOV AL, 61// Add = MOV [EDI], AL INC EDI JMP @Done @Write4: MOV AL, [ESI + 2] AND AL, 63 {$IFDEF VER140}// Changes to BASM in D6 XLATB {$ELSE} XLAT {$ENDIF} MOV [EDI], AL INC EDI ADD ESI, 3 SUB EDX, 3 JMP @WriteFirst2 @done: Pop EBX Pop EDI Pop ESI end; end;
//Decode Base64 function Base64Decode(const Source: string): string; var NewLength: Integer; begin { NB: On invalid input this routine will simply skip the bad data, a better solution would probably report the error
ESI -> Source String EDI -> Result String
ECX -> length of Source (number of DWords) EAX -> 32 Bits from Source EDX -> 24 Bits Decoded
BL -> Current number of bytes decoded }
SetLength( Result, (Length(Source) div4) * 3); NewLength := 0; asm Push ESI Push EDI Push EBX
Mov ESI, Source
Mov EDI, Result //Result address Mov EDI, [EDI]
Or ESI,ESI // Nil Strings Jz @Done
Mov ECX, [ESI-4] Shr ECX,2// DWord Count
JeCxZ @Error // Empty String
Cld
jmp @Read4
@Next: Dec ECX Jz @Done
@Read4: lodsd
Xor BL, BL Xor EDX, EDX
Call @DecodeTo6Bits Shl EDX, 6 Shr EAX,8 Call @DecodeTo6Bits Shl EDX, 6 Shr EAX,8 Call @DecodeTo6Bits Shl EDX, 6 Shr EAX,8 Call @DecodeTo6Bits
// Write Word
Or BL, BL JZ @Next // No Data
Dec BL Or BL, BL JZ @Next // Minimum of 2 decode values to translate to 1 byte
Mov EAX, EDX
Cmp BL, 2 JL @WriteByte
Rol EAX, 8
BSWAP EAX
StoSW
Add NewLength, 2
@WriteByte: Cmp BL, 2 JE @Next SHR EAX, 16 StoSB
Inc NewLength jmp @Next
@Error: jmp @Done
@DecodeTo6Bits:
@TestLower: Cmp AL, 'a' Jl @TestCaps Cmp AL, 'z' Jg @Skip Sub AL, 71 Jmp @Finish
@TestCaps: Cmp AL, 'A' Jl @TestEqual Cmp AL, 'Z' Jg @Skip Sub AL, 65 Jmp @Finish
@TestEqual: Cmp AL, '=' Jne @TestNum // Skip byte ret
@TestNum: Cmp AL, '9' Jg @Skip Cmp AL, '0' JL @TestSlash Add AL, 4 Jmp @Finish
@TestSlash: Cmp AL, '/' Jne @TestPlus Mov AL, 63 Jmp @Finish
@TestPlus: Cmp AL, '+' Jne @Skip Mov AL, 62
@Finish: Or DL, AL Inc BL
@Skip: Ret
@Done: Pop EBX Pop EDI Pop ESI
end;
SetLength( Result, NewLength); // Trim off the excess end;
//Encrypt a string function Encrypt(const S: string; Key: Word): string; var I: byte; begin SetLength(result,length(s)); for I := 1to Length(S) do begin Result[I] := char(byte(S[I]) xor (Key shr8)); Key := (byte(Result[I]) + Key) * cKey1 + cKey2; end; end;
//Return only the HTML of a string function ExtractHTML(S : string) : string; begin Result := StripHTMLorNonHTML(S, True); end;
function CopyStr(const aSourceString : string; aStart, aLength : Integer) : string; var L : Integer; begin L := Length(aSourceString); if L=0then Exit; if (aStart < 1) or (aLength < 1) then Exit;
if aStart + (aLength-1) > L then aLength := L - (aStart-1);
if (aStart <1) then exit;
SetLength(Result,aLength); FastCharMove(aSourceString[aStart], Result[1], aLength); end;
//Take all HTML out of a string function ExtractNonHTML(S : string) : string; begin Result := StripHTMLorNonHTML(S,False); end;
//Decrypt a string encoded with Encrypt function Decrypt(const S: string; Key: Word): string; var I: byte; begin SetLength(result,length(s)); for I := 1to Length(S) do begin Result[I] := char(byte(S[I]) xor (Key shr8)); Key := (byte(S[I]) + Key) * cKey1 + cKey2; end; end;
//Convert a text-HEX value (FF0088 for example) to an integer function HexToInt(aHex : string) : int64; var Multiplier : Int64; Position : Byte; Value : Integer; begin Result := 0; Multiplier := 1; Position := Length(aHex); while Position >0dobegin Value := FastCharPosNoCase(cHexChars, aHex[Position], 1)-1; if Value = -1then raise Exception.Create('Invalid hex character ' + aHex[Position]);
Result := Result + (Value * Multiplier); Multiplier := Multiplier * 16; Dec(Position); end; end;
//Get the left X amount of chars function LeftStr(const aSourceString : string; Size : Integer) : string; begin if Size > Length(aSourceString) then Result := aSourceString elsebegin SetLength(Result, Size); Move(aSourceString[1],Result[1],Size); end; end;
//Do strings match with wildcards, eg //StringMatches('The cat sat on the mat', 'The * sat * the *') = True function StringMatches(Value, Pattern : string) : Boolean; var NextPos, Star1, Star2 : Integer; NextPattern : string; begin Star1 := FastCharPos(Pattern,'*',1); if Star1 = 0then Result := (Value = Pattern) else begin Result := (Copy(Value,1,Star1-1) = Copy(Pattern,1,Star1-1)); if Result then begin if Star1 > 1then Value := Copy(Value,Star1,Length(Value)); Pattern := Copy(Pattern,Star1+1,Length(Pattern));
NextPattern := Pattern; Star2 := FastCharPos(NextPattern, '*',1); if Star2 > 0then NextPattern := Copy(NextPattern,1,Star2-1);
//pos(NextPattern,Value); NextPos := FastPos(Value, NextPattern, Length(Value), Length(NextPattern), 1); if (NextPos = 0) andnot (NextPattern = '') then Result := False else begin Value := Copy(Value,NextPos,Length(Value)); if Pattern = ''then Result := True else Result := Result and StringMatches(Value,Pattern); end; end; end; end;
//Missing text will tell you what text is missing, eg //MissingText('the ? sat on the mat','the cat sat on the mat','?') = 'cat' function MissingText(Pattern, Source : string; SearchText : string = '?') : string; var Position : Longint; BeforeText, AfterText : string; BeforePos, AfterPos : Integer; lSearchText, lBeforeText, lAfterText, lSource : Longint; begin Result := ''; Position := Pos(SearchText,Pattern); if Position = 0then exit;
lSearchText := Length(SearchText); lSource := Length(Source); BeforeText := Copy(Pattern,1,Position-1); AfterText := Copy(Pattern,Position+lSearchText,lSource);
lBeforeText := Length(BeforeText); lAfterText := Length(AfterText);
AfterPos := lBeforeText; repeat AfterPos := FastPosNoCase(Source,AfterText,lSource,lAfterText,AfterPos+lSearchText); if AfterPos > 0thenbegin BeforePos := FastPosBackNoCase(Source,BeforeText,AfterPos-1,lBeforeText,AfterPos - (lBeforeText-1)); if (BeforePos > 0) thenbegin Result := Copy(Source,BeforePos + lBeforeText, AfterPos - (BeforePos + lBeforeText)); Break; end; end; until AfterPos = 0; end;
//Generates a random filename but preserves the original path + extension function RandomFilename(aFilename : string) : string; var Path, Filename, Ext : string; begin Result := aFilename; Path := ExtractFilepath(aFilename); Ext := ExtractFileExt(aFilename); Filename := ExtractFilename(aFilename); if Length(Ext) > 0then Filename := Copy(Filename,1,Length(Filename)-Length(Ext)); repeat Result := Path + RandomStr(32) + Ext; untilnot FileExists(Result); end;
//Makes a string of aLength filled with random characters function RandomStr(aLength : Longint) : string; var X : Longint; begin if aLength <= 0then exit; SetLength(Result, aLength); for X:=1to aLength do Result[X] := Chr(Random(26) + 65); end;
function ReverseStr(const aSourceString: string): string; var L : Integer; S, D : Pointer; begin L := Length(aSourceString); SetLength(Result,L); if L = 0then exit;
S := @aSourceString[1]; D := @Result[L];
asm push ESI push EDI
mov ECX, L mov ESI, S mov EDI, D
@Loop: mov Al, [ESI] inc ESI mov [EDI], Al dec EDI dec ECX jnz @Loop
pop EDI pop ESI end; end;
//Returns X amount of chars from the right of a string function RightStr(const aSourceString : string; Size : Integer) : string; begin if Size > Length(aSourceString) then Result := aSourceString elsebegin SetLength(Result, Size); FastCharMove(aSourceString[Length(aSourceString)-(Size-1)],Result[1],Size); end; end;
//Converts a typical HTML RRGGBB color to a TColor function RGBToColor(aRGB : string) : TColor; begin if Length(aRGB) < 6thenraise EConvertError.Create('Not a valid RGB value'); if aRGB[1] = '#'then aRGB := Copy(aRGB,2,Length(aRGB)); if Length(aRGB) <> 6thenraise EConvertError.Create('Not a valid RGB value');
Result := HexToInt(aRGB); asm mov EAX, Result BSwap EAX shr EAX, 8 mov Result, EAX end; end;
//Splits a delimited text line into TStrings (does not account for stuff in quotes but it should) procedure Split(aValue : string; aDelimiter : Char; var Result : TStrings); var X : Integer; S : string; begin if Result = nilthen Result := TStringList.Create; Result.Clear; S := ''; for X:=1to Length(aValue) dobegin if aValue[X] <> aDelimiter then S:=S + aValue[X] elsebegin Result.Add(S); S := ''; end; end; if S <> ''then Result.Add(S); end;
//counts how many times a substring exists within a string //StringCount('XXXXX','XX') would return 2 function StringCount(const aSourceString, aFindString : string; Const CaseSensitive : Boolean = TRUE) : Integer; var Find, Source, NextPos : PChar; LSource, LFind : Integer; Next : TFastPosProc; JumpTable : TBMJumpTable; begin Result := 0; LSource := Length(aSourceString); if LSource = 0then exit;
LFind := Length(aFindString); if LFind = 0then exit;
if CaseSensitive then begin Next := BMPos; MakeBMTable(PChar(aFindString), Length(aFindString), JumpTable); endelse begin Next := BMPosNoCase; MakeBMTableNoCase(PChar(aFindString), Length(aFindString), JumpTable); end;
Source := @aSourceString[1]; Find := @aFindString[1];
repeat NextPos := Next(Source, Find, LSource, LFind, JumpTable); if NextPos <> nilthen begin Dec(LSource, (NextPos - Source) + LFind); Inc(Result); Source := NextPos + LFind; end; until NextPos = nil; end;
function SoundEx(const aSourceString: string): Integer; var CurrentChar: PChar; I, S, LastChar, SoundexGroup: Byte; Multiple: Word; begin if aSourceString = ''then Result := 0 else begin //Store first letter immediately Result := Ord(Upcase(aSourceString[1]));
//Last character found = 0 LastChar := 0; Multiple := 26;
//Point to first character CurrentChar := @aSourceString[1];
for I := 1to Length(aSourceString) do begin Inc(CurrentChar);
S := Ord(CurrentChar^); if (S > 64) and (S < 123) then begin SoundexGroup := cSoundexTable[S]; if (SoundexGroup <> LastChar) and (SoundexGroup > 0) then begin Inc(Result, SoundexGroup * Multiple); if Multiple = 936then Break; {26 * 6 * 6} Multiple := Multiple * 6; LastChar := SoundexGroup; end; end; end; end; end;
//Used by ExtractHTML and ExtractNonHTML function StripHTMLorNonHTML(const S : string; WantHTML : Boolean) : string; var X: Integer; TagCnt: Integer; ResChar: PChar; SrcChar: PChar; begin TagCnt := 0; SetLength(Result, Length(S)); if Length(S) = 0then Exit;
ResChar := @Result[1]; SrcChar := @S[1]; for X:=1to Length(S) do begin case SrcChar^ of '<': begin Inc(TagCnt); if WantHTML and (TagCnt = 1) then begin ResChar^ := '<'; Inc(ResChar); end; end; '>': begin Dec(TagCnt); if WantHTML and (TagCnt = 0) then begin ResChar^ := '>'; Inc(ResChar); end; end; else case WantHTML of False: if TagCnt <= 0then begin ResChar^ := SrcChar^; Inc(ResChar); TagCnt := 0; end; True: if TagCnt >= 1then begin ResChar^ := SrcChar^; Inc(ResChar); endelse if TagCnt < 0then TagCnt := 0; end; end; Inc(SrcChar); end; SetLength(Result, ResChar - PChar(@Result[1])); Result := FastReplace(Result, ' ', ' ', False); Result := FastReplace(Result,'&','&', False); Result := FastReplace(Result,'<','<', False); Result := FastReplace(Result,'>','>', False); Result := FastReplace(Result,'"','"', False); end;
//Generates a UniqueFilename, makes sure the file does not exist before returning a result function UniqueFilename(aFilename : string) : string; var Path, Filename, Ext : string; Index : Integer; begin Result := aFilename; if FileExists(aFilename) thenbegin Path := ExtractFilepath(aFilename); Ext := ExtractFileExt(aFilename); Filename := ExtractFilename(aFilename); if Length(Ext) > 0then Filename := Copy(Filename,1,Length(Filename)-Length(Ext)); Index := 2; repeat Result := Path + Filename + IntToStr(Index) + Ext; Inc(Index); untilnot FileExists(Result); end; end;
//Decodes all that %3c stuff you get in a URL function URLToText(aValue : string) : string; var X : Integer; begin Result := ''; X := 1; while X <= Length(aValue) dobegin if aValue[X] <> '%'then Result := Result + aValue[X] elsebegin Result := Result + Chr( HexToInt( Copy(aValue,X+1,2) ) ); Inc(X,2); end; Inc(X); end; end;
//Returns the whole word at a position function WordAt(Text : string; Position : Integer) : string; var L, X : Integer; begin Result := ''; L := Length(Text);
if (Position > L) or (Position < 1) then Exit; for X:=Position to L dobegin if Upcase(Text[X]) in ['A'..'Z','0'..'9'] then Result := Result + Text[X] else Break; end;
for X:=Position-1downto1dobegin if Upcase(Text[X]) in ['A'..'Z','0'..'9'] then Result := Text[X] + Result else Break; end; end;
end.
|
Просьба писать ваши замечания, наблюдения и все остальное,
что поможет улучшить предоставляемую информацию на этом сайте.
ВСЕ КОММЕНТАРИИ МОДЕРИРУЮТСЯ ВРУЧНУЮ, ТАК ЧТО СПАМИТЬ БЕСПОЛЕЗНО!