用这个函数进行字符串替换操作,比Delphi自带的ReplaceString要快N倍,效率一流,非常快,不愧为汇编级函数操作,哈哈.
下载函数单元文件:freplace.rar
delphi代码
- unit FReplace;
- interface
- Type
- TFastPosProc = function(
- const aSourceString, aFindString : String;
- const aSourceLen, aFindLen, StartPos : integer
- ) : integer;
- function FastReplace(
- var aSourceString : String;
- const aFindString, aReplaceString : String;
- CaseSensitive : Boolean = False) : String;
- function FastPos(
- const aSourceString, aFindString : String;
- const aSourceLen, aFindLen, StartPos : integer
- ) : integer;
- function FastPosNoCase(
- const aSourceString, aFindString : String;
- const aSourceLen, aFindLen, StartPos : integer
- ) : integer;
- implementation
- // This TYPE declaration will become apparent later.
- //The first thing to note here is that I’m passing the SourceLength and FindL
- //ength. As neither Source nor Find will alter at any point during FastReplace
- //, there’s no need to call the LENGTH subroutine each time!
- function FastPos(
- const aSourceString, aFindString : String;
- const aSourceLen, aFindLen, StartPos : integer
- ) : integer;
- var
- SourceLen : integer;
- begin
- // Next, we determine how many bytes we need to
- // scan to find the "start" of aFindString.
- SourceLen := aSourceLen;
- SourceLen := SourceLen - aFindLen;
- if (StartPos-1) > SourceLen then begin
- Result := 0;
- Exit;
- end;
- SourceLen := SourceLen - StartPos;
- SourceLen := SourceLen +2;
- // The ASM starts here.
- asm
- // Delphi uses ESI, EDI, and EBX a lot,
- // so we must preserve them.
- push ESI
- push EDI
- push EBX
- // Get the address of sourceString[1]
- // and Add (StartPos-1).
- // We do this for the purpose of finding
- // the NEXT occurrence, rather than
- // always the first!
- mov EDI, aSourceString
- add EDI, StartPos
- Dec EDI
- // Get the address of aFindString.
- mov ESI, aFindString
- // Note how many bytes we need to
- // look through in aSourceString
- // to find aFindString.
- mov ECX, SourceLen
- // Get the first char of aFindString;
- // note how it is done outside of the
- // main loop, as it never changes!
- Mov Al, [ESI]
- // Now the FindFirstCharacter loop!
- @ScaSB:
- // Get the value of the current
- // character in aSourceString.
- // This is equal to ah := EDI^, that
- // is what the [] are around [EDI].
- Mov Ah, [EDI]
- // Compare this character with aDestString[1].
- cmp Ah,Al
- // If they're not equal we don't
- // compare the strings.
- jne @NextChar
- // If they're equal, obviously we do!
- @CompareStrings:
- // Put the length of aFindLen in EBX.
- mov EBX, aFindLen
- // We DEC EBX to point to the end of
- // the string; that is, we don't want to
- // add 1 if aFindString is 1 in length!
- dec EBX
- // add by ShengQuanhu
- // If EBX is zero, then we've successfully
- // compared each character; i.e. it's A MATCH!
- // It will be happened when aFindLen=1
- Jz @EndOfMatch
- //add end
- //Here’s another optimization tip. People at this point usually PUSH ESI and
- //so on and then POP ESI and so forth at the end–instead, I opted not to chan
- //ge ESI and so on at all. This saves lots of pushing and popping!
- @CompareNext:
- // Get aFindString character +
- // aFindStringLength (the last char).
- mov Al, [ESI+EBX]
- // Get aSourceString character (current
- // position + aFindStringLength).
- mov Ah, [EDI+EBX]
- // Compare them.
- cmp Al, Ah
- Jz @Matches
- // If they don't match, we put the first char
- // of aFindString into Al again to continue
- // looking for the first character.
- Mov Al, [ESI]
- Jmp @NextChar
- @Matches:
- // If they match, we DEC EBX (point to
- // previous character to compare).
- Dec EBX
- // If EBX <> 0 ("J"ump "N"ot "Z"ero), we
- // continue comparing strings.
- Jnz @CompareNext
- //add by Shengquanhu
- @EndOfMatch:
- //add end
- // If EBX is zero, then we've successfully
- // compared each character; i.e. it's A MATCH!
- // Move the address of the *current*
- // character in EDI.
- // Note, we haven't altered EDI since
- // the first char was found.
- mov EAX, EDI
- // This is an address, so subtract the
- // address of aSourceString[1] to get
- // an actual character position.
- sub EAX, aSourceString
- // Inc EAX to make it 1-based,
- // rather than 0-based.
- inc EAX
- // Put it into result.
- mov Result, EAX
- // Finish this routine!
- jmp @TheEnd
- @NextChar:
- //This is where I jump to when I want to continue searching for the first char
- //acter of aFindString in aSearchString:
- // Point EDI (aFindString[X]) to
- // the next character.
- Inc EDI
- // Dec ECX tells us that we've checked
- // another character, and that we're
- // fast running out of string to check!
- dec ECX
- // If EBX <> 0, then continue scanning
- // for the first character.
- //add by shengquanhu
- //if ah is chinese char,jump again
- jz @Result0
- cmp ah, $80
- jb @ScaSB
- Inc EDI
- Dec ECX
- //add by shengquanhu end
- jnz @ScaSB
- //add by shengquanhu
- @Result0:
- //add by shengquanhu end
- // If EBX = 0, then move 0 into RESULT.
- mov Result,0
- // Restore EBX, EDI, ESI for Delphi
- // to work correctly.
- // Note that they're POPped in the
- // opposite order they were PUSHed.
- @TheEnd:
- pop EBX
- pop EDI
- pop ESI
- end;
- end;
- //This routine is an identical copy of FastPOS except where commented! The ide
- //a is that when grabbing bytes, it ANDs them with $df, effectively making the
- //m lowercase before comparing. Maybe this would be quicker if aFindString was
- // made lowercase in one fell swoop at the beginning of the function, saving a
- //n AND instruction each time.
- function FastPosNoCase(
- const aSourceString, aFindString : String;
- const aSourceLen, aFindLen, StartPos : integer
- ) : integer;
- var
- SourceLen : integer;
- begin
- SourceLen := aSourceLen;
- SourceLen := SourceLen - aFindLen;
- if (StartPos-1) > SourceLen then begin
- Result := 0;
- Exit;
- end;
- SourceLen := SourceLen - StartPos;
- SourceLen := SourceLen +2;
- asm
- push ESI
- push EDI
- push EBX
- mov EDI, aSourceString
- add EDI, StartPos
- Dec EDI
- mov ESI, aFindString
- mov ECX, SourceLen
- Mov Al, [ESI]
- //add by shengquanhu:just modified the lowercase 'a'..'z'
- cmp Al, $7A
- ja @ScaSB
- cmp Al, $61
- jb @ScaSB
- //end------------------------------------------
- // Make Al uppercase.
- and Al, $df
- @ScaSB:
- Mov Ah, [EDI]
- //add by shengquanhu:just modified the lowercase 'a'..'z'
- cmp Ah, $7A
- ja @CompareChar
- cmp Ah, $61
- jb @CompareChar
- //end------------------------------------------
- // Make Ah uppercase.
- and Ah, $df
- @CompareChar:
- cmp Ah,Al
- jne @NextChar
- @CompareStrings:
- mov EBX, aFindLen
- dec EBX
- //add by ShengQuanhu
- Jz @EndOfMatch
- //add end
- @CompareNext:
- mov Al, [ESI+EBX]
- mov Ah, [EDI+EBX]
- //add by shengquanhu:just modified the lowercase 'a'..'z'
- cmp Ah, $7A
- ja @LowerAh
- cmp Al, $61
- jb @LowerAh
- //end------------------------------------------
- // Make Al and Ah uppercase.
- and Al, $df
- //add by shengquanhu:just modified the lowercase 'a'..'z'
- @LowerAh:
- cmp Ah, $7A
- ja @CompareChar2
- cmp Ah, $61
- jb @CompareChar2
- //end------------------------------------------
- and Ah, $df
- @CompareChar2:
- cmp Al, Ah
- Jz @Matches
- Mov Al, [ESI]
- //add by shengquanhu:just modified the lowercase 'a'..'z'
- cmp Al, $7A
- ja @NextChar
- cmp Al, $61
- jb @NextChar
- //end------------------------------------------
- // Make Al uppercase.
- and Al, $df
- Jmp @NextChar
- @Matches:
- Dec EBX
- Jnz @CompareNext
- //add by Shengquanhu
- @EndOfMatch:
- //add end
- mov EAX, EDI
- sub EAX, aSourceString
- inc EAX
- mov Result, EAX
- jmp @TheEnd
- @NextChar:
- Inc EDI
- dec ECX
- //add by shengquanhu
- //if ah is chinese char,jump again
- jz @Result0
- cmp ah, $80
- jb @ScaSB
- Inc EDI
- Dec ECX
- //add by shengquanhu end
- jnz @ScaSB
- @Result0:
- mov Result,0
- @TheEnd:
- pop EBX
- pop EDI
- pop ESI
- end;
- end;
- //My move isn’t as fast as MOVE when source and destination are both DWord al
- //igned, but it’s certainly faster when they’re not. As we’re moving charac
- //ters in a string, it isn’t very likely at all that both source and destinat
- //ion are DWord aligned, so moving bytes avoids the cycle penalty of reading/w
- //riting DWords across physical boundaries.
- procedure MyMove(
- const Source; var Dest; Count : Integer);
- asm
- // Note: When this function is called,
- // Delphi passes the parameters as follows:
- // ECX = Count
- // EAX = Const Source
- // EDX = Var Dest
- // If there are no bytes to copy, just quit
- // altogether; there's no point pushing registers.
- cmp ECX,0
- Je @JustQuit
- // Preserve the critical Delphi registers.
- push ESI
- push EDI
- // Move Source into ESI (generally the
- // SOURCE register).
- // Move Dest into EDI (generally the DEST
- // register for string commands).
- // This might not actually be necessary,
- // as I'm not using MOVsb etc.
- // I might be able to just use EAX and EDX;
- // there could be a penalty for not using
- // ESI, EDI, but I doubt it.
- // This is another thing worth trying!
- mov ESI, EAX
- mov EDI, EDX
- // The following loop is the same as repNZ
- // MovSB, but oddly quicker!
- @Loop:
- // Get the source byte.
- Mov AL, [ESI]
- // Point to next byte.
- Inc ESI
- // Put it into the Dest.
- mov [EDI], AL
- // Point dest to next position.
- Inc EDI
- // Dec ECX to note how many we have left to copy.
- Dec ECX
- // If ECX <> 0, then loop.
- Jnz @Loop
- // Another optimization note.
- // Many people like to do this.
- // Mov AL, [ESI]
- // Mov [EDI], Al
- // Inc ESI
- // Inc ESI
- //There’s a hidden problem here. I won’t go into too much detail, but the Pe
- //ntium can continue processing instructions while it’s still working out the
- // result of INC ESI or INC EDI. If, however, you use them while they’re stil
- //l being calculated, the processor will stop until they’re calculated (a pen
- //alty). Therefore, I alter ESI and EDI as far in advance as possible of using
- // them.
- // Pop the critical Delphi registers
- // that we've altered.
- pop EDI
- pop ESI
- @JustQuit:
- end;
- //Point 1: I pass VAR aSourceString rather than just aSourceString. This is be
- //cause I’ll just be passed a pointer to the data rather than a 10M copy of t
- //he data itself, which is much quicker!
- function FastReplace(
- var aSourceString : String;
- const aFindString, aReplaceString : String;
- CaseSensitive : Boolean = False) : String;
- var
- // Size already passed to SetLength,
- // the REAL size of RESULT.
- ActualResultLen,
- // Position of aFindString is aSourceString.
- CurrentPos,
- // Last position the aFindString was found at.
- LastPos,
- // Bytes to copy (that is, lastpos to this pos).
- BytesToCopy,
- // The "running" result length, not the actual one.
- ResultLen,
- // Length of aFindString, to save
- // calling LENGTH repetitively.
- FindLen,
- // Length of aReplaceString, for the same reason.
- ReplaceLen,
- SourceLen : Integer;
- // This is where I explain the
- // TYPE TFastPosProc from earlier!
- FastPosProc : TFastPosProc;
- begin
- //As this function has the option of being case-insensitive, I’d need to call
- // either FastPOS or FastPOSNoCase. The problem is that you’d have to do this
- // within a loop. This is a bad idea, since the result never changes throughou
- //t the whole operation–in which case we can determine it in advance, like so
- //:
- if CaseSensitive then
- FastPosProc := FastPOS
- else
- FastPOSProc := FastPOSNoCase;
- // I don't think I actually need
- // this, but I don't really mind!
- Result := '';
- // Get the lengths of the strings.
- FindLen := Length(aFindString);
- ReplaceLen := Length(aReplaceString);
- SourceLen := Length(aSourceString);
- // If we already have room for the replacements,
- // then set the length of the result to
- // the length of the SourceString.
- if ReplaceLen <= FindLen then
- ActualResultLen := SourceLen
- else
- // If not, we need to calculate the
- // worst-case scenario.
- // That is, the Source consists ONLY of
- // aFindString, and we're going to replace
- // every one of them!
- ActualResultLen :=
- SourceLen +
- (SourceLen * ReplaceLen div FindLen) +
- ReplaceLen;
- // Set the length of Result; this
- // will assign the memory, etc.
- SetLength(Result,ActualResultLen);
- CurrentPos := 1;
- ResultLen := 0;
- LastPos := 1;
- //Again, I’m eliminating an IF statement in a loop by repeating code–this ap
- //proach results in very slightly larger code, but if ever you can trade some
- //memory in exchange for speed, go for it!
- if ReplaceLen > 0 then begin
- repeat
- // Get the position of the first (or next)
- // aFindString in aSourceString.
- // Note that there's no If CaseSensitive,
- // I just call FastPOSProc, which is pointing
- // to the correct pre-determined routine.
- CurrentPos :=
- FastPosProc(aSourceString, aFindString,
- SourceLen, FindLen, CurrentPos);
- // If 0, then we're finished.
- if CurrentPos = 0 then break;
- // Number of bytes to copy from the
- // source string is CurrentPos - lastPos,
- // i.e. " cat " in "the cat the".
- BytesToCopy := CurrentPos-LastPos;
- // Copy chars from aSourceString
- // to the end of Result.
- MyMove(aSourceString[LastPos],
- Result[ResultLen+1], BytesToCopy);
- // Copy chars 
最后修改:2009 年 08 月 16 日
© 允许规范转载