How to search for a pattern in a file #155
Question
I need to locate a pattern in a file (both text and binary) - just like
the Pos function does with the strings. Preferably, it should
deal with TFileStream. Straightforward solution first seemed
kind of expensive - that is to just plainly go through the stream
comparing patterns on every step.
Answer 1:
You can do it that way but it is much faster to load chunks of data into a sizeable buffer and do the search in the buffer. Here is an example:
function ScanFile( const filename: String; const forString: String; caseSensitive: Boolean ): LongInt; { returns position of string in file or -1, if not found } const BufferSize= $8001; { 32K + 1 bytes } var pBuf, pEnd, pScan, pPos: Pchar; filesize: LongInt; bytesRemaining: LongInt; bytesToRead: Word; F: File; SearchFor: Pchar; oldMode: Word; begin Result := - 1; { assume failure } if (Length( forString ) = 0) or (Length( filename ) = 0) then Exit; SearchFor := Nil; pBuf := Nil; { open file as binary, 1 byte recordsize } AssignFile( F, filename ); oldMode := FileMode; FileMode := 0; { read-only access } Reset( F, 1 ); FileMode := oldMode; try { allocate memory for buffer and pchar search string } SearchFor := StrAlloc( Length( forString ) +1 ); StrPCopy( SearchFor, forString ); if not caseSensitive then { convert to upper case } AnsiUpper( SearchFor ); GetMem( pBuf, BufferSize ); filesize := System.Filesize( F ); bytesRemaining := filesize; pPos := Nil; while bytesRemaining > 0 do begin { calc how many bytes to read this round } if bytesRemaining >= BufferSize then bytesToRead := Pred( BufferSize ) else bytesToRead := bytesRemaining; { read a buffer full and zero-terminate the buffer } BlockRead( F, pBuf^, bytesToRead, bytesToRead ); pEnd := @pBuf[ bytesToRead ]; pEnd^ := #0; { scan the buffer. Problem: buffer may contain #0 chars! So we treat it as a concatenation of zero-terminated strings. } pScan := pBuf; while pScan < pEnd do begin if not caseSensitive then { convert to upper case } AnsiUpper( pScan ); pPos := StrPos( pScan, SearchFor ); { search for substring } if pPos <> Nil then begin { Found it! } Result := FileSize - bytesRemaining + LongInt( pPos ) - LongInt( pBuf ); Break; end; pScan := StrEnd( pScan ); Inc( pScan ); end; if pPos <> Nil then Break; bytesRemaining := bytesRemaining - bytesToRead; if bytesRemaining > 0 then begin { no luck in this buffers load. We need to handle the case of the search string spanning two chunks of file now. We simply go back a bit in the file and read from there, thus inspecting some characters twice } Seek( F, FilePos(F) - Length( forString )); bytesRemaining := bytesRemaining + Length( forString ); end; end; finally CloseFile( F ); if SearchFor <> Nil then StrDispose( SearchFor ); if pBuf <> Nil then FreeMem( pBuf, BufferSize ); end; end;
Tip by Peter Below
Answer 2:
procedure TForm1.Button1Click(Sender: TObject);
var
s: String;
hFile: THandle;
hFileMapObj: THandle;
pSharedBuf: Pointer;
Time0: Integer;
p: PChar;
begin
if not OpenDialog1.Execute then
Exit;
s := InputBox('Find','Match','');
Time0 := GetTickCount;
hfile := 0;
hFileMapObj := 0;
pSharedBuf := nil;
try
hFile := FileOpen(OpenDialog1.FileName, fmOpenRead);
Win32Check(hFileMapObj <> INVALID_HANDLE_VALUE);
hFileMapObj :=
CreateFileMapping(hFile, nil, PAGE_READONLY, 0, 0, nil);
Win32Check(hFileMapObj <> 0);
pSharedBuf := MapViewOfFile(hFileMapObj, FILE_MAP_READ, 0, 0, 0);
Win32Check(pSharedBuf <> nil);
P := StrPos(PChar(pSharedBuf), PChar(s));
finally
if pSharedBuf <> nil then
UnMapViewOfFile(pSharedBuf);
if hFileMapObj <> 0 then
CloseHandle(hFileMapObj);
if hFile <> 0 then
CloseHandle(hFile);
end;
if P = nil then
Caption := Format('Not found, ticks=%d', [GetTickCount - Time0])
else
Caption := Format(
'Found it at pos %d, ticks=%d',
[Integer(P - PChar(pSharedBuf)), GetTickCount - Time0]);
end;
procedure TForm1.Button1Click(Sender: TObject); var s: String; hFile: THandle; hFileMapObj: THandle; pSharedBuf: Pointer; Time0: Integer; p: PChar; begin if not OpenDialog1.Execute then Exit; s := InputBox('Find','Match',''); Time0 := GetTickCount; hfile := 0; hFileMapObj := 0; pSharedBuf := nil; try hFile := FileOpen(OpenDialog1.FileName, fmOpenRead); Win32Check(hFileMapObj <> INVALID_HANDLE_VALUE); hFileMapObj := CreateFileMapping(hFile, nil, PAGE_READONLY, 0, 0, nil); Win32Check(hFileMapObj <> 0); pSharedBuf := MapViewOfFile(hFileMapObj, FILE_MAP_READ, 0, 0, 0); Win32Check(pSharedBuf <> nil); P := StrPos(PChar(pSharedBuf), PChar(s)); finally if pSharedBuf <> nil then UnMapViewOfFile(pSharedBuf); if hFileMapObj <> 0 then CloseHandle(hFileMapObj); if hFile <> 0 then CloseHandle(hFile); end; if P = nil then Caption := Format('Not found, ticks=%d', [GetTickCount - Time0]) else Caption := Format( 'Found it at pos %d, ticks=%d', [Integer(P - PChar(pSharedBuf)), GetTickCount - Time0]); end;
Tip by Leonid Troyanovsky
Original resource: | The Delphi Pool |
---|---|
Author: | P Below & L Troyanovsky |
Added: | 2010/02/22 |
Last updated: | 2010/02/22 |