Site scanner skeleton script

Free BrownRecluse scripts provided by SoftByte Labs and users. To use one, copy the script and paste it into the BrownRecluse script editor. Modify it to suit your needs, then save and run.
Post Reply
User avatar
Support
Site Admin
Posts: 1821
Joined: Sun Oct 02, 2011 10:49 am

Site scanner skeleton script

Post by Support » Sun Oct 02, 2011 9:17 pm

This skeleton script is functional and will list all HTML links it has scanned. Modify it to include images and other kinds of files, either by file name or by MIME type.

Code: Select all

// Script setup: choose the regex mode, ask for a start URL, and create the
// scanner object used by the callbacks defined further down.

// The masks in CheckData use backreferences (\1, \2).
// NOTE(review): presumably this switch selects Perl-style regex syntax — confirm.
PerlRegEx = Yes;
// Clear any previous output before this run.
Output.Clear;

// Ask the user for the URL to scan.
// NOTE(review): the third argument ('http://') looks like the default text of the prompt — confirm.
WebSite = Input('Site Scanner', 'Enter a URL to scan...', 'http://');
// Nothing entered: end the script.
if WebSite = nothing then Terminate;

// Stop-request flag. OnStop sets it; CheckLink reads it and rejects links once it is set.
Abort = No;

// Break the start URL apart. BaseHost is compared against each link's host in
// CheckLink; HostName is never read in the visible script.
DecodeURL(WebSite, [BaseHost], [HostName]);

// Scanner object; the callbacks reach it as @Scan.
Scan = New(Scanner);

// Link filter, assigned to Scan.OnValidateLink below.
// Result is No once a stop has been requested (@Abort); otherwise it is true
// only when the host of @Scan.Location equals @BaseHost, i.e. the link is on
// the same host as the start URL.
// NOTE(review): presumably Result = Yes means "follow this link" — confirm against the Scanner docs.
function CheckLink();
begin
	// A stop was requested: reject every remaining link.
	if @Abort then begin
	  Result = No;
	  Exit;
	end;
  // Extract the host of the link being validated (HostName is not used here).
  DecodeURL(@Scan.Location, [Host], [HostName]);
	// Accept only links on the start URL's host.
	Result = (Host = @BaseHost);
	if Result then begin
	  // Filter out URL here...
	  // Placeholder for extra per-URL rules; the sample below would reject
	  // any URL containing '&order='.
		//if @Scan.Location.Pos('&order=') > 0 then Result = False;
	end;
end;

// MIME filter, assigned to Scan.OnBeforeDownload below.
// Result is the outcome of matching @Scan.MimeType against 'text/html' with
// the ~= operator.
// NOTE(review): presumably a true Result lets the download proceed, and ~= is a
// case-insensitive or pattern comparison — confirm both.
function CheckMime();
begin
	Result = (@Scan.MimeType ~= 'text/html');
end;

// Page handler, assigned to Scan.OnAfterDownload below.
// For a 'text/html' page it:
//   1. extracts the title, meta description and meta keywords,
//   2. writes the page URL to the output,
//   3. parses the page and hands every <a href> value to @Scan.AddLink.
// Pages with any other MIME type are ignored. Result is never assigned.
function CheckData();
begin
	// Created before the MIME check, so it is allocated for every page.
	rx = New(RegEx);

  if @Scan.MimeType ~= 'text/html' then begin
    // Begin extract page info...
    // PageTitle, PageDesc and PageKeyword are only used by the commented-out
    // Output(...) call further down.
		PageTitle = Trim(Decode(WildGet(@Scan.Data, '<title>(.*)</title')));

    rx.Data = @Scan.Data;

    // Mask: <meta name=QUOTE description QUOTE content=QUOTE value QUOTE.
    // Group 1 and group 2 capture the opening quote characters; group 3 is the
    // content value read below.
    // NOTE(review): '' inside the literal is presumably an escaped single quote — confirm.
    // NOTE(review): in Perl-compatible regex a backreference is not recognised
    // inside a character class, so [^\2] likely does not mean "anything but the
    // opening quote"; the lazy *? followed by \2 is what ends the match. Verify.
    rx.Mask = '<meta\s+name\s*=\s*("|'')description\1\s*content=("|'')([^\2]*?)\2';
    rx.Reset;
    if rx.Match then PageDesc = Trim(Decode(rx.Value[3])) else PageDesc = Nothing;

    // Same mask shape for the keywords meta tag; group 3 is again the content value.
    rx.Mask = '<meta\s+name\s*=\s*("|'')keywords\1\s*content=("|'')([^\2]*?)\2';
    rx.Reset;
    if rx.Match then PageKeyword = Trim(Decode(rx.Value[3])) else PageKeyword = Nothing;

    // End extract page info.

		// Output info as tab delimited fields...
		/*Output(
			@Scan.Location +tab+
			PageTitle      +tab+
			PageDesc       +tab+
			PageKeyword
		);*/
		// Active output: the page URL only.
		Output(@Scan.Location);

    // Find links within page to scan...
		Page = New(Parser);
		Page.Parse(@Scan.Data);

		// Collect the href attribute of every <a> tag and add each one to the scanner.
		HrefTags = Page.Tags('a','href');
		for each HrefTags as Tag do
			@Scan.AddLink(Tag);

	end;
end;

// Start the scan at the URL the user entered.
Scan.Location = WebSite;

// Attach the three callbacks defined above to the scanner's events.
Scan.OnValidateLink   = CheckLink;
Scan.OnBeforeDownload = CheckMime;
Scan.OnAfterDownload  = CheckData;

// Begin scanning.
Scan.Start;

// Stop handler. It is not assigned to anything in the visible script.
// NOTE(review): presumably the host application calls it by name when the user
// asks to stop — confirm.
// First call (@Abort still false): sets Result to False and raises @Abort, after
// which CheckLink rejects every further link.
// Later calls (@Abort already true): do nothing and leave Result unassigned.
// NOTE(review): presumably Result = False refuses the immediate stop so the scan
// can wind down, and an unassigned Result allows it — confirm the default.
function OnStop();
begin
	if not @Abort then begin
		Result = False;
		@Abort = True;
	end;
end;
Your support team.
http://SoftByteLabs.com

Post Reply