// (Forum copy artifact "Code: Select all" - not part of the script.)
// Scrapes yellowpages.com listings for one business category in one state and
// saves them as tab-separated text: a header row, then one listing per line.
// Each listing line is also echoed to the Output pane.
//
// Edit this section as you see fit.
State = 'tx';
Business = 'used-Computers';
OutputFile = ScriptPath + Business + '.txt';
//--------------------------------------------------------------------------------------------------------------------
PerlRegEx = Yes;
Output.Clear;

Link = New(URL);
rx = New(RegEx);    // holds the HTML of the listing currently being parsed
sx = New(RegEx);    // iterates over the listing blocks of the current page

// Start from an empty file and write the column header row.
OutFile = New(File);
OutFile.Open(OutputFile);
OutFile.Truncate;
OutFile.Write('BusinessName'+tab+'Street'+tab+'City'+tab+'State'+tab+'ZipCode'+tab+'Phone'+tab+'Website'+crlf);

Abort = No;         // set to Yes by OnStop; polled by both loops below
CurPage = 1;
LastPage = 0;       // 0 = total page count not determined yet

while not Abort do begin
    lnk = 'http://www.yellowpages.com/'+State+'/'+Business+'?page='+CurPage+'&sort=alpha';
    Link.Get(lnk);

    // Read the total page count from the "Last" pager link to size the progress bar.
    if LastPage = 0 then begin
        LastPage = Val(WildGet(Link.Data, '<a href="[^"]+\?page=(\d+)[^"]*">Last</a>'));
        Progress.Maximum = LastPage;
    end;
    Progress.Position = CurPage;

    // Page number of the "Next" pager link; the loop ends below when there is none.
    // [^"]* (not [^"]+) so that a page number at the very end of the href is not
    // truncated by backtracking; this also matches the "Last" pattern above.
    CurPage = Decode(WildGet(Link.Data, '<a href="[^"]+\?page=(\d+)[^"]*">Next</a>'));

    // Each listing is the text from <div class="info"> up to the next </ul>.
    sx.Data = Link.Data;
    sx.Mask = '<div class="info">(.*?)</ul>';
    while (sx.Match) and (not Abort) do begin
        rx.Data = sx.Value[1];

        BusName = Trim(Decode(WildGet(rx.Data, '<a\s+[^>]+>(.*?)</a>')));

        // Capture the text up to the next tag. The previous trailing lazy "(.*?)"
        // had nothing after it and therefore always captured an empty string.
        Street  = Trim(Decode(WildGet(rx.Data, 'class="street-address">([^<]*)')));
        City    = Trim(Decode(WildGet(rx.Data, 'class="locality">([^<]*)')));
        st      = Trim(Decode(WildGet(rx.Data, 'class="region">([^<]*)')));
        ZipCode = Trim(Decode(WildGet(rx.Data, 'class="postal-code">([^<]*)')));
        Phone   = Trim(Decode(WildGet(rx.Data, 'phone">([^<]*)')));

        Website = Decode(WildGet(rx.Data, '<li><a href="([^"]+)" class="track-visit-website'));

        // Same column order as the header row written above.
        DataLine = BusName+tab+Street+tab+City+tab+st+tab+ZipCode+tab+Phone+tab+Website;

        Output(DataLine);
        OutFile.Write(DataLine+crlf);
    end;

    // No "Next" link on this page: it was the last one.
    if CurPage = Nothing then Break;
end;
// Stop-request handler (presumably invoked by the host when the user asks the
// script to stop - confirm against the host documentation).
// Returns the value the Abort flag had before this call, then raises the flag.
// Abort starts as No, so the first call returns No and only signals the main
// loops (which poll Abort) to finish; any later call returns Yes.
function OnStop();
begin
// Read the old flag value first; the order of these two statements matters.
Result = @Abort;
@Abort = Yes;
end;
// Termination handler (presumably invoked by the host when the script ends -
// confirm against the host documentation).
// Closes the output file if the OutFile object is set, then displays the path
// the data was saved to.
function OnTerminate();
begin
// Guard: only close when OutFile evaluates as true (i.e. it was created).
if @OutFile then @OutFile.Close;
Display('Data saved to...'+crlf+@OutputFile);
end;