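Example: convert the percent-encoded sequence %C3%A9 to é, and also convert the mis-decoded two-character form Ã© (the UTF-8 bytes of é shown as single-byte Latin-1/Windows-1252 text) to é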
Code: Select all
// Script-wide settings.
// Path of the output file; it is passed to f.Open and echoed in the final Display message.
SaveAsFileName = 'c:\test.html';
// NOTE(review): presumably selects Perl-style regular-expression syntax for RegEx objects,
// which the mask inside DecodeUTF8 would rely on - confirm against the host documentation.
PerlRegEx = Yes;
// NOTE(review): presumably empties the host's output pane/buffer before the script runs - confirm.
Output.Clear;
// DecodeUTF8(txt)
// Returns a copy of txt in which two-byte UTF-8 sequences whose lead byte is
// C2 or C3 are collapsed into the single character with code 128..255.
// Two spellings of such a sequence are handled:
//   1. percent-encoded pairs, e.g. %C3%A9            -> Chr(233) (é)
//   2. raw byte pairs, e.g. Chr(195)+Chr(169) ("Ã©") -> Chr(233) (é)
// Everything else in txt is copied through unchanged.
function DecodeUTF8(txt);
begin
  Result = '';

  // ---- Pass 1: percent-encoded pairs ----
  // Group 1 = the whole %XX%YY pair, group 2 = lead-byte hex (c2/c3),
  // group 3 = continuation-byte hex (80..bf).
  finder = New(RegEx);
  finder.Data = txt;
  finder.Mask = '(\%(c[2-3])\%([8-9a-b][0-9a-f]))';
  copyFrom = 1;   // position in txt of the first character not yet copied to Result
  while finder.Match do begin
    // Lead byte: its second hex digit is 2 or 3, so C2 contributes 0 and C3 contributes 64.
    leadHex = UpperCase(finder.Value[2]);
    leadDigit = Ord(leadHex.SubStr[2,1]) - Ord('0');
    highPart = (leadDigit - 2) * 64;

    // Continuation byte: turn each hex digit into its numeric value (0..15).
    tailHex = UpperCase(finder.Value[3]);
    hiChar = tailHex.SubStr[1,1];
    if hiChar ~= '[0-9]' then hiValue = Ord(hiChar) - Ord('0') else hiValue = Ord(hiChar) - Ord('A') + 10;
    loChar = tailHex.SubStr[2,1];
    if loChar ~= '[0-9]' then loValue = Ord(loChar) - Ord('0') else loValue = Ord(loChar) - Ord('A') + 10;
    // Byte value minus 128 leaves the offset within the 64-character block (0..63).
    lowPart = hiValue * 16 + loValue - 128;

    // Copy the untouched text that precedes the match, then the decoded character.
    untouched = txt.SubStr[copyFrom, finder.Pos[1] - copyFrom];
    Result = Result + untouched + Chr(128 + highPart + lowPart);
    copyFrom = finder.Pos[1] + finder.Len[1];
  end;
  // Append whatever follows the last match (or all of txt when nothing matched).
  Result = Result + txt.SubStr[copyFrom];

  // ---- Pass 2: raw two-byte sequences (e.g. "Ã©" -> é) ----
  // Lead byte 194 maps onto codes 128..191, lead byte 195 onto 192..255.
  for leadByte = 194 to 195 do begin
    for tailByte = 128 to 191 do begin
      Result.Replace(Chr(leadByte) + Chr(tailByte), Chr((leadByte - 194) * 64 + tailByte));
    end;
  end;
end;
// Main script: download the catalogue page, decode its UTF-8 sequences with
// DecodeUTF8, and write the result to SaveAsFileName.
Link = New(URL);
Link.Get('http://catalogue.proximus.be/PUB/gsmc_business/GSM_Catalog.jsp?language=fr');
// Write the decoded page body (Link.Data) to the target file.
// NOTE(review): the file object is never explicitly closed in this script; confirm
// whether the host flushes and closes it automatically when the script ends.
f = New(File);
f.Open(SaveAsFileName);
f.Write(DecodeUTF8(Link.Data));
// Tell the user where the converted page was written (message grammar fixed: "save" -> "saved").
Display('Converted page saved to...'+crlf+SaveAsFileName);