How can I extract the domain suffix without entering `http://` or `https://`?
For example, if I enter `stackoverflow.com`, I want to get the result `com`.
I have this function, but I must enter `http://` to get the result.
Is there any way to skip entering `http://` and `https://`?
{ Parses Edit2.Text as a URI and appends two lines to Memo1: the part of the
  host that follows its last '.', and the URI's document part. }
procedure TForm1.Button2Click(Sender: TObject);

  { Returns the 1-based index of the last occurrence of C in S, or 0 when C
    does not occur in S (which includes the case of an empty S). }
  function RatChar(S: String; C: Char): Integer;
  var
    i: Integer;
  begin
    i := Length(S);
    // Check i > 0 first: with short-circuit boolean evaluation (the Delphi
    // default) S[i] is then never read once i has reached 0.
    while (i > 0) and (S[i] <> C) do
      Dec(i);
    Result := i;
  end;

var
  uri: TIdURI;
  host: string;
  dotPos: Integer;
begin
  uri := TIdURI.Create(Edit2.Text);
  try
    host := uri.Host;
    dotPos := RatChar(host, '.');
    // Copy everything after the last dot. When the host has no dot, dotPos
    // is 0 and the whole host is copied.
    Memo1.Lines.Add(Copy(host, dotPos + 1, Length(host)));
    Memo1.Lines.Add(uri.Document);
  finally
    // Release the parser object even if one of the calls above raises.
    uri.Free;
  end;
end;
CodePudding user response:
According to the suggestion in "Extracting top-level and second-level domain from a URL using regex", it should run like this in Delphi:
program Project1;

{$APPTYPE CONSOLE}

{$R *.res}

{ Console demo: matches a URL against a regular expression and prints the
  last dot-separated label of the last capture group (the top-level domain),
  or 'Sorry' when the URL does not match. }

uses
  System.SysUtils, System.RegularExpressions;

var
  url,
  rePattern: string;
  rMatch   : TMatch;
  rGroup   : TGroup;
  arr      : TArray<string>;
begin
  try
    url := 'https://www.answers.com/article/1194427/8-habits-of-extraordinarily likeable-people';
    // The pattern below requires a "label." in front of the captured domain,
    // so a URL with only one dot after the scheme, such as the one below,
    // does not match and prints 'Sorry'.
    //url := 'https://stackoverflow.com/questions/71166883/how-to-extract-domain-suffix';

    // Group 1: one label followed by a dot (e.g. "www.").
    // Group 2: "label.tld" with an optional second ".tld" part.
    // Each [^.\r\n\/] class needs its '+' quantifier so that it can span a
    // whole label rather than a single character.
    rePattern := '^(?:https?:\/\/)(?:w{3}\.)?.*?([^.\r\n\/]+\.)([^.\r\n\/]+\.[^.\r\n\/]{2,6}(?:\.[^.\r\n\/]{2,6})?).*$';

    rMatch := TRegEx.Match(url, rePattern);
    if rMatch.Success then
    begin
      // Take the last capture group and keep its final dot-separated part.
      rGroup := rMatch.Groups.Item[pred(rMatch.Groups.Count)];
      arr := rGroup.Value.Split(['.']);
      writeln('Top-Level-Domain: ', arr[High(arr)]);
    end
    else
      writeln('Sorry');
    // Keep the console window open until Enter is pressed.
    readln;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.
However, this regular expression only works when `www.` is supplied.
CodePudding user response:
uses
  System.SysUtils;

var
  remainder: string;
  segments : TArray<string>;
begin
  try
    // Sample input; each step below narrows it down towards the last label
    // of the host name.
    remainder := 'https://stackoverflow.com/questions/71166883/how-to-extract-domain-suffix';

    // Step 1: drop the scheme by keeping what follows the last '://'.
    segments := remainder.Split(['://'], TStringSplitOptions.ExcludeEmpty);
    remainder := segments[High(segments)];
    // -> stackoverflow.com/questions/71166883/how-to-extract-domain-suffix

    // Step 2: keep the host, i.e. the text before the first '/'.
    segments := remainder.Split(['/'], TStringSplitOptions.ExcludeEmpty);
    remainder := segments[0];
    // -> stackoverflow.com

    // Step 3: keep the last dot-separated label of the host.
    segments := remainder.Split(['.'], TStringSplitOptions.ExcludeEmpty);
    remainder := segments[High(segments)];
    // -> com

    writeln('Top-Level-Domain: ', remainder);
    // Wait for Enter before continuing.
    readln;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;