Can you please tell me how to cram the contents of capture groups into a hash in perl?
Example:
I have a file:
https://www.youtube.com/watch?v=5qap5aO4i9A
http://example.com:8080/r/p?s=10&z=11#text
https://exapmle.com/test/p?var=100
http://test.org:81/
https://main.org
gopher://gopher.floodgap.com/gopher/relevance.txt
file:///home/user/.profile
gemini://transjovian.org/
I want to break each line of this file into a set of key/value pairs, add them to a hash, and then output the contents of that hash.
The content of my script:
#!/usr/bin/env perl
use strict;
use utf8;
use warnings;
use feature qw(say);
use Data::Dumper;
# Attempt to split a URL into scheme, domain, port, path, query string and
# anchor, and return the parts as a key/value list.
#
# Parameters:
#   $url - the URL string to parse.
# Returns:
#   A key/value list holding every component lexical that is defined, or an
#   empty list when $url does not contain ":/".
sub parse_url {
    my ($url) = @_;
    if ($url =~ m#(.*):/(.*)#) {
        my (%hash, $scheme, $domain, $port, $path, $query_string, $anchor);
        $url =~ m!^(?<scheme>[^:]+):/{2,3}(?<domain>[^:/]+)(?::(?<port>(?:\d+)?)?)(?<path>(?:/[^?]+)?)(?:\?(?<query_string>(?:[^\#]+)?)?)(?:\#(?<anchor>(?:.+)?)?)!;
        # NOTE(review): the named captures above are only reachable through
        # %+ (e.g. $+{scheme}); nothing assigns them to the lexicals declared
        # in this block, so every defined() test below is false and %hash is
        # returned empty.
        if (defined($scheme))       { $hash{'scheme'}       = $scheme; }
        if (defined($domain))       { $hash{'domain'}       = $domain; }
        if (defined($port))         { $hash{'port'}         = $port; }
        if (defined($path))         { $hash{'path'}         = $path; }
        if (defined($query_string)) { $hash{'query_string'} = $query_string; }
        if (defined($anchor))       { $hash{'anchor'}       = $anchor; }
        return %hash;
    }
    # Explicit empty return: falling off the end of the "if" would hand the
    # caller the false condition value as a one-element list.
    return;
}
# Read URLs line by line from the files named on the command line (or from
# STDIN), echo each one, then dump whatever parse_url() returned for it.
while (defined(my $line = <>)) {
    chomp $line;
    say $line;
    my %parts = parse_url($line);
    print Dumper(\%parts);
}
I want to get this output:
https://www.youtube.com/watch?v=5qap5aO4i9A
$VAR1 = {
scheme => 'https',
domain => 'www.youtube.com',
path => '/watch',
query_string => 'v=5qap5aO4i9A',
};
http://example.com:8080/r/p?s=10&z=11#text
$VAR1 = {
scheme => 'http',
domain => 'example.com',
port => '8080',
path => '/r/p',
query_string => 's=10&z=11',
anchor => 'text',
};
https://exapmle.com/test/p?var=100
$VAR1 = {
scheme => 'https',
domain => 'exapmle.com',
path => '/test/p',
query_string => 'var=100',
};
http://test.org:81/
$VAR1 = {
scheme => 'http',
domain => 'test.org',
port => '81',
};
https://main.org
$VAR1 = {
scheme => 'https',
domain => 'main.org',
};
gopher://gopher.floodgap.com/gopher/relevance.txt
$VAR1 = {
scheme => 'gopher',
domain => 'gopher.floodgap.com',
path => '/gopher/relevance.txt',
};
file:///home/user/.profile
$VAR1 = {
scheme => 'file',
path => '/home/user/.profile',
};
gemini://transjovian.org/
$VAR1 = {
scheme => 'gemini',
domain => 'transjovian.org',
};
But I get this output instead:
https://www.youtube.com/watch?v=5qap5aO4i9A
$VAR1 = {};
http://example.com:8080/r/p?s=10&z=11#text
$VAR1 = {};
https://exapmle.com/test/p?var=100
$VAR1 = {};
http://test.org:81/
$VAR1 = {};
https://main.org
$VAR1 = {};
gopher://gopher.floodgap.com/gopher/relevance.txt
$VAR1 = {};
file:///home/user/.profile
$VAR1 = {};
gemini://transjovian.org/
$VAR1 = {};
Thank you for your help!
CodePudding user response:
You can use the special variable %+ (or its alias %{^CAPTURE}, available since Perl 5.26) to get the named captures like this:
use strict;
use utf8;
use warnings;
use feature qw(say);
use open ':std', ':encoding(utf-8)';
use Data::Dumper;
# Break a URL into its named components.
#
# Parameters:
#   $url - URL string, e.g. "http://example.com:8080/r/p?s=10&z=11#text".
# Returns:
#   A key/value list with the keys scheme, domain, port, path, query_string
#   and anchor (components missing from the URL come back as ''), or an
#   empty list when $url does not match the pattern.
sub parse_url {
    my ($url) = @_;

    # Bare return on failure so the caller's "my %hash = parse_url(...)"
    # receives a genuinely empty hash and can test it for "no match".
    return unless $url =~ m!
        ^(?<scheme>[^:]+):/{2,3}
        (?<domain>[^:/]+)
        (?::?(?<port>(?:\d+)?)?)
        (?<path>(?:/[^?]+)?)
        (?:\??(?<query_string>(?:[^\#]+)?)?)
        (?:\#?(?<anchor>(?:.+)?)?)
    !x;

    # %+ maps each named capture group of the last successful match to its
    # text; copy it into a plain hash before returning.
    my %hash = %+;
    return %hash;
}
# Read URLs line by line from the files named on the command line (or from
# STDIN), echo each one, and either dump its parsed components or report
# that nothing was parsed.
while (defined(my $line = <>)) {
    chomp $line;
    say $line;
    my %parts = parse_url($line);
    unless (%parts) {
        say " -> No match";
        next;
    }
    print Dumper(\%parts);
}
Output:
$VAR1 = {
'anchor' => 'text',
'path' => '/r/p',
'query_string' => 's=10&z=11',
'port' => '8080',
'scheme' => 'http',
'domain' => 'example.com'
};
https://www.youtube.com/watch?v=5qap5aO4i9A
$VAR1 = {
'scheme' => 'https',
'domain' => 'www.youtube.com',
'port' => '',
'anchor' => '',
'query_string' => 'v=5qap5aO4i9A',
'path' => '/watch'
};
https://exapmle.com/test/p?var=100
$VAR1 = {
'port' => '',
'scheme' => 'https',
'anchor' => '',
'path' => '/test/p',
'domain' => 'exapmle.com',
'query_string' => 'var=100'
};
http://test.org:81/
$VAR1 = {
'scheme' => 'http',
'domain' => 'test.org',
'port' => '81',
'anchor' => '',
'query_string' => '/',
'path' => ''
};
https://main.org
$VAR1 = {
'port' => '',
'scheme' => 'https',
'domain' => 'main.org',
'anchor' => '',
'path' => '',
'query_string' => ''
};
gopher://gopher.floodgap.com/gopher/relevance.txt
$VAR1 = {
'domain' => 'gopher.floodgap.com',
'scheme' => 'gopher',
'port' => '',
'query_string' => '',
'path' => '/gopher/relevance.txt',
'anchor' => ''
};
file:///home/user/.profile
$VAR1 = {
'port' => '',
'scheme' => 'file',
'domain' => 'home',
'anchor' => '',
'path' => '/user/.profile',
'query_string' => ''
};
gemini://transjovian.org/
$VAR1 = {
'domain' => 'transjovian.org',
'scheme' => 'gemini',
'port' => '',
'query_string' => '/',
'path' => '',
'anchor' => ''
};