# $Id: URL.pm 35787 2011-10-17 15:26:12Z wsl $
# $URL: https://infix.uvt.nl/its-id/trunk/sources/aselect-perl/lib/Aselect/URL.pm $

use utf8;
use strict;
use warnings FATAL => 'all';;

package Aselect::URL;

use Encode;
use Exporter qw(import);

our @EXPORT = qw(normalize_url valid_http_url acl_to_re_url $fqdn $fqdnport $hostport);

our $fqdn = qr{(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*(?:\.(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*)+}i;
our $port = qr{:[1-9][0-9]{0,4}};
our $fqdnport = qr{$fqdn$port?};
our $hostport = qr{(?:\[[0-9a-f.:]+\]|$fqdn)$port?}i;

sub valid_http_url {
	return shift =~ qr{^https?://$hostport?(?:/|$)}o;
}

sub normalize_url {
	my $url = shift;
	Encode::_utf8_off($url);
	$url =~ s{^([^/:.]+://$hostport(?:/|$))}{lc($1)}e;
	$url =~ s{([^!-~])}{sprintf('%%%02X', ord($1))}eg;
	return $url;
}

# transform a simplified URL specification into a
# strict regular expression
sub acl_to_re_url {
	my $orig = shift;

	return qr{$orig}
		if $orig =~ /^\^/;

	my $url = $orig;
	my $re = quotemeta($url);
	$url =~ s{^(\w+)://}{};

	my $scheme = $1
		? qr{$1}
		: qr{https?://};

	$url =~ s{^((\.)$fqdnport|$hostport)([/?#]|$)}{$3}
		or die "can't parse url '$orig'\n";
	my $domain = $2
		? qr{(?:[^/]+)\Q$1\E}i
		: qr{\Q$1\E}i;

	die "in '$orig': unable to handle queries or fragments\n"
		if $url =~ m{[?#]};

	$url =~ s{//+}{/}g;
	$url =~ s{^/}{};
	my $path = $url eq ''
		? qr{(?:[?#/]|$)}
		: $url !~ m{/$}
			? qr{/\Q$url\E(?:[?#/]|$)}
			: qr{/\Q$url\E};

	return qr{^$scheme$domain$path};
}

1;
