aboutsummaryrefslogtreecommitdiff
path: root/challenge-021/paulo-custodio/perl/ch-2.pl
blob: f7c1dc11442f7f11d0fd7d469e1a463e7a4c5b97 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/perl

# Challenge 021
#
# Task #2
# Write a script for URL normalization based on rfc3986. This task was shared by
# Anonymous Contributor.
#
# According to Wikipedia, URL normalization is the process by which URLs are
# modified and standardized in a consistent manner. The goal of the
# normalization process is to transform a URL into a normalized URL so it is
# possible to determine if two syntactically different URLs may be equivalent.

use Modern::Perl;

# Resolve one percent-encoded triplet, given its two hex digits (without the
# leading "%"). When the encoded character is an RFC 3986 "unreserved"
# character (letter, digit, "-", ".", "_" or "~") the character itself is
# returned; otherwise the triplet is returned with upper-case hex digits.
sub decode_triplets {
    my($hex) = @_;
    my $decoded = chr(hex($hex));
    my $is_unreserved = $decoded =~ /[a-zA-Z0-9\-._~]/;
    return $is_unreserved ? $decoded : '%'.uc($hex);
}

# Remove "." and ".." segments from a path, following the
# remove_dot_segments algorithm of RFC 3986 section 5.2.4.
# Takes the path string and returns the path without dot-segments.
sub _remove_dot_segments {
    my($input) = @_;
    my $output = '';
    while (length $input) {
        if ($input =~ s{\A\.\.?/}{}) {
            # A: a leading "../" or "./" is dropped
        }
        elsif ($input =~ s{\A/\.(?:/|\z)}{/}) {
            # B: "/./" or a trailing "/." collapses to "/"
        }
        elsif ($input =~ s{\A/\.\.(?:/|\z)}{/}) {
            # C: "/../" or a trailing "/.." collapses to "/" and also
            # removes the last segment already moved to the output
            $output =~ s{/?[^/]*\z}{};
        }
        elsif ($input =~ s{\A\.\.?\z}{}) {
            # D: a lone "." or ".." is dropped
        }
        else {
            # E: move the first segment (with its leading "/") to the output
            $input =~ s{\A(/?[^/]*)}{};
            $output .= $1;
        }
    }
    return $output;
}

# Normalize a URI according to RFC 3986 section 6.2.
#
# Takes the URI as a string and returns the normalized string (undef is
# returned unchanged). The following normalizations are applied:
#   - percent-encoded triplets get upper-case hex digits, and triplets that
#     encode unreserved characters are decoded;
#   - the scheme and the host are converted to lowercase (userinfo, path,
#     query and fragment keep their case);
#   - the default port of http, https and ftp, or an empty port, is removed;
#   - an empty path after a non-empty authority becomes "/";
#   - dot-segments are removed from absolute paths only, so the query, the
#     fragment and relative paths are never rewritten.
sub norm_uri {
    my($uri) = @_;
    return $uri unless defined $uri;

    my %default_port = (http => 80, https => 443, ftp => 21);

    # Unreserved characters are never delimiters, so decoding them before
    # splitting the URI cannot change how it is parsed.
    $uri =~ s/\%([0-9a-f]{2})/ decode_triplets($1) /gie;

    # Split into components (RFC 3986 appendix B); this always matches.
    my($scheme, $authority, $path, $query, $fragment) = $uri =~ m{
        \A
        (?: ([^:/?\#]+) : )?
        (?: // ([^/?\#]*) )?
        ([^?\#]*)
        (?: \? ([^\#]*) )?
        (?: \# (.*) )?
        \z
    }xs;

    # Only touch the scheme when it really looks like one.
    $scheme = lc $scheme
        if defined $scheme && $scheme =~ /\A[a-z][a-z0-9+.\-]*\z/i;

    if (defined $authority) {
        # The authority ends at the first "/", "?" or "#", so an "@" or ":"
        # further along in the URI can never be mistaken for a delimiter.
        my($userinfo, $hostport) = $authority =~ m{\A (?: (.*) \@ )? (.*) \z}xs;
        my($host, $port)         = $hostport  =~ m{\A (.*?) (?: : (\d*) )? \z}xs;

        # Lowercasing the host also lowercases the hex digits of any
        # remaining triplets in it, so restore those afterwards.
        $host = lc $host;
        $host =~ s/(\%[0-9a-f]{2})/\U$1/g;

        my $is_default_port = defined $port
            && defined $scheme
            && exists $default_port{$scheme}
            && $port eq $default_port{$scheme};
        my $keep_port = defined $port && length $port && !$is_default_port;

        $authority  = defined $userinfo ? "$userinfo\@" : '';
        $authority .= $host;
        $authority .= ":$port" if $keep_port;

        # Converting an empty path to a "/" path
        $path = '/' if $path eq '' && length $authority;
    }

    # Removing dot-segments
    $path = _remove_dot_segments($path) if $path =~ m{\A/};

    # Put the components back together.
    my $normalized = '';
    $normalized .= "$scheme:"     if defined $scheme;
    $normalized .= "//$authority" if defined $authority;
    $normalized .= $path;
    $normalized .= "?$query"      if defined $query;
    $normalized .= "#$fragment"   if defined $fragment;
    return $normalized;
}

# Entry point: normalize the URI given as the first command-line argument
# and print the result on its own line. Without an argument there is nothing
# to normalize, so stop with a usage message instead of passing undef on.
my $uri = shift @ARGV;
defined $uri or die "Usage: $0 URI\n";
say norm_uri($uri);