aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMohammad S Anwar <Mohammad.Anwar@yahoo.com>2019-08-19 04:43:46 +0100
committerGitHub <noreply@github.com>2019-08-19 04:43:46 +0100
commit594122c69716340d30a37267a5f52ff5de670f7d (patch)
tree35909ec551a0c5e577a94b814449005f48f0ddab
parent40525ca06a4eb0ac531d8ddb872e56c40c51da3d (diff)
parent17699814884956669cab833d9ef035464bea86da (diff)
downloadperlweeklychallenge-club-594122c69716340d30a37267a5f52ff5de670f7d.tar.gz
perlweeklychallenge-club-594122c69716340d30a37267a5f52ff5de670f7d.tar.bz2
perlweeklychallenge-club-594122c69716340d30a37267a5f52ff5de670f7d.zip
Merge pull request #529 from dcw803/master
oops..forgot about pull request:-)
-rw-r--r--challenge-021/duncan-c-white/README34
-rwxr-xr-xchallenge-021/duncan-c-white/perl5/ch-1.pl26
-rwxr-xr-xchallenge-021/duncan-c-white/perl5/ch-2.pl146
3 files changed, 186 insertions, 20 deletions
diff --git a/challenge-021/duncan-c-white/README b/challenge-021/duncan-c-white/README
index bf5b030e87..4d99aac50e 100644
--- a/challenge-021/duncan-c-white/README
+++ b/challenge-021/duncan-c-white/README
@@ -1,25 +1,19 @@
-Challenge 1: "Write a script to accept a string from command line and
-split it on change of character. For example, if the string is "ABBCDEEF",
-then it should split like 'A', 'BB', 'C', 'D', 'EE', 'F'."
+Challenge 1: "Write a script to calculate the value of e, also known
+ as Euler's number and Napier's constant."
-My notes: Clearly defined, sounds like a job for regexes.
+My notes: The associated wiki page reminds us that the easiest way of
+ calculating e (the base of natural logarithms) is via:
+ e = 2 + 1/2! + 1/3! + 1/4! + ... 1/n! Let's use that.
-Challenge 2: "Write a script to print the smallest pair of Amicable Numbers."
+Challenge 2: "Write a script for URL normalization based on rfc3986. This
+task was shared by Anonymous Contributor.
-Amicable numbers are two different numbers so related that the sum of the
-proper divisors of each is equal to the other number. (A proper divisor
-of a number is a positive factor of that number other than the number
-itself. For example, the proper divisors of 6 are 1, 2, and 3.)
+According to Wikipedia, URL normalization is the process by which URLs
+are modified and standardized in a consistent manner. The goal of the
+normalization process is to transform a URL into a normalized URL so
+it is possible to determine if two syntactically different URLs may
+be equivalent.
-The smallest pair of amicable numbers is (220, 284). They are amicable
-because the proper divisors of 220 are 1, 2, 4, 5, 10, 11, 20, 22, 44,
-55 and 110, of which the sum is 284; and the proper divisors of 284 are 1,
-2, 4, 71 and 142, of which the sum is 220.
-
-The first ten amicable pairs are: (220, 284), (1184, 1210), (2620,
-2924), (5020, 5564), (6232, 6368), (10744, 10856), (12285, 14595),
-(17296, 18416), (63020, 76084), and (66928, 66992)
-
-My notes: Another clearly described problem. Obvious method involves
-a bit of caching.
+My notes: The RFC link points to a long list of regex-style changes,
+not all of which have to be implemented. Easy to implement most.
diff --git a/challenge-021/duncan-c-white/perl5/ch-1.pl b/challenge-021/duncan-c-white/perl5/ch-1.pl
new file mode 100755
index 0000000000..ee09dc106c
--- /dev/null
+++ b/challenge-021/duncan-c-white/perl5/ch-1.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl
+#
+# Challenge 1: "Write a script to calculate the value of e, also known
+# as Euler's number and Napier's constant."
+#
+# My notes: The associated wiki page reminds us that the easiest way of
+# calculating e (the base of natural logarithms) is via:
+# e = 1 + 1/1! + 1/2! + 1/3! + 1/4! + ... 1/n! Let's use that.
+#
+
+use strict;
+use warnings;
+#use Function::Parameters;
+#use Data::Dumper;
+
+# Accept at most one argument: the number of series terms to sum (default 30).
+die "Usage: ch-1.pl [NUMTERMS]\n" if @ARGV>1;
+my $nterms = shift // 30;
+
+# Reject anything that is not a positive integer; otherwise a typo such as
+# "abc" or "0" would silently produce "e=0" instead of a usage message.
+die "Usage: ch-1.pl [NUMTERMS]\n" unless $nterms =~ /^\d+$/ && $nterms > 0;
+
+# Sum the series 1/0! + 1/1! + 1/2! + ... using $nterms terms.
+# On entry to iteration $n, $nfact holds ($n-1)!, so the term added is
+# 1/($n-1)!; $nfact is then advanced to $n! for the next iteration.
+my $e = 0;
+my $nfact = 1;
+foreach my $n (1..$nterms)
+{
+    $e += 1/$nfact;
+    $nfact *= $n;
+}
+print "e=$e\n";
diff --git a/challenge-021/duncan-c-white/perl5/ch-2.pl b/challenge-021/duncan-c-white/perl5/ch-2.pl
new file mode 100755
index 0000000000..fc978b3a81
--- /dev/null
+++ b/challenge-021/duncan-c-white/perl5/ch-2.pl
@@ -0,0 +1,146 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script for URL normalization based on rfc3986.
+#
+# According to Wikipedia, URL normalization is the process by which URLs
+# are modified and standardized in a consistent manner. The goal of the
+# normalization process is to transform a URL into a normalized URL so
+# it is possible to determine if two syntactically different URLs may
+# be equivalent.
+#
+# My notes: The RFC link points to a long list of regex-style changes,
+# not all of which have to be implemented. Easy to implement most.
+# Later thought: many of the changes only apply to parts of the URL,
+# so I'll reuse part of my solution to challenge 017, part 2.. url splitting
+#
+# ./ch-2.pl HTTP://ed@mit.edu:800/../%7e%64%75%6e%63%61%6e/%5d%20%ff/a/../../b/../c/../default.asp
+# normalized url is http://ed@mit.edu:800/~duncan/
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+use Data::Dumper;
+
+# Exactly one command line argument is expected: the URL to normalize.
+@ARGV == 1 or die "Usage: ch-2.pl URL\n";
+
+# Take the URL off the argument list, normalize it and report the result.
+my $url = normalize( shift @ARGV );
+print "normalized url is $url\n";
+
+#
+# my %info = parse_url($url);
+#   Parse URL $url.  Return a hash of the pieces.  If parsing
+#   fails, return an empty hash.
+#   scheme:[//[userinfo@]host[:port]]path[?query][#fragment]
+#   eg. jdbc://user:password@localhost:3306/pwc?profile=true#h1
+#
+#   parses to:
+#       scheme:   jdbc
+#       userinfo: user:password
+#       host:     localhost
+#       port:     3306
+#       path:     /pwc
+#       query:    profile=true
+#       fragment: h1
+#
+#   On success 'scheme' and 'path' are always present ('path' may be the
+#   empty string).  'userinfo', 'host' and 'port' are only present when the
+#   URL has a '//' authority part containing them; 'query' and 'fragment'
+#   are only present when the URL contains a '?' or '#' respectively (their
+#   value may then be the empty string).
+#
+fun parse_url( $url )
+{
+    # scheme: a letter followed by letters, digits, '+', '.' or '-'
+    $url =~ s/^([a-zA-Z][a-zA-Z0-9+.\-]*):// or return ();
+
+    my %hash;
+    $hash{scheme} = $1;
+
+    # Peel off the fragment (everything after the FIRST '#') and then the
+    # query (everything after the FIRST remaining '?') before looking at
+    # the authority, so that '@', ':' or '?' characters inside them can
+    # never be mistaken for authority delimiters.
+    $hash{fragment} = $1 if $url =~ s/#(.*)$//s;
+    $hash{query}    = $1 if $url =~ s/\?(.*)$//s;
+
+    if( $url =~ s|^//|| )
+    {
+        # userinfo ends at the last '@' before the first '/', so an '@'
+        # further along in the path is left alone
+        $hash{userinfo} = $1 if $url =~ s|^([^/]+)\@||;
+
+        # host names may contain hyphens as well as word chars and dots
+        return () unless $url =~ s|^([\w.\-]+)||;
+        $hash{host} = $1;
+        $hash{port} = $1 if $url =~ s/^:(\d+)//;
+
+        # whatever follows the authority must be empty or an absolute
+        # path; anything else means the authority was malformed
+        return () unless $url eq '' || $url =~ m|^/|;
+    }
+
+    # the path is present whether or not there was an authority part
+    $hash{path} = $url;
+    return %hash;
+}
+
+
+#
+# $path = sanitize_path( $path );
+#   Normalize the path part of a URL and return the result, which always
+#   starts with '/':
+#   - remove '' and '.' path elements,
+#   - process '..' as if we were descending a directory tree (a '..' at
+#     the root is ignored),
+#   - keep a trailing '/' when the input named a directory, i.e. ended
+#     in '/', '/.' or '/..',
+#   - remove a trailing index.htm, index.html or default.asp entry.
+#   An undefined $path is treated as the empty path and yields '/'.
+#
+fun sanitize_path( $path )
+{
+    $path //= '';
+
+    # split() discards trailing empty fields, so remember up front whether
+    # the path referred to a directory; otherwise '/a/b/' would silently
+    # turn into '/a/b', which can be a different resource.
+    my $isdir = $path =~ m{(?:^|/)(?:\.\.?)?$};
+
+    # traverse the path elements, ignoring '.' and '' elements,
+    # pushing any element but a '..' on a stack,
+    # and popping the top element when you see a '..'
+    my @kept;
+    foreach my $element (split( m|/|, $path ))
+    {
+        next if $element eq '.' || $element eq '';
+        if( $element eq '..' )
+        {
+            pop @kept;      # no-op when already at the root
+        } else
+        {
+            push @kept, $element;
+        }
+    }
+
+    # assign to the parameter rather than redeclaring it with 'my', which
+    # would trigger a "masks earlier declaration" warning
+    $path = '/'. join('/', @kept );
+    $path .= '/' if $isdir && @kept;
+
+    # remove trailing index.htm[l] if present (dot escaped so that it
+    # only matches a literal '.')
+    $path =~ s{/index\.html?$}{/};
+
+    # remove trailing default.asp if present
+    $path =~ s{/default\.asp$}{/};
+
+    # deliberately do NOT add a trailing slash when the input had none
+
+    return $path;
+}
+
+
+#
+# my $normalizedurl = normalize( $url );
+#   Normalize $url according to RFC3986 and return the normalized URL:
+#   1. uppercase the hex digits of %hh triples,
+#   2. decode %HH triples that encode unreserved characters,
+#   3. lowercase the scheme and host (the only case-insensitive parts),
+#   4. remove the port when it is the default for the scheme,
+#   5. sanitize the path via sanitize_path().
+#   A URL that parse_url() cannot parse, or that has no host part, is
+#   returned with only steps 1 and 2 applied.
+#
+fun normalize( $url )
+{
+    # 1. uppercase the hex digits of every %hh triple
+    $url =~ s/%([0-9a-fA-F]{2})/'%'.uc($1)/eg;
+
+    # 2. decode unnecessary %HH triples, viz:
+    #    "ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+    #    period (%2E), underscore (%5F), or tilde (%7E)"
+    #    %40 ('@') and %60 ('`') are NOT in that set and must stay encoded;
+    #    decoding %40 here would also corrupt the userinfo/host split below.
+    $url =~
+        s/%(4[1-9A-F]|5[0-9A]|6[1-9A-F]|7[0-9A]|3[0-9]|2D|2E|5F|7E)/chr(hex($1))/eg;
+
+    # now parse url into parts..
+    my %info = parse_url($url);
+
+    # unparsable or host-less URLs cannot be reassembled below
+    return $url unless defined $info{host};
+
+    # 3. lowercase scheme and host only; path, query, fragment and userinfo
+    #    are case-sensitive and must be left alone
+    my $scheme = lc $info{scheme};
+    my $host   = lc $info{host};
+
+    # 4. remove the port if it is the default one for this scheme
+    my %default_port = ( http => 80, https => 443, ftp => 21 );
+    my $port = $info{port};
+    undef $port
+        if defined $port
+        && defined $default_port{$scheme}
+        && $port == $default_port{$scheme};
+
+    # 5. sanitize path in various ways, eg remove '', '.' and '..' elements
+    my $path = sanitize_path( $info{path} );
+
+    # finally, merge bits back together; test for non-empty strings rather
+    # than truth so that a component consisting of "0" is not dropped
+    my $result = $scheme . '://';
+    $result .= $info{userinfo} . '@' if length( $info{userinfo} // '' );
+    $result .= $host;
+    $result .= ':' . $port           if defined $port;
+    $result .= $path;
+    $result .= '?' . $info{query}    if length( $info{query} // '' );
+    $result .= '#' . $info{fragment} if length( $info{fragment} // '' );
+    return $result;
+}