From 71e4cc2d14f0ff3c89d85d5da6b2c4303abcbbc2 Mon Sep 17 00:00:00 2001 From: dcw Date: Sun, 18 Aug 2019 20:47:42 +0100 Subject: added my solutions to challenge 21 --- challenge-021/duncan-c-white/README | 34 +++---- challenge-021/duncan-c-white/perl5/ch-1.pl | 26 +++++ challenge-021/duncan-c-white/perl5/ch-2.pl | 146 +++++++++++++++++++++++++++++ challenge-021/duncan-c-white/perl5/parse | 73 +++++++++++++++ 4 files changed, 259 insertions(+), 20 deletions(-) create mode 100755 challenge-021/duncan-c-white/perl5/ch-1.pl create mode 100755 challenge-021/duncan-c-white/perl5/ch-2.pl create mode 100755 challenge-021/duncan-c-white/perl5/parse diff --git a/challenge-021/duncan-c-white/README b/challenge-021/duncan-c-white/README index bf5b030e87..4d99aac50e 100644 --- a/challenge-021/duncan-c-white/README +++ b/challenge-021/duncan-c-white/README @@ -1,25 +1,19 @@ -Challenge 1: "Write a script to accept a string from command line and -split it on change of character. For example, if the string is "ABBCDEEF", -then it should split like 'A', 'BB', 'C', 'D', 'EE', 'F'." +Challenge 1: "Write a script to calculate the value of e, also known + as Euler's number and Napier's constant." -My notes: Clearly defined, sounds like a job for regexes. +My notes: The associated wiki page reminds us that the easiest way of + calculating e (the base of natural logarithms) is via: + e = 2 + 1/2! + 1/3! + 1/4! + ... 1/n! Let's use that. -Challenge 2: "Write a script to print the smallest pair of Amicable Numbers." +Challenge 2: "Write a script for URL normalization based on rfc3986. This +task was shared by Anonymous Contributor. -Amicable numbers are two different numbers so related that the sum of the -proper divisors of each is equal to the other number. (A proper divisor -of a number is a positive factor of that number other than the number -itself. For example, the proper divisors of 6 are 1, 2, and 3.) 
+According to Wikipedia, URL normalization is the process by which URLs +are modified and standardized in a consistent manner. The goal of the +normalization process is to transform a URL into a normalized URL so +it is possible to determine if two syntactically different URLs may +be equivalent. -The smallest pair of amicable numbers is (220, 284). They are amicable -because the proper divisors of 220 are 1, 2, 4, 5, 10, 11, 20, 22, 44, -55 and 110, of which the sum is 284; and the proper divisors of 284 are 1, -2, 4, 71 and 142, of which the sum is 220. - -The first ten amicable pairs are: (220, 284), (1184, 1210), (2620, -2924), (5020, 5564), (6232, 6368), (10744, 10856), (12285, 14595), -(17296, 18416), (63020, 76084), and (66928, 66992) - -My notes: Another clearly described problem. Obvious method involves -a bit of caching. +My notes: The RFC link points to a long list of regex-style changes, +not all of which have to be implemented. Easy to implement most. diff --git a/challenge-021/duncan-c-white/perl5/ch-1.pl b/challenge-021/duncan-c-white/perl5/ch-1.pl new file mode 100755 index 0000000000..ee09dc106c --- /dev/null +++ b/challenge-021/duncan-c-white/perl5/ch-1.pl @@ -0,0 +1,26 @@ +#!/usr/bin/perl +# +# Challenge 1: "Write a script to calculate the value of e, also known +# as Euler's number and Napier's constant." +# +# My notes: The associated wiki page reminds us that the easiest way of +# calculating e (the base of natural logarithms) is via: +# e = 1 + 1/1! + 1/2! + 1/3! + 1/4! + ... 1/n! Let's use that. 
+#
+
+use strict;
+use warnings;
+
+die "Usage: ch-1.pl [NUMTERMS]\n" if @ARGV>1;
+my $nterms = shift // 30;
+die "ch-1.pl: NUMTERMS must be a whole number\n" unless $nterms =~ /\A\d+\z/;
+
+# sum 1/0! + 1/1! + ... ($nterms terms); entering pass $n, $nfact is ($n-1)!
+my $e = 0;
+my $nfact = 1;
+foreach my $n (1..$nterms)
+{
+    $e += 1/$nfact;
+    $nfact *= $n;
+}
+print "e=$e\n";
diff --git a/challenge-021/duncan-c-white/perl5/ch-2.pl b/challenge-021/duncan-c-white/perl5/ch-2.pl
new file mode 100755
index 0000000000..fc978b3a81
--- /dev/null
+++ b/challenge-021/duncan-c-white/perl5/ch-2.pl
@@ -0,0 +1,146 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script for URL normalization based on rfc3986.
+#
+# According to Wikipedia, URL normalization is the process by which URLs
+# are modified and standardized in a consistent manner. The goal of the
+# normalization process is to transform a URL into a normalized URL so
+# it is possible to determine if two syntactically different URLs may
+# be equivalent.
+#
+# My notes: The RFC link points to a long list of regex-style changes,
+# not all of which have to be implemented. Easy to implement most.
+# Later thought: many of the changes only apply to parts of the URL,
+# so I'll reuse part of my solution to challenge 017, part 2.. url splitting
+#
+# ./ch-2.pl HTTP://ed@mit.edu:800/../%7e%64%75%6e%63%61%6e/%5d%20%ff/a/../../b/../c/../default.asp
+# normalized url is http://ed@mit.edu:800/~duncan/
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+use Data::Dumper;
+
+die "Usage: ch-2.pl URL\n" unless @ARGV==1;
+my $url = shift;
+
+$url = normalize( $url );
+print "normalized url is $url\n";
+
+#
+# my %info = parse_url($url);
+#       Parse URL $url. Return a hash of the pieces. If parsing
+#       fails, return an empty hash.
+#       scheme:[//[userinfo@]host[:port]]path[?query][#fragment]
+#       eg. jdbc://user:password@localhost:3306/pwc?profile=true#h1
+#
+# parses to:
+#       scheme:   jdbc
+#       userinfo: user:password
+#       host:     localhost
+#       port:     3306
+#       path:     /pwc
+#       query:    profile=true
+#       fragment: h1
+#
+fun parse_url( $url )
+{
+    # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ), followed by ':'
+    $url =~ s/^([A-Za-z][A-Za-z0-9+.-]*):// || return ();
+    my %hash = ( scheme => $1 );
+    if( $url =~ s|^//|| )
+    {
+        # the authority ends at the first / ? or #, so userinfo may not span them
+        $hash{userinfo} = $1 if $url =~ s|^([^/?#\@]+)\@||;
+        return () unless $url =~ s|^([\w.-]+)||;
+        $hash{host} = $1;
+        $hash{port} = $1 if $url =~ s/^:(\d+)//;
+    }
+    # fragment, query and path are extracted with or without an authority
+    $hash{fragment} = $1 if $url =~ s/#(.*)$//s;
+    $hash{query} = $1 if $url =~ s/\?(.*)$//s;
+    $hash{path} = $url;
+    return %hash;
+}
+
+#
+# $path = sanitize_path( $path );
+#       Return a cleaned-up copy of $path (always starting with '/'):
+#       '' and '.' segments are dropped, '..' removes the segment before
+#       it, a trailing '/' is kept, and a trailing directory index
+#       (index.htm, index.html, default.asp) is stripped.
+#
+fun sanitize_path( $path )
+{
+    # a path ending in '/', '/.' or '/..' names a directory; note that now,
+    # because split() throws away the trailing empty element
+    my $is_dir = $path =~ m{/\.{0,2}$};
+
+    # walk the segments with a stack: push names, pop on '..'
+    my @stack;
+    foreach my $seg (split( m|/|, $path ))
+    {
+        next if $seg eq '.' || $seg eq '';
+        if( $seg eq '..' )
+        {
+            pop @stack;
+        } else
+        {
+            push @stack, $seg;
+        }
+    }
+
+    $path = '/'. join('/', @stack );
+    $path .= '/' if $is_dir && @stack;
+
+    # remove a trailing directory index: index.htm, index.html, default.asp
+    $path =~ s{/(?:index\.html?|default\.asp)$}{/};
+
+    return $path;
+}
+
+#
+# my $normalizedurl = normalize( $url );
+#       Normalize $url following RFC 3986 section 6.2.2; the numbered
+#       comments in the body list the steps.  A URL that parse_url()
+#       cannot split into scheme://host... gets only steps 1 and 2.
+#
+fun normalize( $url )
+{
+    # 1. uppercase the hex digits of %hh triples
+    $url =~ s/(%[0-9A-Fa-f]{2})/\U$1/g;
+
+    # 2. decode unnecessary %HH triples, viz:
+    # "ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+    # period (%2E), underscore (%5F), or tilde (%7E)"
+    # NB: %40 '@' and %60 '`' are not in that set and must stay encoded
+    $url =~
+      s/%(4[1-9A-F]|5[0-9A]|6[1-9A-F]|7[0-9A]|3[0-9]|2D|2E|5F|7E)/chr(hex($1))/eg;
+
+    # now parse url into parts..
+    my %info = parse_url($url);
+    return $url unless defined $info{host};
+
+    # 3. lowercase scheme and host only; path, query etc. are case-sensitive
+    $info{$_} = lc $info{$_} for qw(scheme host);
+
+    # 4. remove the port when it is the default one for this scheme
+    my %default_port = ( http => 80, https => 443 );
+    delete $info{port} if defined $info{port}
+        && $info{port} == ( $default_port{ $info{scheme} } // -1 );
+
+    # 5. sanitize path in various ways, eg remove '', '.' and '..' elements
+    my $path = sanitize_path( $info{path} );
+
+    # finally, merge bits back together; presence is tested with length,
+    # not truth, so that a part consisting of "0" is not lost
+    $url = $info{scheme}."://";
+    $url .= $info{userinfo}.'@' if length( $info{userinfo} // '' );
+    $url .= $info{host};
+    $url .= ":".$info{port} if defined $info{port};
+    $url .= $path;
+    $url .= "?".$info{query} if length( $info{query} // '' );
+    $url .= "#".$info{fragment} if length( $info{fragment} // '' );
+    return $url;
+}
diff --git a/challenge-021/duncan-c-white/perl5/parse b/challenge-021/duncan-c-white/perl5/parse
new file mode 100755
index 0000000000..0fc5a6da02
--- /dev/null
+++ b/challenge-021/duncan-c-white/perl5/parse
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+
+# Challenge 2: "Create a script to parse URL and print the components of
+# URL. According to the Wiki page https://en.wikipedia.org/wiki/URL, the URL
+# syntax is as below:
+#
+# scheme:[//[userinfo@]host[:port]]path[?query][#fragment]
+#
+# eg. jdbc://user:password@localhost:3306/pwc?profile=true#h1
+#
+# scheme:   jdbc
+# userinfo: user:password
+# host:     localhost
+# port:     3306
+# path:     /pwc
+# query:    profile=true
+# fragment: h1
+#
+# My notes: sounds pretty trivial for regexes, if the lexical syntax of
+# each component is defined clearly. Ok, reading the above wiki page
+# doesn't make it 100% clear, but let's hack it up, that's probably good
+# enough for most cases.
+
+use strict;
+use warnings;
+use Function::Parameters;
+use Data::Dumper;
+
+#
+# my %info = parse_url($url);
+#       Parse URL $url. Return a hash of the pieces. If parsing
+#       fails, return an empty hash.
+#       scheme:[//[userinfo@]host[:port]]path[?query][#fragment]
+#       eg. jdbc://user:password@localhost:3306/pwc?profile=true#h1
+#
+# parses to:
+#       scheme:   jdbc
+#       userinfo: user:password
+#       host:     localhost
+#       port:     3306
+#       path:     /pwc
+#       query:    profile=true
+#       fragment: h1
+#
+fun parse_url( $url )
+{
+    # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ), followed by ':'
+    $url =~ s/^([A-Za-z][A-Za-z0-9+.-]*):// || return ();
+    my %hash = ( scheme => $1 );
+    if( $url =~ s|^//|| )
+    {
+        # the authority ends at the first / ? or #, so userinfo may not span them
+        $hash{userinfo} = $1 if $url =~ s|^([^/?#\@]+)\@||;
+        return () unless $url =~ s|^([\w.-]+)||;
+        $hash{host} = $1;
+        $hash{port} = $1 if $url =~ s/^:(\d+)//;
+    }
+    # fragment, query and path are extracted with or without an authority
+    $hash{fragment} = $1 if $url =~ s/#(.*)$//s;
+    $hash{query} = $1 if $url =~ s/\?(.*)$//s;
+    $hash{path} = $url;
+    return %hash;
+}
+
+# with no arguments, demonstrate on the example URL from the challenge
+push @ARGV, 'jdbc://user:password@localhost:3306/pwc?profile=true#h1'
+    unless @ARGV;
+$Data::Dumper::Sortkeys = 1;    # print the pieces in a stable order
+foreach my $url (@ARGV)
+{
+    my %info = parse_url($url);
+    print "$url:\n". Dumper(\%info);
+}
-- 
cgit