aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMohammad S Anwar <Mohammad.Anwar@yahoo.com>2019-08-26 00:10:19 +0100
committerGitHub <noreply@github.com>2019-08-26 00:10:19 +0100
commitc495bce4c4b9aa87ac694d447a84fd62bb25208d (patch)
tree867c0c2d8dd43f16afcd1b67aeb3468b639064b8
parent6fe586844ef2d48cab8dc8ff6a5df302148184fb (diff)
parentfbc975b7d7551368935a6ffaa7f5ea0fbe1927f7 (diff)
downloadperlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.gz
perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.bz2
perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.zip
Merge pull request #557 from dcw803/master
finally got the LZW stuff working (with 5 mins to spare:-)) after…
-rw-r--r--challenge-022/duncan-c-white/README37
-rw-r--r--challenge-022/duncan-c-white/perl5/LZW.pm251
-rw-r--r--challenge-022/duncan-c-white/perl5/MakePrimes.pm90
-rwxr-xr-xchallenge-022/duncan-c-white/perl5/ch-1.pl62
-rwxr-xr-xchallenge-022/duncan-c-white/perl5/ch-2.pl42
-rwxr-xr-xchallenge-022/duncan-c-white/perl5/encdecode.pl38
-rwxr-xr-xchallenge-022/duncan-c-white/perl5/testlzw.pl60
7 files changed, 559 insertions, 21 deletions
diff --git a/challenge-022/duncan-c-white/README b/challenge-022/duncan-c-white/README
index bf5b030e87..9abc0d15e2 100644
--- a/challenge-022/duncan-c-white/README
+++ b/challenge-022/duncan-c-white/README
@@ -1,25 +1,20 @@
-Challenge 1: "Write a script to accept a string from command line and
-split it on change of character. For example, if the string is "ABBCDEEF",
-then it should split like 'A', 'BB', 'C', 'D', 'EE', 'F'."
+Challenge 1: "Write a script to print first 10 Sexy Prime Pairs. Sexy
+primes are prime numbers that differ from each other by 6. For example,
+the numbers 5 and 11 are both sexy primes, because 11 - 5 = 6. The term
+"sexy prime" is a pun stemming from the Latin word for six: sex."
-My notes: Clearly defined, sounds like a job for regexes.
+My notes: Clearly defined, yet another prime-based task, sounds very
+easy - let's have a go..
-Challenge 2: "Write a script to print the smallest pair of Amicable Numbers."
+Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+compression algorithm. The script should have method to encode/decode
+algorithm. The wiki page
+https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+explains the compression algorithm very nicely."
-Amicable numbers are two different numbers so related that the sum of the
-proper divisors of each is equal to the other number. (A proper divisor
-of a number is a positive factor of that number other than the number
-itself. For example, the proper divisors of 6 are 1, 2, and 3.)
-
-The smallest pair of amicable numbers is (220, 284). They are amicable
-because the proper divisors of 220 are 1, 2, 4, 5, 10, 11, 20, 22, 44,
-55 and 110, of which the sum is 284; and the proper divisors of 284 are 1,
-2, 4, 71 and 142, of which the sum is 220.
-
-The first ten amicable pairs are: (220, 284), (1184, 1210), (2620,
-2924), (5020, 5564), (6232, 6368), (10744, 10856), (12285, 14595),
-(17296, 18416), (63020, 76084), and (66928, 66992)
-
-My notes: Another clearly described problem. Obvious method involves
-a bit of caching.
+My notes: I read the wiki page, perhaps without concentrating enough.
+Looks complicated, especially the decoding part. Also, what's not
+quite clear is what initial alphabet both encoding and decoding should
+use? but let's have a go, hopefully building an encoder will clarify
+most things?
diff --git a/challenge-022/duncan-c-white/perl5/LZW.pm b/challenge-022/duncan-c-white/perl5/LZW.pm
new file mode 100644
index 0000000000..5dac1ca36e
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/LZW.pm
@@ -0,0 +1,251 @@
+#
+# Lempel-Ziv-Welch (LZW) compression algorithm: encoding and decoding using
+# a common dictionary.
+#
+# The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+#
+# My notes: I read the wiki page, perhaps without concentrating enough.
+# Looks complicated, especially the decoding part. Also, what's not
+# quite clear is what initial alphabet both encoding and decoding should
+# use? but let's have a go anyway, hopefully building an encoder will
+# start to clarify most things?
+#
+# Update: the encoding was very straight forward, but I struggled for
+# several hours to get my head around the decoding - especially the
+# special case described in the wikipedia page where the dictionary
+# doesn't contain the entry. The description was NOT CLEAR ENOUGH
+# in that case (and giving an example text where the special case
+# applied would have helped a lot, eg TOTOTOT was the shortest I found)
+#
+# Eventually, with time running out, I checked some Rosetta code
+# implementations, discovered what pseudo-code the description
+# really mapped onto, and adapted it to fit my code..
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+use Data::Dumper;
+
+
+my @dict = ( '#', 'A'..'Z' ); # LZW dictionary
+my $debug = 0; # debugging
+
+
+#
+# lzw_setdict( @d );
+# Set the dictionary to @d, before encoding and decoding
+#
+fun lzw_setdict( @d )
+{
+ @dict = ( '#', @d );
+}
+
+
+#
+# lzw_setdebug( $d );
+# Set the debug flag to $d, before encoding and decoding
+#
+fun lzw_setdebug( $d )
+{
+ $debug = $d;
+}
+
+
+#
+# my( $x, $p ) = np2( $n );
+# Find the first 2^x >= $n, return x and 2^x.
+# eg np2(1023)=10,1024, np2(1024)=10,1024, np2(1025)=11,2048
+#
+fun np2( $n )
+{
+ my $x = 1;
+ my $p = 2;
+ while( $p < $n )
+ {
+ $p *= 2;
+ $x++;
+ }
+ return ( $x, $p );
+}
+
+
+#
+# my $binstr = lzw_encode( $text );
+# LZW encode $text, some plain text, using a dictionary containing
+# @dictletters. Return the encoded binary string (a sequence of 0s and 1s).
+#
+fun lzw_encode( $text )
+{
+ my $lastpos = length($text)-1;
+ $text .= '#' unless substr($text,$lastpos,1) eq '#';
+
+ my( $width, $transition ) = np2( scalar(@dict) );
+ #die "width=$width, transition=$transition\n";
+
+ my %indict = map { $dict[$_] => $_ } 0..$#dict;
+ my @newdict = @dict;
+
+ my @letter = split(//,$text);
+ foreach my $pos (0..@letter-1)
+ {
+ my $letter = $letter[$pos];
+ die "encode: letter $letter not in newdict\n"
+ unless defined $indict{$letter};
+ die "encode: letter $letter is the stop code!\n"
+ if $letter eq '#' && $pos<@letter-1;
+ }
+
+ my $result = "";
+
+ my $seq = shift @letter; # first letter
+
+ foreach my $letter (@letter) # rest of the letters
+ {
+ die "encode: logic error, seq $seq not in dict\n"
+ unless defined $indict{$seq};
+
+ print "encode: seq=$seq, letter=$letter\n" if $debug;
+
+ if( defined $indict{$seq.$letter} ) # extend sequence
+ {
+ $seq .= $letter;
+ next;
+ }
+
+ # otherwise: seq is in dict, but seq.letter is not
+ print "$seq in dict; but ",$seq.$letter, " is not in dict\n"
+ if $debug;
+ my $pos = $indict{$seq};
+ my $b = sprintf( "%0${width}b", $pos );
+
+ # emit $b
+ print "emit $b for $seq @ $pos\n" if $debug;
+ $result .= $b;
+ $result .= '-' if $debug;
+
+ # append $seq.$letter to the dictionary
+ $pos = @newdict;
+ $seq .= $letter;
+ push @newdict, $seq;
+ $indict{$seq} = $pos;
+ print "adding $seq to dict at $pos\n" if $debug;
+
+ if( $pos == $transition ) # need wider binary strings
+ {
+ $width++;
+ $transition *= 2;
+ print "increasing width to $width, ".
+ "transition to $transition\n"
+ if $debug;
+ }
+
+ # start again with seq = letter
+ $seq = $letter;
+ }
+
+ # handle the final seq (should be something ending in the stop code)
+ my $pos = $indict{$seq};
+ my $b = sprintf( "%0${width}b", $pos );
+
+ # emit $b
+ print "emit $b for $seq @ $pos\n" if $debug;
+ $result .= "$b";
+
+ return $result;
+}
+
+
+#
+# my $text = lzw_decode( $binstr );
+# LZW decode $binstr, a string of 0s and 1s that represents a LZW
+# compression of an unknown text, using a dictionary containing
+# @dictletters. Return the decoded text string (hopefully plain text).
+#
+fun lzw_decode( $binstr )
+{
+ my( $width, $transition ) = np2( scalar(@dict) );
+ #die "width=$width, transition=$transition\n";
+
+ my @newdict = @dict;
+ my %indict = map { $dict[$_] => $_ } 0..$#dict;
+
+ # remove the very first width $width bit-prefix
+ my $b = substr( $binstr, 0, $width, "" );
+
+ # find the position in the dictionary
+ my $pos = oct( "0b$b" ); # convert binary string->int
+ my $ndict = @newdict;
+ print "decoding first $b, pos=$pos, ndict=$ndict\n" if $debug;
+
+
+ die "LZW_decode: first bin $b (pos $pos) not in dict, $ndict entries\n" unless $pos<$ndict;
+
+ my $prevf = $newdict[$pos]; # previous text fragment
+
+ my $result = $prevf;
+
+ while( $binstr )
+ {
+ # remove next width $width bit-prefix
+ my $b = substr( $binstr, 0, $width, "" );
+
+ print "decoding $b, prevf=$prevf\n" if $debug;
+
+ # find the position in the dictionary
+ my $pos = oct( "0b$b" ); # convert binary string->int
+
+ my $ndict = @newdict;
+
+ my $f; # current decoded text fragment
+
+ # if pos in dict?
+ if( $pos < $ndict )
+ {
+ $f = $newdict[$pos];
+ } elsif( $pos == $ndict )
+ {
+ $f = $prevf . substr($prevf,0,1);
+ } else
+ {
+ die "decode: bad pos $pos (dictionary has $ndict ".
+ "entries) result=$result, b=$b, ".
+ "binstr=$binstr)\n";
+ }
+
+ # ok, so $b represents text frag $f
+ print "b $b, pos=$pos, prevf=$prevf, f=$f\n" if $debug;
+
+ $result .= $f;
+
+ # the next entry in the dictionary must be the WHOLE of
+ # the previous fragment plus the FIRST letter of the
+ # current fragment $f
+ my $new = $prevf . substr($f,0,1);
+
+ $pos = @newdict;
+ print "added $new @ pos $pos\n" if $debug;
+
+ # add it to the dictionary
+ push @newdict, $new;
+ $indict{$new} = $pos;
+
+ # do we need to change the width?
+ if( @newdict == $transition )
+ {
+ $width++;
+ $transition *= 2;
+ print "increasing width to $width, ".
+ "transition to $transition\n"
+ if $debug;
+ }
+
+ $prevf = $f;
+ }
+ return $result;
+}
+
+
+1;
diff --git a/challenge-022/duncan-c-white/perl5/MakePrimes.pm b/challenge-022/duncan-c-white/perl5/MakePrimes.pm
new file mode 100644
index 0000000000..2f52150e73
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/MakePrimes.pm
@@ -0,0 +1,90 @@
+#
+# mkprimes module (converted from mkprimes.c)
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+
+
+my @foundprimes; # remember all primes we've found..
+
+
+#
+# my @primes = primes_upto( $n );
+# Find all primes up to N; return a list of all such primes
+# (note that 1 is not usually considered a prime)
+#
+fun primes_upto( $n )
+{
+ my @isprime;
+
+ for( my $i=1; $i<=$n; $i++ )
+ {
+ $isprime[$i] = 1; # initially
+ }
+
+ # now sieve the non-primes out..
+ my $upper = int(sqrt($n));
+ printf( "debug: n=%d, upper=%d\n", $n, $upper );
+ for( my $i=2; $i<=$upper; $i++ )
+ {
+ if( $isprime[$i] )
+ {
+ #printf( "debug: crossing out multiples of %d\n", $i );
+ for( my $j=$i*$i; $j<=$n; $j+=$i )
+ {
+ $isprime[$j] = 0;
+ }
+ }
+ }
+
+ # after sieving, extract the primes
+ my @primes = grep { $isprime[$_] } 2..$n;
+
+ # remember them
+ @foundprimes = @primes;
+
+ return @primes;
+}
+
+
+#
+# my @moreprimes = more_primes( $n, $m );
+# Need more primes! Have @foundprimes up to $n, but need
+# to sieve primes from $n+1..$m, so re-sieve, return
+# a list of all new primes (in the range $n+1..$m) that we find.
+#
+fun more_primes( $n, $m )
+{
+ my %isprime;
+
+ print "finding more primes from ", $n+1, "..$m\n";
+
+ for( my $i=$n+1; $i<=$m; $i++ )
+ {
+ $isprime{$i} = 1; # pre-sieving
+ }
+
+ # now sieve the non-primes out..
+ foreach my $prime (@foundprimes)
+ {
+ # find first multiple of $prime > $n
+ my $mult = $prime * (int($n/$prime)+1);
+
+ #print "debug: xo multiples of $prime from $mult to $m\n";
+
+ for( my $j=$mult; $j<=$m; $j+=$prime )
+ {
+ delete $isprime{$j};
+ }
+ }
+
+ # after sieving, extract the primes
+ my @primes = grep { $isprime{$_} } $n+1..$m;
+ push @foundprimes, @primes;
+ return @primes;
+}
+
+
+1;
diff --git a/challenge-022/duncan-c-white/perl5/ch-1.pl b/challenge-022/duncan-c-white/perl5/ch-1.pl
new file mode 100755
index 0000000000..53dc176926
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/ch-1.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl
+#
+# Challenge 1: "Write a script to print first 10 Sexy Prime Pairs. Sexy
+# primes are prime numbers that differ from each other by 6. For example,
+# the numbers 5 and 11 are both sexy primes, because 11 - 5 = 6. The term
+# "sexy prime" is a pun stemming from the Latin word for six: sex."
+#
+# My notes: Clearly defined, yet another prime-based task, sounds very
+# easy - let's have a go.. easy source of prime numbers is via my
+# MakePrimes.pm module (itself converted from my earlier mkprimes.c)
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+#use Data::Dumper;
+
+use lib qw(.); # I hate this
+use MakePrimes;
+
+die "Usage: ch-1.pl [NUMSEXY]\n" if @ARGV>1;
+my $wanted = shift // 10;
+
+my $found = find_n_sexy_prime_pairs( $wanted );
+
+
+#
+# my $found = find_n_sexy_prime_pairs( $n );
+# Find $n sexy prime pairs.. return number of pairs found,
+# $found should always == $n
+#
+fun find_n_sexy_prime_pairs( $n )
+{
+ my %isprime;
+ my $found = 0;
+
+ my $w = 40000;
+ my $from = $w;
+ my @prime = primes_upto( $w );
+
+ for(;;)
+ {
+ print "searching for sexy primes in ", scalar(@prime), " primes\n";
+ foreach my $prime (@prime)
+ {
+ $isprime{$prime}++;
+ if( $prime > 6 && $isprime{$prime-6} )
+ {
+ $found++;
+ print "found sexy prime pair $found: ", $prime-6, " and $prime\n";
+ if( $wanted == $found )
+ {
+ print "found $found sexy prime pairs\n";
+ return $found;
+ }
+ }
+ }
+ print "found $found sexy prime pairs in pass, want $wanted\n";
+ @prime = more_primes( $from, $from+$w );
+ $from += $w;
+ }
+}
diff --git a/challenge-022/duncan-c-white/perl5/ch-2.pl b/challenge-022/duncan-c-white/perl5/ch-2.pl
new file mode 100755
index 0000000000..ab43ff181b
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/ch-2.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+# compression algorithm. The script should have method to encode/decode
+# algorithm. The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+#
+# My notes: I read the wiki page, perhaps without concentrating enough.
+# Looks complicated, especially the decoding part. My code is in LZW.pm
+#
+
+use strict;
+use warnings;
+
+use lib qw(.); # I hate this!
+use LZW;
+
+my @dict = ( 'A'..'Z' );
+
+lzw_setdict( @dict );
+lzw_setdebug( 1 );
+
+die "Usage: ch-2.pl E TEXT\nor : ch-2.pl D binarystring\n" unless @ARGV==2;
+my $op = shift;
+
+die "ch-2.pl: bad encode/decode op $op\n" unless $op eq "E" || $op eq "D";
+
+my $arg = shift;
+
+#die "debug: np2(1023)==", np2(1023), "\nnp2(1024)==", np2(1024), "\nnp2(1025)==", np2(1025);
+
+if( $op eq "E" )
+{
+ my $binstr = lzw_encode( $arg );
+ print "$binstr\n";
+}
+elsif( $op eq "D" )
+{
+ my $text = lzw_decode( $arg );
+ print "$text\n";
+}
diff --git a/challenge-022/duncan-c-white/perl5/encdecode.pl b/challenge-022/duncan-c-white/perl5/encdecode.pl
new file mode 100755
index 0000000000..24e7916c4a
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/encdecode.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+# compression algorithm. The script should have method to encode/decode
+# algorithm. The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+#
+# My notes: My code is in LZW.pm
+# Ok, having written the encode/decode routines, let's give them a
+# workout via encoding and then decoding a plain text string..
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+
+use lib qw(.); # I hate this!
+use LZW;
+
+my @dict = ( 'A'..'Z' );
+
+lzw_setdict( @dict );
+lzw_setdebug( 0 );
+
+die "Usage: encdecode STRING\n" unless @ARGV==1;
+my $text = shift;
+
+$text .= '#' unless substr($text,length($text)-1) eq '#';
+
+my $binstr = lzw_encode( $text );
+print "\ntext: $text\n" if 0;
+#print " encodes to: $binstr\n";
+my $text2 = lzw_decode( $binstr );
+#print " which decodes to: $text2\n";
+print " encodes+decodes to: $text2\n" if 0;
+die "text $text -> encodes to $binstr -> decodes to $text2\n"
+ unless $text eq $text2;
diff --git a/challenge-022/duncan-c-white/perl5/testlzw.pl b/challenge-022/duncan-c-white/perl5/testlzw.pl
new file mode 100755
index 0000000000..efb86814fc
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/testlzw.pl
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+# compression algorithm. The script should have method to encode/decode
+# algorithm. The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+#
+# My notes: My code is in LZW.pm
+# Ok, having written the encode/decode routines, let's give them a
+# workout..
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+
+use lib qw(.); # I hate this!
+use LZW;
+
+my @dict = ( 'A'..'Z' );
+
+lzw_setdict( @dict );
+lzw_setdebug( 0 );
+
+#
+# my $plain = make_random_string( $len );
+# Make a random string of length $len, using dictionary @dict.
+#
+fun make_random_string( $len )
+{
+ my $result = "";
+ my $dictsize = @dict;
+ foreach my $n (1..$len)
+ {
+ $result .= $dict[ int(rand($dictsize)) ];
+ }
+ return $result;
+}
+
+
+die "Usage: testlzw NSTRINGS LENGTH\n" unless @ARGV==2;
+my $n = shift;
+my $len = shift;
+
+srand( $$ ^ time() );
+
+foreach (1..$n)
+{
+ my $text = make_random_string( $len );
+ my $binstr = lzw_encode( $text );
+ $text .= '#';
+ print "\ntext: $text\n";
+ #print " encodes to: $binstr\n";
+ my $text2 = lzw_decode( $binstr );
+ #print " which decodes to: $text2\n";
+ print " encodes+decodes to: $text2\n" if 0;
+ die "text $text -> encodes to $binstr -> decodes to $text2\n"
+ unless $text eq $text2;
+}