Merge pull request #557 from dcw803/master

finally got the LZW stuff working (with 5 mins to spare:-)) after…
author: Mohammad S Anwar <Mohammad.Anwar@yahoo.com> 2019-08-26 00:10:19 +0100
committer: GitHub <noreply@github.com> 2019-08-26 00:10:19 +0100
commit: c495bce4c4b9aa87ac694d447a84fd62bb25208d (patch)
tree: 867c0c2d8dd43f16afcd1b67aeb3468b639064b8
parent: 6fe586844ef2d48cab8dc8ff6a5df302148184fb (diff)
parent: fbc975b7d7551368935a6ffaa7f5ea0fbe1927f7 (diff)
download: perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.gz
perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.bz2
perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.zip
7 files changed, 559 insertions, 21 deletions
diff --git a/challenge-022/duncan-c-white/README b/challenge-022/duncan-c-white/README
index bf5b030e87..9abc0d15e2 100644
--- a/challenge-022/duncan-c-white/README
+++ b/challenge-022/duncan-c-white/README
@@ -1,25 +1,20 @@
-Challenge 1: "Write a script to accept a string from command line and
-split it on change of character. For example, if the string is "ABBCDEEF",
-then it should split like 'A', 'BB', 'C', 'D', 'EE', 'F'."
+Challenge 1: "Write a script to print first 10 Sexy Prime Pairs. Sexy
+primes are prime numbers that differ from each other by 6. For example,
+the numbers 5 and 11 are both sexy primes, because 11 - 5 = 6. The term
+"sexy prime" is a pun stemming from the Latin word for six: sex."
 
-My notes: Clearly defined, sounds like a job for regexes.
+My notes: Clearly defined, yet another prime-based task, sounds very
+easy - let's have a go..
 
 
-Challenge 2: "Write a script to print the smallest pair of Amicable Numbers."
+Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+compression algorithm. The script should have method to encode/decode
+algorithm. The wiki page
+https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+explains the compression algorithm very nicely."
 
-Amicable numbers are two different numbers so related that the sum of the
-proper divisors of each is equal to the other number. (A proper divisor
-of a number is a positive factor of that number other than the number
-itself. For example, the proper divisors of 6 are 1, 2, and 3.)
-
-The smallest pair of amicable numbers is (220, 284). They are amicable
-because the proper divisors of 220 are 1, 2, 4, 5, 10, 11, 20, 22, 44,
-55 and 110, of which the sum is 284; and the proper divisors of 284 are 1,
-2, 4, 71 and 142, of which the sum is 220.
-
-The first ten amicable pairs are: (220, 284), (1184, 1210), (2620,
-2924), (5020, 5564), (6232, 6368), (10744, 10856), (12285, 14595),
-(17296, 18416), (63020, 76084), and (66928, 66992)
-
-My notes: Another clearly described problem.  Obvious method involves
-a bit of caching.
+My notes: I read the wiki page, perhaps without concentrating enough.
+Looks complicated, especially the decoding part.  Also, what's not
+quite clear is what initial alphabet both encoding and decoding should
+use?  but let's have a go, hopefully building an encoder will clarify
+most things?
diff --git a/challenge-022/duncan-c-white/perl5/LZW.pm b/challenge-022/duncan-c-white/perl5/LZW.pm
new file mode 100644
index 0000000000..5dac1ca36e
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/LZW.pm
@@ -0,0 +1,251 @@
+#
+# Lempel-Ziv-Welch (LZW) compression algorithm: encoding and decoding using
+# a common dictionary.
+#
+# The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+# 
+# My notes: I read the wiki page, perhaps without concentrating enough.
+# Looks complicated, especially the decoding part.  Also, what's not
+# quite clear is what initial alphabet both encoding and decoding should
+# use?  but let's have a go anyway, hopefully building an encoder will
+# start to clarify most things?
+#
+# Update: the encoding was very straight forward, but I struggled for
+# several hours to get my head around the decoding - especially the
+# special case described in the wikipedia page where the dictionary
+# doesn't contain the entry.  The description was NOT CLEAR ENOUGH
+# in that case (and giving an example text where the special case
+# applied would have helped a lot, eg TOTOTOT was the shortest I found)
+#
+# Eventually, with time running out, I checked some Rosetta code
+# implementations, discovered what pseudo-code the description
+# really mapped onto, and adapted it to fit my code..
+# 
+
+use strict;
+use warnings;
+use Function::Parameters;
+use Data::Dumper;
+
+
+my @dict = ( '#', 'A'..'Z' );	# LZW dictionary
+my $debug = 0;			# debugging
+
+
+#
+# lzw_setdict( @d );
+#	Set the dictionary to @d, before encoding and decoding
+#
+fun lzw_setdict( @d )
+{
+	@dict = ( '#', @d );
+}
+
+
+#
+# lzw_setdebug( $d );
+#	Set the debug flag to $d, before encoding and decoding
+#
+fun lzw_setdebug( $d )
+{
+	$debug = $d;
+}
+
+
+#
+# my( $x, $p ) = np2( $n );
+#	Find the first 2^x >= $n, return x and 2^x.
+#	eg np2(1023)=10,1024, np2(1024)=10,1024, np2(1025)=11,2048
+#
+fun np2( $n )
+{
+	my $x = 1;
+	my $p = 2;
+	while( $p < $n )
+	{
+		$p *= 2;
+		$x++;
+	}
+	return ( $x, $p );
+}
+
+
+#
+# my $binstr = lzw_encode( $text );
+#	LZW encode $text, some plain text, using a dictionary containing
+#	@dictletters.  Return the encoded binary string (a sequence of 0s and 1s).
+#
+fun lzw_encode( $text )
+{
+	my $lastpos = length($text)-1;
+	$text .= '#' unless substr($text,$lastpos,1) eq '#';
+
+	my( $width, $transition ) = np2( scalar(@dict) );
+	#die "width=$width, transition=$transition\n";
+
+	my %indict = map { $dict[$_] => $_ } 0..$#dict;
+	my @newdict = @dict;
+
+	my @letter = split(//,$text);
+	foreach my $pos (0..@letter-1)
+	{
+		my $letter = $letter[$pos];
+		die "encode: letter $letter not in newdict\n"
+			unless defined $indict{$letter};
+		die "encode: letter $letter is the stop code!\n"
+			if $letter eq '#' && $pos<@letter-1;
+	}
+
+	my $result = "";
+
+	my $seq = shift @letter;		# first letter
+
+	foreach my $letter (@letter)		# rest of the letters
+	{
+		die "encode: logic error, seq $seq not in dict\n"
+			unless defined $indict{$seq};
+
+		print "encode: seq=$seq, letter=$letter\n" if $debug;
+
+		if( defined $indict{$seq.$letter} )	   # extend sequence
+		{
+			$seq .= $letter;
+			next;
+		}
+
+		# otherwise: seq is in dict, but seq.letter is not
+		print "$seq in dict; but ",$seq.$letter, " is not in dict\n"
+			if $debug;
+		my $pos = $indict{$seq};
+		my $b = sprintf( "%0${width}b", $pos );
+		
+		# emit $b
+		print "emit $b for $seq @ $pos\n" if $debug;
+		$result .= $b;
+		$result .= '-' if $debug;
+
+		# append $seq.$letter to the dictionary
+		$pos = @newdict;
+		$seq .= $letter;
+		push @newdict, $seq;
+		$indict{$seq} = $pos;
+		print "adding $seq to dict at $pos\n" if $debug;
+
+		if( $pos == $transition )	# need wider binary strings
+		{
+			$width++;
+			$transition *= 2;
+			print "increasing width to $width, ".
+			      "transition to $transition\n"
+				if $debug;
+		}
+
+		# start again with seq = letter
+		$seq = $letter;
+	}
+
+	# handle the final seq (should be something ending in the stop code)
+	my $pos = $indict{$seq};
+	my $b = sprintf( "%0${width}b", $pos );
+	
+	# emit $b
+	print "emit $b for $seq @ $pos\n" if $debug;
+	$result .= "$b";
+
+	return $result;
+}
+
+
+#
+# my $text = lzw_decode( $binstr );
+#	LZW decode $binstr, a string of 0s and 1s that represents a LZW
+#	compression of an unknown text, using a dictionary containing
+#	@dictletters.  Return the decoded text string (hopefully plain text).
+#
+fun lzw_decode( $binstr )
+{
+	my( $width, $transition ) = np2( scalar(@dict) );
+	#die "width=$width, transition=$transition\n";
+
+	my @newdict = @dict;
+	my %indict = map { $dict[$_] => $_ } 0..$#dict;
+
+	# remove the very first width $width bit-prefix
+	my $b = substr( $binstr, 0, $width, "" );
+
+	# find the position in the dictionary
+	my $pos = oct( "0b$b" );	# convert binary string->int
+	my $ndict = @newdict;
+	print "decoding first $b, pos=$pos, ndict=$ndict\n" if $debug;
+
+
+	die "LZW_decode: first bin $b (pos $pos) not in dict, $ndict entries\n" unless $pos<$ndict;
+
+	my $prevf = $newdict[$pos];	# previous text fragment
+
+	my $result = $prevf;
+
+	while( $binstr )
+	{
+		# remove next width $width bit-prefix
+		my $b = substr( $binstr, 0, $width, "" );
+
+		print "decoding $b, prevf=$prevf\n" if $debug;
+
+		# find the position in the dictionary
+		my $pos = oct( "0b$b" );	# convert binary string->int
+
+		my $ndict = @newdict;
+
+		my $f;		# current decoded text fragment
+
+		# if pos in dict?
+		if( $pos < $ndict )
+		{
+			$f = $newdict[$pos];
+		} elsif( $pos == $ndict )
+		{
+			$f = $prevf . substr($prevf,0,1);
+		} else
+		{
+			die "decode: bad pos $pos (dictionary has $ndict ".
+			    "entries) result=$result, b=$b, ".
+			    "binstr=$binstr)\n";
+		}
+
+		# ok, so $b represents text frag $f
+		print "b $b, pos=$pos, prevf=$prevf, f=$f\n" if $debug;
+
+		$result .= $f;
+
+		# the next entry in the dictionary must be the WHOLE of
+		# the previous fragment plus the FIRST letter of the
+		# current fragment $f
+		my $new = $prevf . substr($f,0,1);
+
+		$pos = @newdict;
+		print "added $new @ pos $pos\n" if $debug;
+
+		# add it to the dictionary
+		push @newdict, $new;
+		$indict{$new} = $pos;
+
+		# do we need to change the width?
+		if( @newdict == $transition )
+		{
+			$width++;
+			$transition *= 2;
+			print "increasing width to $width, ".
+			      "transition to $transition\n"
+				if $debug;
+		}
+
+		$prevf = $f;
+	}
+	return $result;
+}
+
+
+1;
diff --git a/challenge-022/duncan-c-white/perl5/MakePrimes.pm b/challenge-022/duncan-c-white/perl5/MakePrimes.pm
new file mode 100644
index 0000000000..2f52150e73
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/MakePrimes.pm
@@ -0,0 +1,90 @@
+#
+#	mkprimes module (converted from mkprimes.c)
+#
+
+use strict;
+use warnings;
+use Function::Parameters;
+
+
+my @foundprimes;	# remember all primes we've found..
+
+
+#
+# my @primes = primes_upto( $n );
+#	Find all primes up to N; return a list of all such primes
+#	(note that 1 is not usually considered a prime)
+#
+fun primes_upto( $n )
+{
+	my @isprime;
+
+	for( my $i=1; $i<=$n; $i++ )
+	{
+		$isprime[$i] = 1;		# initially
+	}
+
+	# now sieve the non-primes out..
+	my $upper = int(sqrt($n));
+	printf( "debug: n=%d, upper=%d\n", $n, $upper );
+	for( my $i=2; $i<=$upper; $i++ )
+	{
+		if( $isprime[$i] )
+		{
+			#printf( "debug: crossing out multiples of %d\n", $i );
+			for( my $j=$i*$i; $j<=$n; $j+=$i )
+			{
+				$isprime[$j] = 0;
+			}
+		}
+	}
+
+	# after sieving, extract the primes
+	my @primes = grep { $isprime[$_] } 2..$n;
+
+	# remember them
+	@foundprimes = @primes;
+
+	return @primes;
+}
+
+
+#
+# my @moreprimes = more_primes( $n, $m );
+#	Need more primes!  Have @foundprimes up to $n, but need
+#	to sieve primes from $n+1..$m, so re-sieve, return
+#	a list of all new primes (in the range $n+1..$m) that we find.
+#
+fun more_primes( $n, $m )
+{
+	my %isprime;
+
+	print "finding more primes from ", $n+1, "..$m\n";
+
+	for( my $i=$n+1; $i<=$m; $i++ )
+	{
+		$isprime{$i} = 1;	# pre-sieving
+	}
+
+	# now sieve the non-primes out..
+	foreach my $prime (@foundprimes)
+	{
+		# find first multiple of $prime > $n
+		my $mult = $prime * (int($n/$prime)+1);
+
+		#print "debug: xo multiples of $prime from $mult to $m\n";
+
+		for( my $j=$mult; $j<=$m; $j+=$prime )
+		{
+			delete $isprime{$j};
+		}
+	}
+
+	# after sieving, extract the primes
+	my @primes = grep { $isprime{$_} } $n+1..$m;
+	push @foundprimes, @primes;
+	return @primes;
+}
+
+
+1;
diff --git a/challenge-022/duncan-c-white/perl5/ch-1.pl b/challenge-022/duncan-c-white/perl5/ch-1.pl
new file mode 100755
index 0000000000..53dc176926
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/ch-1.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl
+#
+# Challenge 1: "Write a script to print first 10 Sexy Prime Pairs. Sexy
+# primes are prime numbers that differ from each other by 6. For example,
+# the numbers 5 and 11 are both sexy primes, because 11 - 5 = 6. The term
+# "sexy prime" is a pun stemming from the Latin word for six: sex."
+# 
+# My notes: Clearly defined, yet another prime-based task, sounds very
+# easy - let's have a go..  easy source of prime numbers is via my
+# MakePrimes.pm module (itself converted from my earlier mkprimes.c)
+# 
+
+use strict;
+use warnings;
+use Function::Parameters;
+#use Data::Dumper;
+
+use lib qw(.);	# I hate this
+use MakePrimes;
+
+die "Usage: ch-1.pl [NUMSEXY]\n" if @ARGV>1;
+my $wanted = shift // 10;
+
+my $found = find_n_sexy_prime_pairs( $wanted );
+
+
+#
+# my $found = find_n_sexy_prime_pairs( $n );
+#	Find $n sexy prime pairs.. return number of pairs found,
+#	$found should always == $n
+#
+fun find_n_sexy_prime_pairs( $n )
+{
+	my %isprime;
+	my $found = 0;
+
+	my $w = 40000;
+	my $from = $w;
+	my @prime = primes_upto( $w );
+
+	for(;;)
+	{
+		print "searching for sexy primes in ", scalar(@prime), " primes\n";
+		foreach my $prime (@prime)
+		{
+			$isprime{$prime}++;
+			if( $prime > 6 && $isprime{$prime-6} )
+			{
+				$found++;
+				print "found sexy prime pair $found: ", $prime-6, " and $prime\n";
+				if( $wanted == $found )
+				{
+					print "found $found sexy prime pairs\n";
+					return $found;
+				}
+			}
+		}
+		print "found $found sexy prime pairs in pass, want $wanted\n";
+		@prime = more_primes( $from, $from+$w );
+		$from += $w;
+	}
+}
diff --git a/challenge-022/duncan-c-white/perl5/ch-2.pl b/challenge-022/duncan-c-white/perl5/ch-2.pl
new file mode 100755
index 0000000000..ab43ff181b
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/ch-2.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+# compression algorithm. The script should have method to encode/decode
+# algorithm. The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+# 
+# My notes: I read the wiki page, perhaps without concentrating enough.
+# Looks complicated, especially the decoding part.  My code is in LZW.pm
+# 
+
+use strict;
+use warnings;
+
+use lib qw(.);	# I hate this!
+use LZW;
+
+my @dict = ( 'A'..'Z' );
+
+lzw_setdict( @dict );
+lzw_setdebug( 1 );
+
+die "Usage: ch-2.pl E TEXT\nor   : ch-2.pl D binarystring\n" unless @ARGV==2;
+my $op = shift;
+
+die "ch-2.pl: bad encode/decode op $op\n" unless $op eq "E" || $op eq "D";
+
+my $arg = shift;
+
+#die "debug: np2(1023)==", np2(1023), "\nnp2(1024)==", np2(1024), "\nnp2(1025)==", np2(1025);
+
+if( $op eq "E" )
+{
+	my $binstr = lzw_encode( $arg );
+	print "$binstr\n";
+}
+elsif( $op eq "D" )
+{
+	my $text = lzw_decode( $arg );
+	print "$text\n";
+}
diff --git a/challenge-022/duncan-c-white/perl5/encdecode.pl b/challenge-022/duncan-c-white/perl5/encdecode.pl
new file mode 100755
index 0000000000..24e7916c4a
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/encdecode.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+# compression algorithm. The script should have method to encode/decode
+# algorithm. The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+# 
+# My notes: My code is in LZW.pm
+# Ok, having written the encode/decode routines, let's give them a
+# workout via encoding and then decoding a plain text string..
+# 
+
+use strict;
+use warnings;
+use Function::Parameters;
+
+use lib qw(.);	# I hate this!
+use LZW;
+
+my @dict = ( 'A'..'Z' );
+
+lzw_setdict( @dict );
+lzw_setdebug( 0 );
+
+die "Usage: encdecode STRING\n" unless @ARGV==1;
+my $text = shift;
+
+$text .= '#' unless substr($text,length($text)-1) eq '#';
+
+my $binstr = lzw_encode( $text );
+print "\ntext: $text\n" if 0;
+#print "  encodes to: $binstr\n";
+my $text2 = lzw_decode( $binstr );
+#print "  which decodes to: $text2\n";
+print "  encodes+decodes to: $text2\n" if 0;
+die "text $text -> encodes to $binstr -> decodes to $text2\n"
+	unless $text eq $text2;
diff --git a/challenge-022/duncan-c-white/perl5/testlzw.pl b/challenge-022/duncan-c-white/perl5/testlzw.pl
new file mode 100755
index 0000000000..efb86814fc
--- /dev/null
+++ b/challenge-022/duncan-c-white/perl5/testlzw.pl
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+#
+# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW)
+# compression algorithm. The script should have method to encode/decode
+# algorithm. The wiki page
+# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
+# explains the compression algorithm very nicely."
+# 
+# My notes: My code is in LZW.pm
+# Ok, having written the encode/decode routines, let's give them a
+# workout..
+# 
+
+use strict;
+use warnings;
+use Function::Parameters;
+
+use lib qw(.);	# I hate this!
+use LZW;
+
+my @dict = ( 'A'..'Z' );
+
+lzw_setdict( @dict );
+lzw_setdebug( 0 );
+
+#
+# my $plain = make_random_string( $len );
+#	Make a random string of length $len, using dictionary @dict.
+#
+fun make_random_string( $len )
+{
+	my $result = "";
+	my $dictsize = @dict;
+	foreach my $n (1..$len)
+	{
+		$result .= $dict[ int(rand($dictsize)) ];
+	}
+	return $result;
+}
+
+
+die "Usage: testlzw NSTRINGS LENGTH\n" unless @ARGV==2;
+my $n = shift;
+my $len = shift;
+
+srand( $$ ^ time() );
+
+foreach (1..$n)
+{
+	my $text = make_random_string( $len );
+	my $binstr = lzw_encode( $text );
+	$text .= '#';
+	print "\ntext: $text\n";
+	#print "  encodes to: $binstr\n";
+	my $text2 = lzw_decode( $binstr );
+	#print "  which decodes to: $text2\n";
+	print "  encodes+decodes to: $text2\n" if 0;
+	die "text $text -> encodes to $binstr -> decodes to $text2\n"
+		unless $text eq $text2;
+}
author	Mohammad S Anwar <Mohammad.Anwar@yahoo.com>	2019-08-26 00:10:19 +0100
committer	GitHub <noreply@github.com>	2019-08-26 00:10:19 +0100
commit	c495bce4c4b9aa87ac694d447a84fd62bb25208d (patch)
tree	867c0c2d8dd43f16afcd1b67aeb3468b639064b8
parent	6fe586844ef2d48cab8dc8ff6a5df302148184fb (diff)
parent	fbc975b7d7551368935a6ffaa7f5ea0fbe1927f7 (diff)
download	perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.gz perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.bz2 perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.zip