diff options
| author | Mohammad S Anwar <Mohammad.Anwar@yahoo.com> | 2019-08-26 00:10:19 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-08-26 00:10:19 +0100 |
| commit | c495bce4c4b9aa87ac694d447a84fd62bb25208d (patch) | |
| tree | 867c0c2d8dd43f16afcd1b67aeb3468b639064b8 | |
| parent | 6fe586844ef2d48cab8dc8ff6a5df302148184fb (diff) | |
| parent | fbc975b7d7551368935a6ffaa7f5ea0fbe1927f7 (diff) | |
| download | perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.gz perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.tar.bz2 perlweeklychallenge-club-c495bce4c4b9aa87ac694d447a84fd62bb25208d.zip | |
Merge pull request #557 from dcw803/master
finally got the LZW stuff working (with 5 mins to spare:-)) after…
| -rw-r--r-- | challenge-022/duncan-c-white/README | 37 | ||||
| -rw-r--r-- | challenge-022/duncan-c-white/perl5/LZW.pm | 251 | ||||
| -rw-r--r-- | challenge-022/duncan-c-white/perl5/MakePrimes.pm | 90 | ||||
| -rwxr-xr-x | challenge-022/duncan-c-white/perl5/ch-1.pl | 62 | ||||
| -rwxr-xr-x | challenge-022/duncan-c-white/perl5/ch-2.pl | 42 | ||||
| -rwxr-xr-x | challenge-022/duncan-c-white/perl5/encdecode.pl | 38 | ||||
| -rwxr-xr-x | challenge-022/duncan-c-white/perl5/testlzw.pl | 60 |
7 files changed, 559 insertions, 21 deletions
diff --git a/challenge-022/duncan-c-white/README b/challenge-022/duncan-c-white/README index bf5b030e87..9abc0d15e2 100644 --- a/challenge-022/duncan-c-white/README +++ b/challenge-022/duncan-c-white/README @@ -1,25 +1,20 @@ -Challenge 1: "Write a script to accept a string from command line and -split it on change of character. For example, if the string is "ABBCDEEF", -then it should split like 'A', 'BB', 'C', 'D', 'EE', 'F'." +Challenge 1: "Write a script to print first 10 Sexy Prime Pairs. Sexy +primes are prime numbers that differ from each other by 6. For example, +the numbers 5 and 11 are both sexy primes, because 11 - 5 = 6. The term +"sexy prime" is a pun stemming from the Latin word for six: sex." -My notes: Clearly defined, sounds like a job for regexes. +My notes: Clearly defined, yet another prime-based task, sounds very +easy - let's have a go.. -Challenge 2: "Write a script to print the smallest pair of Amicable Numbers." +Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW) +compression algorithm. The script should have method to encode/decode +algorithm. The wiki page +https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch +explains the compression algorithm very nicely." -Amicable numbers are two different numbers so related that the sum of the -proper divisors of each is equal to the other number. (A proper divisor -of a number is a positive factor of that number other than the number -itself. For example, the proper divisors of 6 are 1, 2, and 3.) - -The smallest pair of amicable numbers is (220, 284). They are amicable -because the proper divisors of 220 are 1, 2, 4, 5, 10, 11, 20, 22, 44, -55 and 110, of which the sum is 284; and the proper divisors of 284 are 1, -2, 4, 71 and 142, of which the sum is 220. - -The first ten amicable pairs are: (220, 284), (1184, 1210), (2620, -2924), (5020, 5564), (6232, 6368), (10744, 10856), (12285, 14595), -(17296, 18416), (63020, 76084), and (66928, 66992) - -My notes: Another clearly described problem. Obvious method involves -a bit of caching. +My notes: I read the wiki page, perhaps without concentrating enough. +Looks complicated, especially the decoding part. Also, what's not +quite clear is what initial alphabet both encoding and decoding should +use? but let's have a go, hopefully building an encoder will clarify +most things? diff --git a/challenge-022/duncan-c-white/perl5/LZW.pm b/challenge-022/duncan-c-white/perl5/LZW.pm new file mode 100644 index 0000000000..5dac1ca36e --- /dev/null +++ b/challenge-022/duncan-c-white/perl5/LZW.pm @@ -0,0 +1,251 @@ +# +# Lempel-Ziv-Welch (LZW) compression algorithm: encoding and decoding using +# a common dictionary. +# +# The wiki page +# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch +# explains the compression algorithm very nicely." +# +# My notes: I read the wiki page, perhaps without concentrating enough. +# Looks complicated, especially the decoding part. Also, what's not +# quite clear is what initial alphabet both encoding and decoding should +# use? but let's have a go anyway, hopefully building an encoder will +# start to clarify most things? +# +# Update: the encoding was very straight forward, but I struggled for +# several hours to get my head around the decoding - especially the +# special case described in the wikipedia page where the dictionary +# doesn't contain the entry. The description was NOT CLEAR ENOUGH +# in that case (and giving an example text where the special case +# applied would have helped a lot, eg TOTOTOT was the shortest I found) +# +# Eventually, with time running out, I checked some Rosetta code +# implementations, discovered what pseudo-code the description +# really mapped onto, and adapted it to fit my code.. +# + +use strict; +use warnings; +use Function::Parameters; +use Data::Dumper; + + +my @dict = ( '#', 'A'..'Z' ); # LZW dictionary +my $debug = 0; # debugging + + +# +# lzw_setdict( @d ); +# Set the dictionary to @d, before encoding and decoding +# +fun lzw_setdict( @d ) +{ + @dict = ( '#', @d ); +} + + +# +# lzw_setdebug( $d ); +# Set the debug flag to $d, before encoding and decoding +# +fun lzw_setdebug( $d ) +{ + $debug = $d; +} + + +# +# my( $x, $p ) = np2( $n ); +# Find the first 2^x >= $n, return x and 2^x. +# eg np2(1023)=10,1024, np2(1024)=10,1024, np2(1025)=11,2048 +# +fun np2( $n ) +{ + my $x = 1; + my $p = 2; + while( $p < $n ) + { + $p *= 2; + $x++; + } + return ( $x, $p ); +} + + +# +# my $binstr = lzw_encode( $text ); +# LZW encode $text, some plain text, using a dictionary containing +# @dictletters. Return the encoded binary string (a sequence of 0s and 1s). +# +fun lzw_encode( $text ) +{ + my $lastpos = length($text)-1; + $text .= '#' unless substr($text,$lastpos,1) eq '#'; + + my( $width, $transition ) = np2( scalar(@dict) ); + #die "width=$width, transition=$transition\n"; + + my %indict = map { $dict[$_] => $_ } 0..$#dict; + my @newdict = @dict; + + my @letter = split(//,$text); + foreach my $pos (0..@letter-1) + { + my $letter = $letter[$pos]; + die "encode: letter $letter not in newdict\n" + unless defined $indict{$letter}; + die "encode: letter $letter is the stop code!\n" + if $letter eq '#' && $pos<@letter-1; + } + + my $result = ""; + + my $seq = shift @letter; # first letter + + foreach my $letter (@letter) # rest of the letters + { + die "encode: logic error, seq $seq not in dict\n" + unless defined $indict{$seq}; + + print "encode: seq=$seq, letter=$letter\n" if $debug; + + if( defined $indict{$seq.$letter} ) # extend sequence + { + $seq .= $letter; + next; + } + + # otherwise: seq is in dict, but seq.letter is not + print "$seq in dict; but ",$seq.$letter, " is not in dict\n" + if $debug; + my $pos = $indict{$seq}; + my $b = sprintf( "%0${width}b", $pos ); + + # emit $b + print "emit $b for $seq @ $pos\n" if $debug; + $result .= $b; + $result .= '-' if $debug; + + # append $seq.$letter to the dictionary + $pos = @newdict; + $seq .= $letter; + push @newdict, $seq; + $indict{$seq} = $pos; + print "adding $seq to dict at $pos\n" if $debug; + + if( $pos == $transition ) # need wider binary strings + { + $width++; + $transition *= 2; + print "increasing width to $width, ". + "transition to $transition\n" + if $debug; + } + + # start again with seq = letter + $seq = $letter; + } + + # handle the final seq (should be something ending in the stop code) + my $pos = $indict{$seq}; + my $b = sprintf( "%0${width}b", $pos ); + + # emit $b + print "emit $b for $seq @ $pos\n" if $debug; + $result .= "$b"; + + return $result; +} + + +# +# my $text = lzw_decode( $binstr ); +# LZW decode $binstr, a string of 0s and 1s that represents a LZW +# compression of an unknown text, using a dictionary containing +# @dictletters. Return the decoded text string (hopefully plain text). +# +fun lzw_decode( $binstr ) +{ + my( $width, $transition ) = np2( scalar(@dict) ); + #die "width=$width, transition=$transition\n"; + + my @newdict = @dict; + my %indict = map { $dict[$_] => $_ } 0..$#dict; + + # remove the very first width $width bit-prefix + my $b = substr( $binstr, 0, $width, "" ); + + # find the position in the dictionary + my $pos = oct( "0b$b" ); # convert binary string->int + my $ndict = @newdict; + print "decoding first $b, pos=$pos, ndict=$ndict\n" if $debug; + + + die "LZW_decode: first bin $b (pos $pos) not in dict, $ndict entries\n" unless $pos<$ndict; + + my $prevf = $newdict[$pos]; # previous text fragment + + my $result = $prevf; + + while( $binstr ) + { + # remove next width $width bit-prefix + my $b = substr( $binstr, 0, $width, "" ); + + print "decoding $b, prevf=$prevf\n" if $debug; + + # find the position in the dictionary + my $pos = oct( "0b$b" ); # convert binary string->int + + my $ndict = @newdict; + + my $f; # current decoded text fragment + + # if pos in dict? + if( $pos < $ndict ) + { + $f = $newdict[$pos]; + } elsif( $pos == $ndict ) + { + $f = $prevf . substr($prevf,0,1); + } else + { + die "decode: bad pos $pos (dictionary has $ndict ". + "entries) result=$result, b=$b, ". + "binstr=$binstr)\n"; + } + + # ok, so $b represents text frag $f + print "b $b, pos=$pos, prevf=$prevf, f=$f\n" if $debug; + + $result .= $f; + + # the next entry in the dictionary must be the WHOLE of + # the previous fragment plus the FIRST letter of the + # current fragment $f + my $new = $prevf . substr($f,0,1); + + $pos = @newdict; + print "added $new @ pos $pos\n" if $debug; + + # add it to the dictionary + push @newdict, $new; + $indict{$new} = $pos; + + # do we need to change the width? + if( @newdict == $transition ) + { + $width++; + $transition *= 2; + print "increasing width to $width, ". + "transition to $transition\n" + if $debug; + } + + $prevf = $f; + } + return $result; +} + + +1; diff --git a/challenge-022/duncan-c-white/perl5/MakePrimes.pm b/challenge-022/duncan-c-white/perl5/MakePrimes.pm new file mode 100644 index 0000000000..2f52150e73 --- /dev/null +++ b/challenge-022/duncan-c-white/perl5/MakePrimes.pm @@ -0,0 +1,90 @@ +# +# mkprimes module (converted from mkprimes.c) +# + +use strict; +use warnings; +use Function::Parameters; + + +my @foundprimes; # remember all primes we've found.. + + +# +# my @primes = primes_upto( $n ); +# Find all primes up to N; return a list of all such primes +# (note that 1 is not usually considered a prime) +# +fun primes_upto( $n ) +{ + my @isprime; + + for( my $i=1; $i<=$n; $i++ ) + { + $isprime[$i] = 1; # initially + } + + # now sieve the non-primes out.. + my $upper = int(sqrt($n)); + printf( "debug: n=%d, upper=%d\n", $n, $upper ); + for( my $i=2; $i<=$upper; $i++ ) + { + if( $isprime[$i] ) + { + #printf( "debug: crossing out multiples of %d\n", $i ); + for( my $j=$i*$i; $j<=$n; $j+=$i ) + { + $isprime[$j] = 0; + } + } + } + + # after sieving, extract the primes + my @primes = grep { $isprime[$_] } 2..$n; + + # remember them + @foundprimes = @primes; + + return @primes; +} + + +# +# my @moreprimes = more_primes( $n, $m ); +# Need more primes! Have @foundprimes up to $n, but need +# to sieve primes from $n+1..$m, so re-sieve, return +# a list of all new primes (in the range $n+1..$m) that we find. +# +fun more_primes( $n, $m ) +{ + my %isprime; + + print "finding more primes from ", $n+1, "..$m\n"; + + for( my $i=$n+1; $i<=$m; $i++ ) + { + $isprime{$i} = 1; # pre-sieving + } + + # now sieve the non-primes out.. + foreach my $prime (@foundprimes) + { + # find first multiple of $prime > $n + my $mult = $prime * (int($n/$prime)+1); + + #print "debug: xo multiples of $prime from $mult to $m\n"; + + for( my $j=$mult; $j<=$m; $j+=$prime ) + { + delete $isprime{$j}; + } + } + + # after sieving, extract the primes + my @primes = grep { $isprime{$_} } $n+1..$m; + push @foundprimes, @primes; + return @primes; +} + + +1; diff --git a/challenge-022/duncan-c-white/perl5/ch-1.pl b/challenge-022/duncan-c-white/perl5/ch-1.pl new file mode 100755 index 0000000000..53dc176926 --- /dev/null +++ b/challenge-022/duncan-c-white/perl5/ch-1.pl @@ -0,0 +1,62 @@ +#!/usr/bin/perl +# +# Challenge 1: "Write a script to print first 10 Sexy Prime Pairs. Sexy +# primes are prime numbers that differ from each other by 6. For example, +# the numbers 5 and 11 are both sexy primes, because 11 - 5 = 6. The term +# "sexy prime" is a pun stemming from the Latin word for six: sex." +# +# My notes: Clearly defined, yet another prime-based task, sounds very +# easy - let's have a go.. easy source of prime numbers is via my +# MakePrimes.pm module (itself converted from my earlier mkprimes.c) +# + +use strict; +use warnings; +use Function::Parameters; +#use Data::Dumper; + +use lib qw(.); # I hate this +use MakePrimes; + +die "Usage: ch-1.pl [NUMSEXY]\n" if @ARGV>1; +my $wanted = shift // 10; + +my $found = find_n_sexy_prime_pairs( $wanted ); + + +# +# my $found = find_n_sexy_prime_pairs( $n ); +# Find $n sexy prime pairs.. return number of pairs found, +# $found should always == $n +# +fun find_n_sexy_prime_pairs( $n ) +{ + my %isprime; + my $found = 0; + + my $w = 40000; + my $from = $w; + my @prime = primes_upto( $w ); + + for(;;) + { + print "searching for sexy primes in ", scalar(@prime), " primes\n"; + foreach my $prime (@prime) + { + $isprime{$prime}++; + if( $prime > 6 && $isprime{$prime-6} ) + { + $found++; + print "found sexy prime pair $found: ", $prime-6, " and $prime\n"; + if( $wanted == $found ) + { + print "found $found sexy prime pairs\n"; + return $found; + } + } + } + print "found $found sexy prime pairs in pass, want $wanted\n"; + @prime = more_primes( $from, $from+$w ); + $from += $w; + } +} diff --git a/challenge-022/duncan-c-white/perl5/ch-2.pl b/challenge-022/duncan-c-white/perl5/ch-2.pl new file mode 100755 index 0000000000..ab43ff181b --- /dev/null +++ b/challenge-022/duncan-c-white/perl5/ch-2.pl @@ -0,0 +1,42 @@ +#!/usr/bin/perl +# +# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW) +# compression algorithm. The script should have method to encode/decode +# algorithm. The wiki page +# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch +# explains the compression algorithm very nicely." +# +# My notes: I read the wiki page, perhaps without concentrating enough. +# Looks complicated, especially the decoding part. My code is in LZW.pm +# + +use strict; +use warnings; + +use lib qw(.); # I hate this! +use LZW; + +my @dict = ( 'A'..'Z' ); + +lzw_setdict( @dict ); +lzw_setdebug( 1 ); + +die "Usage: ch-2.pl E TEXT\nor : ch-2.pl D binarystring\n" unless @ARGV==2; +my $op = shift; + +die "ch-2.pl: bad encode/decode op $op\n" unless $op eq "E" || $op eq "D"; + +my $arg = shift; + +#die "debug: np2(1023)==", np2(1023), "\nnp2(1024)==", np2(1024), "\nnp2(1025)==", np2(1025); + +if( $op eq "E" ) +{ + my $binstr = lzw_encode( $arg ); + print "$binstr\n"; +} +elsif( $op eq "D" ) +{ + my $text = lzw_decode( $arg ); + print "$text\n"; +} diff --git a/challenge-022/duncan-c-white/perl5/encdecode.pl b/challenge-022/duncan-c-white/perl5/encdecode.pl new file mode 100755 index 0000000000..24e7916c4a --- /dev/null +++ b/challenge-022/duncan-c-white/perl5/encdecode.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl +# +# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW) +# compression algorithm. The script should have method to encode/decode +# algorithm. The wiki page +# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch +# explains the compression algorithm very nicely." +# +# My notes: My code is in LZW.pm +# Ok, having written the encode/decode routines, let's give them a +# workout via encoding and then decoding a plain text string.. +# + +use strict; +use warnings; +use Function::Parameters; + +use lib qw(.); # I hate this! +use LZW; + +my @dict = ( 'A'..'Z' ); + +lzw_setdict( @dict ); +lzw_setdebug( 0 ); + +die "Usage: encdecode STRING\n" unless @ARGV==1; +my $text = shift; + +$text .= '#' unless substr($text,length($text)-1) eq '#'; + +my $binstr = lzw_encode( $text ); +print "\ntext: $text\n" if 0; +#print " encodes to: $binstr\n"; +my $text2 = lzw_decode( $binstr ); +#print " which decodes to: $text2\n"; +print " encodes+decodes to: $text2\n" if 0; +die "text $text -> encodes to $binstr -> decodes to $text2\n" + unless $text eq $text2; diff --git a/challenge-022/duncan-c-white/perl5/testlzw.pl b/challenge-022/duncan-c-white/perl5/testlzw.pl new file mode 100755 index 0000000000..efb86814fc --- /dev/null +++ b/challenge-022/duncan-c-white/perl5/testlzw.pl @@ -0,0 +1,60 @@ +#!/usr/bin/perl +# +# Challenge 2: "Write a script to implement Lempel-Ziv-Welch (LZW) +# compression algorithm. The script should have method to encode/decode +# algorithm. The wiki page +# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch +# explains the compression algorithm very nicely." +# +# My notes: My code is in LZW.pm +# Ok, having written the encode/decode routines, let's give them a +# workout.. +# + +use strict; +use warnings; +use Function::Parameters; + +use lib qw(.); # I hate this! +use LZW; + +my @dict = ( 'A'..'Z' ); + +lzw_setdict( @dict ); +lzw_setdebug( 0 ); + +# +# my $plain = make_random_string( $len ); +# Make a random string of length $len, using dictionary @dict. +# +fun make_random_string( $len ) +{ + my $result = ""; + my $dictsize = @dict; + foreach my $n (1..$len) + { + $result .= $dict[ int(rand($dictsize)) ]; + } + return $result; +} + + +die "Usage: testlzw NSTRINGS LENGTH\n" unless @ARGV==2; +my $n = shift; +my $len = shift; + +srand( $$ ^ time() ); + +foreach (1..$n) +{ + my $text = make_random_string( $len ); + my $binstr = lzw_encode( $text ); + $text .= '#'; + print "\ntext: $text\n"; + #print " encodes to: $binstr\n"; + my $text2 = lzw_decode( $binstr ); + #print " which decodes to: $text2\n"; + print " encodes+decodes to: $text2\n" if 0; + die "text $text -> encodes to $binstr -> decodes to $text2\n" + unless $text eq $text2; +} |
