diff options
| author | Jörg Sommrey <28217714+jo-37@users.noreply.github.com> | 2023-03-20 14:18:23 +0100 |
|---|---|---|
| committer | Jörg Sommrey <28217714+jo-37@users.noreply.github.com> | 2023-03-23 18:03:42 +0100 |
| commit | 47a94633bb256f4f2ba93f2e45f25d6b4a40cc09 (patch) | |
| tree | 5b01b8613f0b372deaf965f79104f3078e27c04c | |
| parent | e633a0f727f4eaa5175b1fbd068125c89886825e (diff) | |
| download | perlweeklychallenge-club-47a94633bb256f4f2ba93f2e45f25d6b4a40cc09.tar.gz perlweeklychallenge-club-47a94633bb256f4f2ba93f2e45f25d6b4a40cc09.tar.bz2 perlweeklychallenge-club-47a94633bb256f4f2ba93f2e45f25d6b4a40cc09.zip | |
Challenge 022 task 2
| -rwxr-xr-x | challenge-022/jo-37/perl/ch-2.pl | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/challenge-022/jo-37/perl/ch-2.pl b/challenge-022/jo-37/perl/ch-2.pl new file mode 100755 index 0000000000..56266d2576 --- /dev/null +++ b/challenge-022/jo-37/perl/ch-2.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl -s + +use v5.16; +use Test2::V0; +use Math::Utils qw(log2 ceil); +use experimental qw(signatures postderef); + +our ($tests, $examples, $decompress); + +run_tests() if $tests || $examples; # does not return + +die <<EOS unless @ARGV; +usage: $0 [-examples] [-tests] [-decompress] [STR] + +-examples + run the examples from the challenge + +-tests + run some tests + +-decompress + decompress bitstring + +STR + String made of capital letters (for compression) or 0/1 (for decompression) + +EOS + + +### Input and Output + +say $decompress ? lzw_decompress($ARGV[0]) : lzw_compress($ARGV[0] . '#'); + + +### Implementation + +# Following the description in the Wiki article with '#' as the +# end-of-string marker encoded as zero and a remaining dictionary of 'A' +# - 'Z'. Not packing the resulting bits for better visibility. + +use constant DICT => ('#', 'A' .. 'Z'); + +sub lzw_compress ($s) { + $s =~ tr/[A-Z]#//cd; + my @dict = DICT; + my $outbits; + while () { + # Current code length + my $bits = ceil log2 scalar @dict; + my $dict_ind; + my $len; + # Find the longest dictionary entry matching the string head. + for ($len = 1;; $len++) { + my $found; + for (my $ind = 0; $ind < @dict; $ind++) { + ($dict_ind, $found) = ($ind, 1) + if substr($s, 0, $len) eq $dict[$ind]; + } + last unless $found; + } + # Encode the string head as the found entry. + $outbits .= sprintf "%0*b", $bits, $dict_ind; + # Create a new dictionary entry from the matched head and the + # next character and remove the matched head. + push @dict, substr($s, 0, $len - 1, '') . substr($s, 0, 1); + if (substr($s, 0, 1) eq '#') { + $outbits .= sprintf("%0*b", $bits, 0); + last; + } + } + $outbits; +} + +sub lzw_decompress ($s) { + $s =~ tr/01//cd; + my @dict = DICT; + my $outtext; + while () { + # Current code length + my $bits = ceil log2 scalar @dict; + # Pick bits in the current length and convert to integer. + my $code = oct "0b" . substr $s, 0, $bits, ''; + # Replace a question mark in the previous dictionary entry with + # the start character of the selected entry. + $dict[-1] =~ s/\?/substr $dict[$code], 0, 1/e; + # Add an incomplete new entry to the dictionary. Its last + # character is still unknown. + push @dict, "$dict[$code]?"; + # Collect the dictionary entry in the result. + $outtext .= $dict[$code]; + last if $dict[$code] eq '#'; + } + $outtext; +} + +### Examples and tests + +sub run_tests { + SKIP: { + skip "examples" unless $examples; + + is lzw_compress("TOBEORNOTTOBEORTOBEORNOT#"), + "101000111100010001010111110010001110001111010100011011011101011111100100011110100000100010000000", 'compression example from Wiki'; + is lzw_decompress("101000111100010001010111110010001110001111010100011011011101011111100100011110100000100010000000"), + "TOBEORNOTTOBEORTOBEORNOT#", 'decompression example from Wiki'; + } + + SKIP: { + skip "tests" unless $tests; + } + + done_testing; + exit; +} |
