aboutsummaryrefslogtreecommitdiff
path: root/challenge-022/jo-37/perl/ch-2.pl
blob: 56266d25762cde0e45055218c580820ec98a3498 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/perl -s

use v5.16;
use Test2::V0;
use Math::Utils qw(log2 ceil);
use experimental qw(signatures postderef);

our ($tests, $examples, $decompress);

run_tests() if $tests || $examples;	# does not return

die <<EOS unless @ARGV;
usage: $0 [-examples] [-tests] [-decompress] [STR]

-examples
    run the examples from the challenge
 
-tests
    run some tests

-decompress
    decompress bitstring

STR
    String made of capital letters (for compression) or 0/1 (for decompression)

EOS


### Input and Output

say $decompress ? lzw_decompress($ARGV[0]) : lzw_compress($ARGV[0] . '#');


### Implementation

# Following the description in the Wiki article with '#' as the
# end-of-string marker encoded as zero and a remaining dictionary of 'A'
# - 'Z'.  Not packing the resulting bits for better visibility.

use constant DICT => ('#', 'A' .. 'Z');

sub lzw_compress ($s) {
    $s =~ tr/[A-Z]#//cd;
    my @dict = DICT;
    my $outbits;
    while () {
        # Current code length
        my $bits = ceil log2 scalar @dict;
        my $dict_ind;
        my $len;
        # Find the longest dictionary entry matching the string head.
        for ($len = 1;; $len++) {
            my $found;
            for (my $ind = 0; $ind < @dict; $ind++) {
                ($dict_ind, $found) = ($ind, 1)
                    if substr($s, 0, $len) eq $dict[$ind];
            }
            last unless $found;
        }
        # Encode the string head as the found entry.
        $outbits .= sprintf "%0*b", $bits, $dict_ind;
        # Create a new dictionary entry from the matched head and the
        # next character and remove the matched head.
        push @dict, substr($s, 0, $len - 1, '') . substr($s, 0, 1);
        if (substr($s, 0, 1) eq '#') {
            $outbits .= sprintf("%0*b", $bits, 0);
            last;
        }
    }
    $outbits;
}

sub lzw_decompress ($s) {
    $s =~ tr/01//cd;
    my @dict = DICT;
    my $outtext;
    while () {
        # Current code length
        my $bits = ceil log2 scalar @dict;
        # Pick bits in the current length and convert to integer.
        my $code = oct "0b" . substr $s, 0, $bits, '';
        # Replace a question mark in the previous dictionary entry with
        # the start character of the selected entry.
        $dict[-1] =~ s/\?/substr $dict[$code], 0, 1/e;
        # Add an incomplete new entry to the dictionary. Its last
        # character is still unknown.
        push @dict, "$dict[$code]?";
        # Collect the dictionary entry in the result.
        $outtext .= $dict[$code];
        last if $dict[$code] eq '#';
    }
    $outtext;
}

### Examples and tests

sub run_tests {
    SKIP: {
        skip "examples" unless $examples;

        is lzw_compress("TOBEORNOTTOBEORTOBEORNOT#"),
        "101000111100010001010111110010001110001111010100011011011101011111100100011110100000100010000000", 'compression example from Wiki';
        is lzw_decompress("101000111100010001010111110010001110001111010100011011011101011111100100011110100000100010000000"),
        "TOBEORNOTTOBEORTOBEORNOT#", 'decompression example from Wiki';
    }

    SKIP: {
        skip "tests" unless $tests;
	}

    done_testing;
    exit;
}