1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
#!/usr/bin/perl
use Modern::Perl;
# Write a script to implement Lempel Ziv Welch (LZW) compression algorithm.
# The script should have method to encode/decode algorithm.
my $str = shift || "TOBEORNOTTOBEORTOBEORNOT";
my $LZW = LZW->new();
my @e = $LZW->encode($str);
my $d = $LZW->decode(@e);
say "@e";
say "$d";
exit;
package LZW;
use Data::Dumper;
sub new {
return bless { DEBUG => 0 }, shift;
}
sub encode {
my $self = shift;
my @input = split("",shift);
my @output;
# dictionary to encode bytes into keys
my %dict = map { chr($_) => $_ } (0 .. 255);
my $key = '';
foreach my $byte (@input) {
# Add entries to our dictionary by building longer and longer keys,
# the side effect being compression. The more the key is used or
# the longer key gets ... the more compression.
my $next_key = $key . $byte;
if (exists $dict{$next_key}) {
$key = $next_key;
} else {
push @output, $dict{$key};
# Because we're 0 indexed $next_key equals number of keys
# eg. (0..255), scalar(keys %dict) == 256, next key is 256
$dict{$next_key} = scalar(keys %dict);
$key = $byte;
}
}
push @output, $dict{$key} if ($key);
say Dumper(\%dict) if ($self->{DEBUG});
return @output;
}
sub decode {
my $self = shift;
my @input = @_;
my @output;
# dictionary to decode from keys back to bytes
my %dict = map { $_ => chr($_) } (0 .. 255);
my $byte = '';
foreach my $key (@input) {
my $str;
# The first key (byte) could never be encoded,
# because we had nothing to compare it too.
$str = chr($key) if not ($byte);
my $next_key = scalar(keys %dict);
if (exists $dict{$key}) {
$str = $dict{$key};
} elsif ($key == $next_key) {
$str = $byte . substr($byte,0,1);
} else {
die "decoding failed.";
}
# Reconstruct dictionary by concatenating previous entries.
# If you data dump the dictionary, you will see that a
# key's last byte always equals the next key's first byte.
$dict{$next_key} = $byte . substr($str,0,1) if ($byte);
push @output, $str;
$byte = $str;
}
say Dumper(\%dict) if ($self->{DEBUG});
return join("",@output);
}
1;
__END__
This was a cool problem.
I didn't write the above from scratch...
I had to read lots of psuedo code and examples,
but now I understand how LZW compression works!
./ch-2.pl
84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263
TOBEORNOTTOBEORTOBEORNOT
./ch-2.pl "A"
65
A
./ch-2.pl "AAA"
65 256
AAA
./ch-2.pl "AAABBBCCC"
65 256 66 258 67 260
AAABBBCCC
|