1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
#!/usr/bin/python3
# Challenge 022
#
# Task #2
# Write a script to implement Lempel-Ziv-Welch (LZW) compression algorithm.
# The script should have method to encode/decode algorithm. The wiki page
# explains the compression algorithm very nicely.
import sys
class Dict():
EOM = '#'
SYMBOLS = ['A','B','C','D','E','F','G','H','I','J','K','L','M',
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
def __init__(self):
self.dict = {}
self.symbols = []
for sym in [Dict.EOM, *Dict.SYMBOLS]:
self.add(sym)
def __str__(self):
return "dict="+str(self.dict)+",symbols="+str(self.symbols)
def last(self):
return len(self.symbols)-1
def width(self):
return len("{:b}".format(self.last()))
def next_width(self):
return len("{:b}".format(self.last()+1))
def add(self, seq):
seq = seq.upper()
id = self.last()+1
self.dict[seq] = id
self.symbols.append(seq)
def longest_match(self, text):
text = text.upper()
# find longest match
match_len = 0
while match_len < len(text) and \
text[:match_len+1] in self.dict:
match_len += 1
w = text[:match_len]
text = text[match_len:]
code = self.dict[w]
old_width = self.width()
# store new prefix in the dictionary
if text != "":
next_prefix = w+text[0]
self.add(next_prefix)
# return code and new text
return ("{:0"+str(old_width)+"b}").format(code), text
def encode(text):
text = text.upper()+Dict.EOM
encoded = ""
dict = Dict()
while text != "":
code, text = dict.longest_match(text)
encoded += code
return encoded
def decode(encoded):
text = ""
dict = Dict()
while encoded != "":
width = dict.width()
code = int(encoded[:width], 2)
encoded = encoded[width:]
seq = dict.symbols[code]
text += seq
if encoded != "":
next_width = dict.next_width()
next_code = int(encoded[:next_width], 2)
next_seq = dict.symbols[next_code]
dict.add(seq + next_seq[0])
# remove end terminator
text = text[:-1]
return text
if sys.argv[1] == "encode":
print(encode(sys.argv[2]))
elif sys.argv[1] == "decode":
print(decode(sys.argv[2]))
else:
print("Usage: ch-2.py encode|decode text")
|