Grey Repo

Files

lzw.src encoder.src test.src example.txt

lzw.src

~~import_code("/home/me/projects/lzw/libs/lst.src")~~
~~import_code("/home/me/projects/lzw/encoder.src")~~
// require lst
// Namespace map that collects the LZW functions (generate_dict, compress, decompress).
Lzw = {}
~~generate_dict = function(length)~~
~~f = function(i)~~
~~return [char(i), char(i)]~~
~~end function~~
~~return range(length - 1).sort.map(@f).to_map~~
// Builds the seed dictionary shared by compress and decompress.
// length: number of single-character entries to create (codes 0 .. length - 1).
// Returns a map in which every character char(code) is both key and value.
Lzw.generate_dict = function(length)
    seed = {}
    for code in range(0, length - 1)
        symbol = char(code)
        seed[symbol] = symbol
    end for
    return seed
end function
~~compress = function(uncompressed)~~
// Compresses a string with the LZW algorithm.
// uncompressed: string to compress; every character must be present in the
//               256-entry seed dictionary built by generate_dict.
// Returns a list of codes: a single-character string for a seed entry and a
// number (256 and up) for every sequence learned while compressing.
// Terminates the script via exit() when a character is not in the seed dictionary.
Lzw.compress = function(uncompressed)
    dict_size = 256
    dictionary = self.generate_dict(dict_size)
    w = ""
    result = []
    for c in uncompressed.values
        // A character outside the seed dictionary would otherwise surface later
        // as an unexplained key error on dictionary[w].
        if not dictionary.hasIndex(c) then exit("Bad uncompressed char: " + c)
        wc = w + c
        if dictionary.hasIndex(wc) then
            // Keep extending the current sequence while it is already known.
            w = wc
        else
            // Emit the longest known sequence and learn the extended one.
            result.push(dictionary[w])
            dictionary[wc] = dict_size
            dict_size = dict_size + 1
            w = c
        end if
    end for
    // Flush the sequence still pending after the last character.
    if w != "" then result.push(dictionary[w])
    return result
end function
~~decompress = function(compressed)~~
// Rebuilds the original string from a list of LZW codes.
// compressed: list whose items are single-character strings (seed entries)
//             or numbers (codes 256 and up learned during compression).
// Returns the decompressed string; an empty list yields "".
// Terminates the script via exit() when a code is neither in the dictionary
// nor the next code about to be assigned.
Lzw.decompress = function(compressed)
    // compress("") returns [], so an empty list has to map back to ""
    // instead of failing on compressed[0].
    if compressed.len == 0 then return ""
    dict_size = 256
    dictionary = self.generate_dict(dict_size)
    w = compressed[0]
    result = w
    for k in compressed[1:]
        if dictionary.hasIndex(k) then
            entry = dictionary[k]
        else if k == dict_size then
            // The code refers to the entry that is about to be created:
            // it must be the previous sequence plus its own first character.
            entry = w + w[0]
        else
            exit("Bad compressed k: " + k)
        end if
        result = result + entry
        // Learn the same sequence the compressor added at this step.
        dictionary[dict_size] = w + entry[0]
        dict_size = dict_size + 1
        w = entry
    end for
    return result
end function

encoder.src

~~CELL_LEN = 14~~
// require lst
// Namespace map that collects the encoder constants, helpers and functions.
Encoder = {}
// Cell width constant kept on the namespace. The encode/decode functions
// visible in this file do not read it: they compute the width per message.
Encoder.CELL_LEN = 14
// Character codes used to render 7-bit values as text: a value v is written
// as char(CHAR_SET[v]) by encode and looked up with indexOf by decode.
char_set = []
char_set.push(range(48, 57)) // numbers
char_set.push(range(65, 90)) // capital_letters
char_set.push(range(97, 122)) // letters
char_set.push(range(128, 193)) // extended
// NOTE(review): .flat is not defined in this file - presumably supplied by the
// lst library and expected to merge the four pushed ranges into one flat list
// of 10 + 26 + 26 + 66 = 128 codes; confirm against lst.
char_set = char_set.flat // <- 7 bit char set
Encoder.CHAR_SET = char_set
// UTILS ------------------------------------------------------
// Sub-namespace for the bit-string helpers (to_bin, to_int, divide).
Encoder.Utils = {}
~~to_bin = function(n)~~
// Converts a number to its binary digits.
// n: number to convert (0 gives "0", 1 gives "1").
// Returns a string of binary digits with no leading zero padding.
Encoder.Utils.to_bin = function(n)
    digits = ""
    rest = n
    // Peel off the lowest digit until only the leading 0 or 1 remains.
    while rest != 0 and rest != 1
        digits = str(rest % 2) + digits
        rest = floor(rest / 2)
    end while
    if rest == 0 then
        lead = "0"
    else
        lead = "1"
    end if
    return lead + digits
end function
~~to_int = function(n)~~
// Converts a string of binary digits to a number.
// n: string read most-significant digit first; any character other than "1"
//    counts as a zero digit.
// Returns the numeric value (0 for an empty string).
Encoder.Utils.to_int = function(n)
    total = 0
    for digit in n.values
        // Shift what has been read so far, then add the current digit.
        total = total * 2
        if digit == "1" then total = total + 1
    end for
    return total
end function
~~divide = function(s, n)~~
// Splits a sequence into consecutive chunks of n items.
// s: string to split.
// n: chunk length; must be at least 1.
// Returns a list of chunks in order. The last chunk is shorter than n when
// s.len is not a multiple of n (it is not padded); an empty s gives [].
// Terminates the script via exit() when n is not positive and s is not empty.
Encoder.Utils.divide = function(s, n)
    if s.len == 0 then return []
    // A non-positive chunk length can never consume the input.
    if n < 1 then exit("divide: chunk length must be positive")
    chunks = []
    start = 0
    // Iterative on purpose: one pass, no call depth proportional to s.len / n.
    while s.len - start >= n
        chunks.push(s[start:start + n])
        start = start + n
    end while
    // Keep the shorter remainder as its own chunk.
    if start < s.len then chunks.push(s[start:])
    return chunks
end function
~~char_set = []~~
~~char_set.push(range(48, 57)) // numbers~~
~~char_set.push(range(65, 90)) // capital_letters~~
~~char_set.push(range(97, 122)) // letters~~
~~char_set.push(range(128, 192)) // extended~~
~~char_set.push("@".code)~~
~~char_set = char_set.flat // <- 7 bit char set~~
// -------------------------------------------------------------
~~lzw_table_to_bin = function(table)~~
~~f = function(c)~~
~~r = c~~
~~if c isa string then r = c.code~~
~~return "0" * (CELL_LEN - to_bin(r).len) + to_bin(r)~~
~~end function~~
~~return table.map(@f).join("")~~
~~end function~~
~~bin_to_lzw_table = function(stream)~~
~~f = function(s)~~
n = to_int(s)
if n < 256 then return char(n)
~~return n~~
~~return~~
~~end function~~
return divide(stream, CELL_LEN).map(@f)
~~end function~~
~~stream_to_string = function(bits_stream)~~
~~f = function(s)~~
~~return char(char_set[to_int(s)])~~
~~end function~~
~~return divide(bits_stream, 7).map(@f).join("")~~
~~end function~~
~~string_to_stream = function(string)~~
~~f = function(s)~~
bin = to_bin(char_set.indexOf(s.code))
~~return "0" * (7 - bin.len) + bin~~
~~end function~~
~~return string.values.map(@f).join("")~~
~~end function~~
// Packs a list of LZW codes into a string made of CHAR_SET characters.
// lzw: list of codes; a string item is replaced by its character code,
//      a number is used as is. The list itself is not modified.
// Bit layout before it is cut into 7-bit groups:
//   6 bits  cell width (width of the widest code)
//   6 bits  number of zero bits appended as padding
//   codes   each left-padded with zeros to the cell width
//   padding zeros so the code section is a multiple of 7 bits
// Returns the string obtained by mapping each 7-bit group (the final group
// holds 5 bits) through CHAR_SET.
Encoder.encode = function(lzw)
    cells = []
    CELL_SIZE = 0
    for value in lzw
        if value isa string then value = value.code
        bin = self.Utils.to_bin(value)
        if bin.len > CELL_SIZE then CELL_SIZE = bin.len
        cells.push(bin)
    end for
    // Every code is written with the width of the widest one.
    for i in cells.indexes
        cells[i] = "0" * (CELL_SIZE - cells[i].len) + cells[i]
    end for
    payload = cells.join("")
    FAT_ADDED = payload.len % 7
    if FAT_ADDED > 0 then FAT_ADDED = 7 - FAT_ADDED
    payload = payload + "0" * FAT_ADDED
    cell_bin = self.Utils.to_bin(CELL_SIZE)
    fat_bin = self.Utils.to_bin(FAT_ADDED)
    header = "0" * (6 - cell_bin.len) + cell_bin + "0" * (6 - fat_bin.len) + fat_bin
    groups = self.Utils.divide(header + payload, 7)
    for i in groups.indexes
        groups[i] = char(self.CHAR_SET[self.Utils.to_int(groups[i])])
    end for
    return groups.join("")
end function
~~decode_lzw_table = function(string)~~
// Unpacks a string produced by Encoder.encode back into a list of LZW codes.
// string: text made of CHAR_SET characters (at least the two header characters).
// Returns a list in which codes below 256 are single-character strings and
// all other codes are numbers; a message without codes gives [].
// Terminates the script via exit() on a string that is too short or that
// contains a character missing from CHAR_SET.
Encoder.decode = function(string)
    if string.len < 2 then exit("Bad encoded string: " + string)
    groups = string.values
    for i in groups.indexes
        pos = self.CHAR_SET.indexOf(groups[i].code)
        if pos == null then exit("Bad encoded char: " + groups[i])
        bin = self.Utils.to_bin(pos)
        groups[i] = "0" * (7 - bin.len) + bin
    end for
    // The stream is 12 header bits plus a multiple of 7 code bits, so the
    // final character carries only 5 bits: drop its two padding zeros.
    groups[-1] = groups[-1][2:]
    bits = groups.join("")
    CELL_SIZE = self.Utils.to_int(bits[0:6])
    FAT_ADDED = self.Utils.to_int(bits[6:12])
    payload = bits[12:]
    // Strip the padding only when there is some: slicing with an end of 0
    // would discard the last bits of an unpadded message.
    if FAT_ADDED > 0 then payload = payload[0:payload.len - FAT_ADDED]
    if CELL_SIZE == 0 or payload.len == 0 then return []
    codes = self.Utils.divide(payload, CELL_SIZE)
    for i in codes.indexes
        codes[i] = self.Utils.to_int(codes[i])
        // Codes below 256 are returned as characters rather than numbers.
        if codes[i] < 256 then codes[i] = char(codes[i])
    end for
    return codes
end function

test.src

~~import_code("/home/me/projects/lzw/libs/lst.src")~~
~~import_code("/home/me/projects/lzw/lzw.src")~~
~~import_code("/home/me/projects/lzw/encoder.src")~~
~~input = user_input("in: ")~~
~~compressed = Lzw.compress(input)~~
~~//compressed = Lzw.compress("TOBEORNOTTOBEORTOBEORNOT")~~
~~print "compressed: " + compressed~~
~~e = Encoder.encode(compressed)~~
~~print "encoded: " + e~~
~~dd = Lzw.decompress(Encoder.decode(e))~~
~~print "decoded and decompressed: " + dd~~
~~print "input len: " + input.len~~
~~print "encoded len: " + e.len~~
~~print input~~
~~print dd~~
~~print input == dd~~

example.txt

~~ENCODE :~~
~~TOBEORNOTTOBEORTOBEORNOT~~
~~["T", "O", "B", "E", "O", "R", "N", "O", "T", 256, 258, 260, 265, 259, 261, 263]~~
~~[84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263]~~
~~CELL_SIZE = 9~~
~~["1010100", "1001111", "1000010", "1000101", "1001111", "1010010", "1001110", "1001111", "1010100", "100000000", "100000010", "100000100", "100001001", "100000011", "100000101", "100000111"]~~
["001010100", "001001111", "001000010", "001000101", "001001111", "001010010", "001001110", "001001111", "001010100", "100000000", "100000010", "100000100", "100001001", "100000011", "100000101", "100000111"]
~~FAT_ADDED = 3~~
["0010101", "0000100", "1111001", "0000100", "0100010", "1001001", "1110010", "1001000", "1001110", "0010011", "1100101", "0100100", "0000001", "0000001", "0100000", "1001000", "0100110", "0000011", "1000001", "0110000", "0111000"]
["001001", "000011", "0010101", "0000100", "1111001", "0000100", "0100010", "1001001", "1110010", "1001000", "1001110", "0010011", "1100101", "0100100", "0000001", "0000001", "0100000", "1001000", "0100110", "0000011", "1000001", "0110000", "0111000"]
["0010010", "0001100", "1010100", "0010011", "1100100", "0010001", "0001010", "0100111", "1001010", "0100010", "0111000", "1001111", "0010101", "0010000", "0000100", "0000101", "0000010", "0100001", "0011000", "0001110", "0000101", "1000001", "11000"]
~~[18, 12, 84, 19, 100, 17, 10, 39, 74, 34, 56, 79, 21, 16, 4, 5, 2, 33, 24, 14, 5, 65, 24]~~
~~[73, 67, 150, 74, 166, 72, 65, 100, 140, 89, 117, 145, 76, 71, 52, 53, 50, 88, 79, 69, 53, 131, 79]~~
~~["I", "C", "", "J", "¦", "H", "A", "d", "", "Y", "u", "", "L", "G", "4", "5", "2", "X", "O", "E", "5", "", "O"]~~
~~ICJ¦HAdYuLG452XOE5O~~