Files
encoder.src
// Fixed bit width of one LZW table cell in the binary stream (used by
// lzw_table_to_bin and bin_to_lzw_table). 14 is a multiple of 7, so any whole
// number of cells splits evenly into the 7-bit symbols used for the text form.
CELL_LEN = 14
- // require lst
- Encoder = {}
- Encoder.CELL_LEN = 14
- char_set = []
- char_set.push(range(48, 57)) // numbers
- char_set.push(range(65, 90)) // capital_letters
- char_set.push(range(97, 122)) // letters
- char_set.push(range(128, 193)) // extended
- char_set = char_set.flat // <- 7 bit char set
- Encoder.CHAR_SET = char_set
- // UTILS ------------------------------------------------------
- Encoder.Utils = {}
// Convert a non-negative integer to its binary representation, returned as a
// string of "0"/"1" characters with no leading zeros (0 -> "0", 1 -> "1").
// Recursive: the bits of floor(n / 2) are followed by the lowest bit, n % 2.
// NOTE(review): a negative n never reaches either base case - confirm that
// callers only ever pass non-negative integers.
to_bin = function(n)
- Encoder.Utils.to_bin = function(n)
- if n == 0 then return "0"
- if n == 1 then return "1"
return to_bin(floor(n / 2)) + str(n % 2)
- return self.to_bin(floor(n / 2)) + str(n % 2)
- end function
// Parse a string of "0"/"1" characters (most significant bit first) into a
// number. Only characters equal to "1" contribute; an empty string gives 0.
to_int = function(n)
- Encoder.Utils.to_int = function(n)
// Split into characters and reverse them so that index i carries weight 2 ^ i.
- n = n.values
- n.reverse
- r = 0
- for i in n.indexes
- if n[i] == "1" then
- r = r + 2 ^ i
- end if
- end for
- return r
- end function
// Split string s into a list of consecutive chunks of n characters each.
// An empty s yields []. When fewer than n characters remain, the remainder is
// returned as a final shorter chunk, unpadded (the zero-padding is commented out).
// Recursive: the first n characters are inserted in front of the chunks of
// the rest of the string.
divide = function(s, n)
- Encoder.Utils.divide = function(s, n)
- if s.len == 0 then
- return []
- end if
- if s.len < n then
- //prepend = "0" * (n - s.len)
- //s = prepend + s
- return [s]
- end if
-
return divide(s[n:], n).insert(0, s[0:n])
- return self.divide(s[n:], n).insert(0, s[0:n])
- end function
// Alphabet used to render 7-bit values as text: the position in this list is
// the 7-bit value and the entry is the character code that represents it.
// 10 digits + 26 capitals + 26 lowercase + 65 extended codes + "@" = 128 entries.
char_set = []
char_set.push(range(48, 57)) // numbers
char_set.push(range(65, 90)) // capital_letters
char_set.push(range(97, 122)) // letters
char_set.push(range(128, 192)) // extended
char_set.push("@".code)
// The pushes above nest whole ranges inside the list; flatten to a single list
// of codes. NOTE(review): .flat is presumably provided by the lst library - verify.
char_set = char_set.flat // <- 7 bit char set
- // -------------------------------------------------------------
// Serialize an LZW table into one continuous bit string.
// Each entry becomes a CELL_LEN-wide, zero-padded binary cell; entries that
// are strings are first replaced by their character code.
// table: list of strings and/or numbers.
// Returns the concatenation of all cells as a string of "0"/"1" characters.
lzw_table_to_bin = function(table)
    to_cell = function(entry)
        num = entry
        if entry isa string then num = entry.code
        // Convert once and reuse the result for both the padding and the payload.
        digits = to_bin(num)
        return "0" * (CELL_LEN - digits.len) + digits
    end function
    return table.map(@to_cell).join("")
end function
// Rebuild an LZW table from a bit string.
// The stream is cut into CELL_LEN-wide cells and each cell is parsed as a
// number; values below 256 are turned back into characters, larger values are
// kept as numeric codes.
// stream: string of "0"/"1" characters.
// Returns a list of characters and numbers.
bin_to_lzw_table = function(stream)
    from_cell = function(cell)
        num = to_int(cell)
        if num < 256 then return char(num)
        // The unreachable bare `return` that used to follow this line was removed.
        return num
    end function
    return divide(stream, CELL_LEN).map(@from_cell)
end function
// Render a bit string as text, 7 bits per character.
// Each 7-bit group is parsed as a number and used as a position in char_set;
// the character code found there is converted to a character.
// bits_stream: string of "0"/"1" characters.
// Returns the resulting text.
stream_to_string = function(bits_stream)
    symbol_to_char = function(group)
        position = to_int(group)
        return char(char_set[position])
    end function
    groups = divide(bits_stream, 7)
    return groups.map(@symbol_to_char).join("")
end function
// Turn text back into a bit string, 7 bits per character.
// Each character's code is looked up in char_set; its position is written as
// a binary number left-padded with zeros to 7 digits.
// string: text to convert.
// Returns the concatenated bits as a string of "0"/"1" characters.
string_to_stream = function(string)
    char_to_symbol = function(ch)
        position = char_set.indexOf(ch.code)
        digits = to_bin(position)
        padding = "0" * (7 - digits.len)
        return padding + digits
    end function
    chars = string.values
    return chars.map(@char_to_symbol).join("")
end function
// Encode an LZW table (a list of strings and numeric codes) as text.
// Steps performed by the body below:
//   1. convert every entry to binary and left-pad all of them to the widest
//      entry's length (CELL_SIZE);
//   2. join the cells and right-pad with FAT_ADDED zeros to a multiple of 7 bits;
//   3. put two 6-bit header fields in front: CELL_SIZE, then FAT_ADDED;
//   4. re-split everything into 7-bit symbols and map each through CHAR_SET.
- Encoder.encode = function(lzw)
// Work on a copy so the caller's list is not modified.
- l = lzw[0:]
- CELL_SIZE = 0
- for i in l.indexes
- if l[i] isa string then l[i] = l[i].code
- l[i] = self.Utils.to_bin(l[i])
- if l[i].len > CELL_SIZE then CELL_SIZE = l[i].len
- end for
-
- for i in l.indexes
- l[i] = "0" * (CELL_SIZE - l[i].len) + l[i]
- end for
// encode_lzw_table: serialize the table to fixed-width cells with
// lzw_table_to_bin and render the resulting bit stream as text.
encode_lzw_table = function(table)
return stream_to_string(lzw_table_to_bin(table))
- l = l.join("")
// Number of zero bits needed to reach the next multiple of 7 (0 if already aligned).
- FAT_ADDED = l.len % 7
- if FAT_ADDED > 0 then FAT_ADDED = 7 - FAT_ADDED
-
- l = l + "0" * FAT_ADDED
-
- l = self.Utils.divide(l, 7)
-
// Both header fields are inserted at position 0, so the one inserted last
// (CELL_SIZE) ends up first in the stream.
- fat_bin = self.Utils.to_bin(FAT_ADDED)
- l.insert(0, "0" * (6 - fat_bin.len) + fat_bin)
-
- cell_bin = self.Utils.to_bin(CELL_SIZE)
- l.insert(0, "0" * (6 - cell_bin.len) + cell_bin)
-
// The 12 header bits shift the alignment, so the stream is re-split into 7-bit
// symbols; the last symbol is left shorter than 7 bits.
- l = self.Utils.divide(l.join(""), 7)
-
- for i in l.indexes
- l[i] = char(self.CHAR_SET[self.Utils.to_int(l[i])])
- end for
- return l.join("")
- end function
// Decode text back into an LZW table (a list of characters and numeric codes).
// NOTE(review): only the header line of decode_lzw_table is visible here; the
// statements below go through self.Utils / self.CHAR_SET, i.e. the Encoder
// table form of this function.
decode_lzw_table = function(string)
- Encoder.decode = function(string)
- l = string.values
// Map every character back to its position in CHAR_SET, written as 7 bits.
- for i in l.indexes
- l[i] = self.Utils.to_bin(self.CHAR_SET.indexOf(l[i].code))
- l[i] = "0" * (7 - l[i].len) + l[i]
- end for
// Drop the first two bits of the last symbol, undoing the padding just applied
// to it. Presumably the final symbol was only 5 bits wide when encoded - verify.
- l[-1] = l[-1][2:]
- l = l.join("")
-
// Header: two 6-bit fields, cell width first, then the number of padding bits.
- CELL_SIZE = self.Utils.to_int(l[0:6])
- FAT_ADDED = self.Utils.to_int(l[6: 12])
-
- l = self.Utils.divide(l[12:], 7)
// Remove the padding bits from the end of the payload.
// NOTE(review): when FAT_ADDED is 0 this slice is [0:0], which looks like it
// would empty the whole last chunk instead of leaving it untouched - confirm.
- l[-1] = l[-1][0:FAT_ADDED * -1]
- l = self.Utils.divide(l.join(""), CELL_SIZE)
-
// Cells below 256 are turned back into characters; larger values stay numeric.
- for i in l.indexes
- l[i] = self.Utils.to_int(l[i])
- if l[i] < 256 then l[i] = char(l[i])
- end for
-
- return l
- end function
test.src
import_code("/home/me/projects/lzw/libs/lst.src")
import_code("/home/me/projects/lzw/lzw.src")
import_code("/home/me/projects/lzw/encoder.src")
// Round-trip check: compress the typed text, encode it, then decode and
// decompress it again and compare the result with what was typed.
source_text = user_input("in: ")
lzw_table = Lzw.compress(source_text)
//lzw_table = Lzw.compress("TOBEORNOTTOBEORTOBEORNOT")
print "compressed: " + lzw_table

encoded = Encoder.encode(lzw_table)
print "encoded: " + encoded

restored = Lzw.decompress(Encoder.decode(encoded))
print "decoded and decompressed: " + restored

// Size comparison, followed by both texts and the equality verdict.
print "input len: " + source_text.len
print "encoded len: " + encoded.len
print source_text
print restored
print source_text == restored
example.txt
ENCODE :
TOBEORNOTTOBEORTOBEORNOT
["T", "O", "B", "E", "O", "R", "N", "O", "T", 256, 258, 260, 265, 259, 261, 263]
[84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263]
CELL_SIZE = 9
["1010100", "1001111", "1000010", "1000101", "1001111", "1010010", "1001110", "1001111", "1010100", "100000000", "100000010", "100000100", "100001001", "100000011", "100000101", "100000111"]
["001010100", "001001111", "001000010", "001000101", "001001111", "001010010", "001001110", "001001111", "001010100", "100000000", "100000010", "100000100", "100001001", "100000011", "100000101", "100000111"]
FAT_ADDED = 3
["0010101", "0000100", "1111001", "0000100", "0100010", "1001001", "1110010", "1001000", "1001110", "0010011", "1100101", "0100100", "0000001", "0000001", "0100000", "1001000", "0100110", "0000011", "1000001", "0110000", "0111000"]
["001001", "000011", "0010101", "0000100", "1111001", "0000100", "0100010", "1001001", "1110010", "1001000", "1001110", "0010011", "1100101", "0100100", "0000001", "0000001", "0100000", "1001000", "0100110", "0000011", "1000001", "0110000", "0111000"]
["0010010", "0001100", "1010100", "0010011", "1100100", "0010001", "0001010", "0100111", "1001010", "0100010", "0111000", "1001111", "0010101", "0010000", "0000100", "0000101", "0000010", "0100001", "0011000", "0001110", "0000101", "1000001", "11000"]
[18, 12, 84, 19, 100, 17, 10, 39, 74, 34, 56, 79, 21, 16, 4, 5, 2, 33, 24, 14, 5, 65, 24]
[73, 67, 150, 74, 166, 72, 65, 100, 140, 89, 117, 145, 76, 71, 52, 53, 50, 88, 79, 69, 53, 131, 79]
["I", "C", "", "J", "¦", "H", "A", "d", "", "Y", "u", "", "L", "G", "4", "5", "2", "X", "O", "E", "5", "", "O"]
ICJ¦HAdYuLG452XOE5O