Files

lzw.src
  • import_code("/home/me/projects/lzw/libs/lst.src")
  • import_code("/home/me/projects/lzw/encoder.src")
  • // require lst
  • Lzw = {}
  • generate_dict = function(length)
  • f = function(i)
  • return [char(i), char(i)]
  • end function
  • return range(length - 1).sort.map(@f).to_map
  • // Build the initial LZW dictionary: for every i produced by range(length - 1),
  • // visited in ascending order, map the one-character string char(i) to itself.
  • Lzw.generate_dict = function(length)
  • codes = range(length - 1).sort
  • table = {}
  • for code in codes
  • symbol = char(code)
  • table[symbol] = symbol
  • end for
  • return table
  • end function
  • compress = function(uncompressed) // NOTE(review): this header and the next one both open a function but only one `end function` follows; looks like a before/after pair, confirm which is meant to stay
  • Lzw.compress = function(uncompressed) // LZW-compress a string into a list of codes: one-character strings for initial dictionary entries, numbers from 256 upward for learned sequences
  • dict_size = 256 // next free numeric code
  • dictionary = generate_dict(dict_size)
  • dictionary = self.generate_dict(dict_size) // overwrites the assignment on the previous line
  • w = "" // longest already-known sequence matched so far
  • result = []
  • for c in uncompressed.values
  • wc = w + c
  • if dictionary.hasIndex(wc) then
  • w = wc // still a known sequence: keep extending the match
  • else
  • result.push(dictionary[w]) // emit the code of the match, then learn the extended sequence
  • dictionary[wc] = dict_size
  • dict_size = dict_size + 1
  • w = c // restart matching from the current character
  • end if
  • end for
  • if w != "" then result.push(dictionary[w]) // flush the final pending match
  • return result
  • end function
  • decompress = function(compressed) // NOTE(review): this header and the next one both open a function but only one `end function` follows; looks like a before/after pair, confirm which is meant to stay
  • Lzw.decompress = function(compressed) // Rebuild the original string from a list of codes such as the one compress returns
  • dict_size = 256 // next numeric code to be assigned
  • dictionary = generate_dict(dict_size)
  • dictionary = self.generate_dict(dict_size) // overwrites the assignment on the previous line
  • w = compressed[0] // NOTE(review): element 0 is read unconditionally, so an empty list is not handled here
  • result = compressed[0]
  • compressed = compressed[1:]
  • for k in compressed
  • if dictionary.hasIndex(k) then
  • entry = dictionary[k]
  • else if k == dict_size then
  • entry = w + w[0] // code is the one about to be assigned: it stands for the previous entry plus its own first character
  • else
  • exit("Bad compressed k: " + k)
  • end if
  • result = result + entry
  • dictionary[dict_size] = w + entry[0] // learn: previous entry + first character of the current entry
  • dict_size = dict_size + 1
  • w = entry
  • end for
  • return result
  • end function
  • //compressed = compress("TOBEORNOTTOBEORTOBEORNOT")
  • user_in = user_input("in: ") // ad-hoc driver: read a line, compress it, report the sizes
  • compressed = compress(user_in)
  • //print compressed
  • //print bin_to_lzw_table(string_to_stream(encode_lzw_table(compressed)))
  • //print(user_in.len)
  • //print(compressed.len)
  • //print "original"
  • //print user_in
  • //print "compressed"
  • //print encode_lzw_table(compressed)
  • print "original len: " + user_in.len
  • print "compressed len: " + encode_lzw_table(compressed).len
  • exit // NOTE(review): everything below appears unreachable when run top to bottom; confirm whether it is leftover scratch code
  • f = function(c)
  • r = c
  • if c isa string then r = c.code
  • return "0" * (CELL_LEN - to_bin(r).len) + to_bin(r) // left-pad the binary form to CELL_LEN digits
  • end function
  • compressed = compressed.map(@f)
  • print compressed
  • compressed = divide(compressed.join(""), 7) // re-chunk the joined bit string into groups of 7
  • print compressed
  • f = function(s)
  • return to_int(s)
  • end function
  • compressed = compressed.map(@f)
  • print compressed
  • //print char_set.len
  • //print to_bin(char_set.len)
  • f = function(n)
  • return char(char_set[n]) // look the group value up in char_set and turn it into a character
  • end function
  • compressed = compressed.map(@f)
  • print compressed.join("")
  • exit
  • decompressed = decompress(compressed)
  • print decompressed
  • end function // NOTE(review): unclear which definition this closes; confirm against the real file
encoder.src
  • CELL_LEN = 14 // fixed code width in bits used by lzw_table_to_bin / bin_to_lzw_table
  • // require lst
  • Encoder = {} // map that holds the encoder functions and constants
  • Encoder.CELL_LEN = 14
  • char_set = [] // each push below appends a whole range list; .flat (presumably from lst.src, confirm) then flattens them
  • char_set.push(range(48, 57)) // numbers
  • char_set.push(range(65, 90)) // capital_letters
  • char_set.push(range(97, 122)) // letters
  • char_set.push(range(128, 193)) // extended
  • char_set = char_set.flat // 10 + 26 + 26 + 66 = 128 entries <- 7 bit char set
  • Encoder.CHAR_SET = char_set // output alphabet: Encoder.encode and Encoder.decode index into this list
  • // UTILS ------------------------------------------------------
  • Encoder.Utils = {}
  • to_bin = function(n) // NOTE(review): this header and the next one both open a function but only one `end function` follows; looks like a before/after pair, confirm which is meant to stay
  • Encoder.Utils.to_bin = function(n) // Binary digits of n as a string, most significant digit first, no padding; assumes n is a non-negative integer (TODO confirm)
  • if n == 0 then return "0"
  • if n == 1 then return "1"
  • return to_bin(floor(n / 2)) + str(n % 2) // digits of n / 2 followed by the lowest digit of n
  • return self.to_bin(floor(n / 2)) + str(n % 2) // NOTE(review): follows another return, so as listed it cannot run; likely the replacement for the line above
  • end function
  • to_int = function(n)
  • // Parse a string of binary digits (most significant digit first) into a number.
  • // Only "1" characters add to the result; every other character counts as zero.
  • Encoder.Utils.to_int = function(n)
  • total = 0
  • weight = n.len - 1
  • for digit in n.values
  • if digit == "1" then total = total + 2 ^ weight
  • weight = weight - 1
  • end for
  • return total
  • end function
  • divide = function(s, n) // NOTE(review): this header and the next one both open a function but only one `end function` follows; looks like a before/after pair, confirm which is meant to stay
  • Encoder.Utils.divide = function(s, n) // Split s into consecutive pieces of n characters, returned as a list; a shorter final piece is kept as is
  • if s.len == 0 then
  • return []
  • end if
  • if s.len < n then
  • //prepend = "0" * (n - s.len)
  • //s = prepend + s
  • return [s] // zero-padding is commented out above, so the tail stays short
  • end if
  • return divide(s[n:], n).insert(0, s[0:n])
  • return self.divide(s[n:], n).insert(0, s[0:n]) // recursion: split the remainder, then put the first n characters in front; NOTE(review): follows another return
  • end function
  • char_set = [] // output alphabet: stream_to_string and string_to_stream index into this list
  • char_set.push(range(48, 57)) // numbers
  • char_set.push(range(65, 90)) // capital_letters
  • char_set.push(range(97, 122)) // letters
  • char_set.push(range(128, 192)) // extended
  • char_set.push("@".code) // one extra code point appended after the ranges
  • char_set = char_set.flat // 10 + 26 + 26 + 65 + 1 = 128 entries <- 7 bit char set
  • // -------------------------------------------------------------
  • // Turn a list of LZW codes into one long string of binary digits.
  • // String entries are replaced by their character code first; every entry is then
  • // written in binary and left-padded with zeros to CELL_LEN digits.
  • lzw_table_to_bin = function(table)
  • to_cell = function(entry)
  • if entry isa string then
  • value = entry.code
  • else
  • value = entry
  • end if
  • digits = to_bin(value)
  • return "0" * (CELL_LEN - digits.len) + digits
  • end function
  • cells = table.map(@to_cell)
  • return cells.join("")
  • end function
  • // Turn a string of binary digits back into a list of LZW codes.
  • // The stream is cut into groups of CELL_LEN digits; each group is parsed as a
  • // number, and values below 256 are returned as the matching one-character string.
  • bin_to_lzw_table = function(stream)
  • f = function(s)
  • n = to_int(s)
  • if n < 256 then return char(n)
  • return n
  • end function
  • return divide(stream, CELL_LEN).map(@f)
  • end function
  • // Pack a string of binary digits into printable text: the stream is cut into
  • // groups of 7 digits and each group's value selects one code point from char_set.
  • stream_to_string = function(bits_stream)
  • to_symbol = function(group)
  • index = to_int(group)
  • return char(char_set[index])
  • end function
  • groups = divide(bits_stream, 7)
  • return groups.map(@to_symbol).join("")
  • end function
  • // Expand text into a string of binary digits: each character's position in
  • // char_set is written in binary and left-padded with zeros to 7 digits.
  • string_to_stream = function(string)
  • to_group = function(symbol)
  • index = char_set.indexOf(symbol.code)
  • digits = to_bin(index)
  • return "0" * (7 - digits.len) + digits
  • end function
  • symbols = string.values
  • return symbols.map(@to_group).join("")
  • end function
  • Encoder.encode = function(lzw) // Pack a list of LZW codes into a string over CHAR_SET: 6 bits CELL_SIZE, 6 bits FAT_ADDED, then the codes and zero padding
  • l = lzw[0:] // slice copy, so the elements replaced below do not touch the caller's list
  • CELL_SIZE = 0 // ends up as the digit count of the widest code
  • for i in l.indexes
  • if l[i] isa string then l[i] = l[i].code
  • l[i] = self.Utils.to_bin(l[i])
  • if l[i].len > CELL_SIZE then CELL_SIZE = l[i].len
  • end for
  • for i in l.indexes
  • l[i] = "0" * (CELL_SIZE - l[i].len) + l[i] // left-pad every code to CELL_SIZE digits
  • end for
  • encode_lzw_table = function(table) // NOTE(review): this line and the next look like leftovers of an older function placed inside this body; confirm they should be removed
  • return stream_to_string(lzw_table_to_bin(table))
  • l = l.join("")
  • FAT_ADDED = l.len % 7
  • if FAT_ADDED > 0 then FAT_ADDED = 7 - FAT_ADDED // number of zero digits needed to reach a multiple of 7
  • l = l + "0" * FAT_ADDED
  • l = self.Utils.divide(l, 7)
  • fat_bin = self.Utils.to_bin(FAT_ADDED)
  • l.insert(0, "0" * (6 - fat_bin.len) + fat_bin) // 6-digit padding count
  • cell_bin = self.Utils.to_bin(CELL_SIZE)
  • l.insert(0, "0" * (6 - cell_bin.len) + cell_bin) // 6-digit cell size, inserted last so it comes first in the stream
  • l = self.Utils.divide(l.join(""), 7) // re-chunk with the 12 header digits in front; the final group then has 5 digits
  • for i in l.indexes
  • l[i] = char(self.CHAR_SET[self.Utils.to_int(l[i])]) // group value selects a code point from CHAR_SET
  • end for
  • return l.join("")
  • end function
  • decode_lzw_table = function(string)
  • // Decode a string produced by Encoder.encode back into the list of LZW codes.
  • // Layout of the decoded bit stream: 6 digits CELL_SIZE, 6 digits FAT_ADDED, then
  • // the codes (CELL_SIZE digits each) followed by FAT_ADDED zero digits of padding.
  • // Returns a list in which codes below 256 are one-character strings and all
  • // other codes are numbers. An empty input string yields an empty list.
  • Encoder.decode = function(string)
  • l = string.values
  • if l.len == 0 then return []
  • for i in l.indexes
  • l[i] = self.Utils.to_bin(self.CHAR_SET.indexOf(l[i].code))
  • l[i] = "0" * (7 - l[i].len) + l[i]
  • end for
  • // The 12 header digits leave only 5 digits for the final character,
  • // so drop the 2 leading pad digits added by the loop above.
  • l[-1] = l[-1][2:]
  • l = l.join("")
  • CELL_SIZE = self.Utils.to_int(l[0:6])
  • FAT_ADDED = self.Utils.to_int(l[6:12])
  • bits = l[12:]
  • // Remove the padding only when there is some: a slice ending at FAT_ADDED * -1
  • // turns into [0:0] when FAT_ADDED is 0 and would throw away real data.
  • if FAT_ADDED > 0 then bits = bits[0:bits.len - FAT_ADDED]
  • l = self.Utils.divide(bits, CELL_SIZE)
  • for i in l.indexes
  • l[i] = self.Utils.to_int(l[i])
  • if l[i] < 256 then l[i] = char(l[i])
  • end for
  • return l
  • end function
test.src
  • import_code("/home/me/projects/lzw/libs/lst.src")
  • import_code("/home/me/projects/lzw/lzw.src")
  • import_code("/home/me/projects/lzw/encoder.src")
  • // Round-trip check: compress and encode a line typed by the user, then decode
  • // and decompress it again and report whether the text survived unchanged.
  • text = user_input("in: ")
  • codes = Lzw.compress(text)
  • //codes = Lzw.compress("TOBEORNOTTOBEORTOBEORNOT")
  • print("compressed: " + codes)
  • encoded = Encoder.encode(codes)
  • print("encoded: " + encoded)
  • decoded_codes = Encoder.decode(encoded)
  • restored = Lzw.decompress(decoded_codes)
  • print("decoded and decompressed: " + restored)
  • print("input len: " + text.len)
  • print("encoded len: " + encoded.len)
  • print(text)
  • print(restored)
  • print(text == restored)
example.txt
  • ENCODE :
  • TOBEORNOTTOBEORTOBEORNOT
  • ["T", "O", "B", "E", "O", "R", "N", "O", "T", 256, 258, 260, 265, 259, 261, 263]
  • [84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263]
  • CELL_SIZE = 9
  • ["1010100", "1001111", "1000010", "1000101", "1001111", "1010010", "1001110", "1001111", "1010100", "100000000", "100000010", "100000100", "100001001", "100000011", "100000101", "100000111"]
  • ["001010100", "001001111", "001000010", "001000101", "001001111", "001010010", "001001110", "001001111", "001010100", "100000000", "100000010", "100000100", "100001001", "100000011", "100000101", "100000111"]
  • FAT_ADDED = 3
  • ["0010101", "0000100", "1111001", "0000100", "0100010", "1001001", "1110010", "1001000", "1001110", "0010011", "1100101", "0100100", "0000001", "0000001", "0100000", "1001000", "0100110", "0000011", "1000001", "0110000", "0111000"]
  • ["001001", "000011", "0010101", "0000100", "1111001", "0000100", "0100010", "1001001", "1110010", "1001000", "1001110", "0010011", "1100101", "0100100", "0000001", "0000001", "0100000", "1001000", "0100110", "0000011", "1000001", "0110000", "0111000"]
  • ["0010010", "0001100", "1010100", "0010011", "1100100", "0010001", "0001010", "0100111", "1001010", "0100010", "0111000", "1001111", "0010101", "0010000", "0000100", "0000101", "0000010", "0100001", "0011000", "0001110", "0000101", "1000001", "11000"]
  • [18, 12, 84, 19, 100, 17, 10, 39, 74, 34, 56, 79, 21, 16, 4, 5, 2, 33, 24, 14, 5, 65, 24]
  • [73, 67, 150, 74, 166, 72, 65, 100, 140, 89, 117, 145, 76, 71, 52, 53, 50, 88, 79, 69, 53, 131, 79]
  • ["I", "C", "–", "J", "¦", "H", "A", "d", "Œ", "Y", "u", "‘", "L", "G", "4", "5", "2", "X", "O", "E", "5", "ƒ", "O"]
  • IC–J¦HAdŒYu‘LG452XOE5ƒO