From 7a387ac2ff353b35a1540d73f417a00b32c49639 Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Sun, 7 Jun 2015 16:51:32 -0700 Subject: [PATCH 01/13] support both python2 and python3 --- README.md | 2 +- lzstring.py | 1357 ++++++++++++++++++++++++++------------------------- test.py | 100 ++-- 3 files changed, 741 insertions(+), 718 deletions(-) diff --git a/README.md b/README.md index a537054..fcbd151 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ lz-string-python ================ -lz-string for python 3 +lz-string for python 2/3 Based on the LZ-String javascript found here: http://pieroxy.net/blog/pages/lz-string/index.html diff --git a/lzstring.py b/lzstring.py index 18775de..1b191c5 100644 --- a/lzstring.py +++ b/lzstring.py @@ -1,672 +1,685 @@ -import math -import re - - -class LZString: - - def __init__(self): - self.keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" - - def compress(self, uncompressed): - - if uncompressed is None: - return '' - - value = 0 - context_dictionary = {} - context_dictionaryToCreate = {} - context_c = '' - context_wc = '' - context_w = '' - context_enlargeIn = 2 - - context_dictSize = 3 - context_numBits = 2 - context_data_string = '' - context_data_val = 0 - context_data_position = 0 - - uncompressed = uncompressed - - for ii in range(len(uncompressed)): - context_c = uncompressed[ii] - - if not context_c in context_dictionary: - context_dictionary[context_c] = context_dictSize - context_dictSize += 1 - context_dictionaryToCreate[context_c] = True - - context_wc = context_w + context_c - - if context_wc in context_dictionary: - context_w = context_wc - else: - if context_w in context_dictionaryToCreate: - if ord(context_w[0]) < 256: - for i in range(context_numBits): - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = 
ord(context_w[0]) - - for i in range(8): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - else: - value = 1 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | value - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = 0 - - value = ord(context_w[0]) - - for i in range(16): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - context_dictionaryToCreate.pop(context_w, None) - #del context_dictionaryToCreate[context_w] - else: - value = context_dictionary[context_w] - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - context_dictionary[context_wc] = context_dictSize - context_dictSize += 1 - context_w = context_c - if context_w != '': - if context_w in context_dictionaryToCreate: - if ord(context_w[0]) < 256: - for i in range(context_numBits): - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 
- else: - context_data_position += 1 - - value = ord(context_w[0]) - - for i in range(8): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - else: - value = 1 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | value - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = 0 - - value = ord(context_w[0]) - - for i in range(16): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - context_dictionaryToCreate.pop(context_w, None) - #del context_dictionaryToCreate[context_w] - else: - value = context_dictionary[context_w] - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - value = 2 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - while True: - context_data_val = (context_data_val << 1) 
- - if context_data_position == 15: - context_data_string += chr(context_data_val) - break - else: - context_data_position += 1 - - return context_data_string - - def compressToBase64(self, string): - if string is None: - return '' - - output = '' - - chr1 = float('NaN') - chr2 = float('NaN') - chr3 = float('NaN') - enc1 = 0 - enc2 = 0 - enc3 = 0 - enc4 = 0 - - i = 0 - - string = self.compress(string) - strlen = len(string) - - while i < (strlen * 2): - if (i % 2) == 0: - chr1 = ord(string[int(i / 2)]) >> 8 - chr2 = ord(string[int(i / 2)]) & 255 - - if (i / 2) + 1 < strlen: - chr3 = ord(string[int((i / 2) + 1)]) >> 8 - else: - chr3 = float('NaN') - else: - chr1 = ord(string[int((i - 1) / 2)]) & 255 - if (i + 1) / 2 < strlen: - chr2 = ord(string[int((i + 1) / 2)]) >> 8 - chr3 = ord(string[int((i + 1) / 2)]) & 255 - else: - chr2 = float('NaN') - chr3 = float('NaN') - - i += 3 - - # python dont support bit operation with NaN like javascript - enc1 = chr1 >> 2 - enc2 = ((chr1 & 3) << 4) | (chr2 >> 4 if not math.isnan(chr2) else 0) - enc3 = ((chr2 & 15 if not math.isnan(chr2) else 0) << 2) | (chr3 >> 6 if not math.isnan(chr3) else 0) - enc4 = (chr3 if not math.isnan(chr3) else 0) & 63 - - if math.isnan(chr2): - enc3 = 64 - enc4 = 64 - elif math.isnan(chr3): - enc4 = 64 - - output += self.keyStr[enc1] + self.keyStr[enc2] + self.keyStr[enc3] + self.keyStr[enc4] - - return output - - def compressToUTF16(self, string): - - if string is None: - return '' - - output = '' - c = 0 - current = 0 - status = 0 - - string = self.compress(string) - - for i in range(len(string)): - c = ord(string[i]) - - if status == 0: - status += 1 - output += chr(((c >> 1) + 32)) - current = (c & 1) << 14 - elif status == 1: - status += 1 - output += chr(((current + (c >> 2)) + 32)) - current = (c & 3) << 13 - elif status == 2: - status += 1 - output += chr(((current + (c >> 3)) + 32)) - current = (c & 7) << 12 - elif status == 3: - status += 1 - output += chr(((current + (c >> 4)) + 32)) - 
current = (c & 15) << 11 - elif status == 4: - status += 1 - output += chr(((current + (c >> 5)) + 32)) - current = (c & 31) << 10 - elif status == 5: - status += 1 - output += chr(((current + (c >> 6)) + 32)) - current = (c & 63) << 9 - elif status == 6: - status += 1 - output += chr(((current + (c >> 7)) + 32)) - current = (c & 127) << 8 - elif status == 7: - status += 1 - output += chr(((current + (c >> 8)) + 32)) - current = (c & 255) << 7 - elif status == 8: - status += 1 - output += chr(((current + (c >> 9)) + 32)) - current = (c & 511) << 6 - elif status == 9: - status += 1 - output += chr(((current + (c >> 10)) + 32)) - current = (c & 1023) << 5 - elif status == 10: - status += 1 - output += chr(((current + (c >> 11)) + 32)) - current = (c & 2047) << 4 - elif status == 11: - status += 1 - output += chr(((current + (c >> 12)) + 32)) - current = (c & 4095) << 3 - elif status == 12: - status += 1 - output += chr(((current + (c >> 13)) + 32)) - current = (c & 8191) << 2 - elif status == 13: - status += 1 - output += chr(((current + (c >> 14)) + 32)) - current = (c & 16383) << 1 - elif status == 14: - status += 1 - output += chr(((current + (c >> 15)) + 32)) - output += chr((c & 32767) + 32) - - status = 0 - - output += chr(current + 32) - - return output - - #written by https://github.com/v-python - def decompressFromUTF16(self, string): - if not string: - return "" - - output = "" - status = 0 - i = 0 - - while i < len(string): - c = ord(string[i]) - 32 - i += 1 - - if status == 0: - status = 1 - current = c << 1 - elif status == 1: - status = 2 - output += chr(current + (c >> 14)) - current = (c & 16383) << 2 - elif status == 2: - status = 3 - output += chr(current + (c >> 13)) - current = (c & 8191) << 3 - elif status == 3: - status = 4 - output += chr(current + (c >> 12)) - current = (c & 4095) << 4 - elif status == 4: - status = 5 - output += chr(current + (c >> 11)) - current = (c & 2047) << 5 - elif status == 5: - status = 6 - output += chr(current + (c 
>> 10)) - current = (c & 1023) << 6 - elif status == 6: - status = 7 - output += chr(current + (c >> 9)) - current = (c & 511) << 7 - elif status == 7: - status = 8 - output += chr(current + (c >> 8)) - current = (c & 255) << 8 - elif status == 8: - status = 9 - output += chr(current + (c >> 7)) - current = (c & 127) << 9 - elif status == 9: - status = 10 - output += chr(current + (c >> 6)) - current = (c & 63) << 10 - elif status == 10: - status = 11 - output += chr(current + (c >> 5)) - current = (c & 31) << 11 - elif status == 11: - status = 12 - output += chr(current + (c >> 4)) - current = (c & 15) << 12 - elif status == 12: - status = 13 - output += chr(current + (c >> 3)) - current = (c & 7) << 13 - elif status == 13: - status = 14 - output += chr(current + (c >> 2)) - current = (c & 3) << 14 - elif status == 14: - status = 15 - output += chr(current + (c >> 1)) - current = (c & 1) << 15 - elif status == 15: - status = 0 - output += chr(current + c) - current = (c & 1) << 15 - - return self.decompress(output) - - def decompress(self, compressed): - - if (compressed is None) or (compressed == ''): - return '' - - dictionary = {} - enlargeIn = 4 - dictSize = 4 - numBits = 3 - (entry, result, w, c) = ('', '', '', '') - (i, nnext, bits, resb, maxpower, power) = (0, 0, 0, 0, 0, 0) - - data_string = compressed - data_val = ord(compressed[0]) - data_position = 32768 - data_index = 1 - - for i in range(3): - #dictionary[i] = i - dictionary[i] = '' - - bits = 0 - maxpower = pow(2, 2) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - nnext = bits - if nnext == 0: - bits = 0 - maxpower = pow(2, 8) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val 
= ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = chr(bits) - elif nnext == 1: - bits = 0 - maxpower = pow(2, 16) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = chr(bits) - elif nnext == 2: - return '' - - dictionary[3] = c - result = c - w = result - - while True: - if data_index > len(data_string): - return '' - - bits = 0 - maxpower = pow(2, numBits) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = bits - - if c == 0: - bits = 0 - maxpower = pow(2, 8) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - dictionary[dictSize] = chr(bits) - dictSize += 1 - c = dictSize - 1 - enlargeIn -= 1 - elif c == 1: - bits = 0 - maxpower = pow(2, 16) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - dictionary[dictSize] = chr(bits) - dictSize += 1 - c = dictSize - 1 - enlargeIn -= 1 - elif c == 2: - return result - - if enlargeIn == 0: - enlargeIn = pow(2, numBits) - numBits += 1 - - if c in dictionary: - entry = dictionary[c] - else: - if c == dictSize: - entry = w + w[0] - else: - return None - - result += entry - - dictionary[dictSize] = w + 
entry[0] - dictSize += 1 - enlargeIn -= 1 - - w = entry - - if enlargeIn == 0: - enlargeIn = pow(2, numBits) - numBits += 1 - - def decompresFromBase64(self, iinput): - if iinput is None: - return '' - - output = "" - ol = 0 - output_ = '' - - i = 0 - - iinput = re.sub(r'[^A-Za-z0-9\+\/\=]', '', iinput) - - while i < len(iinput): - enc1 = self.keyStr.index(iinput[i]) - i += 1 - enc2 = self.keyStr.index(iinput[i]) - i += 1 - enc3 = self.keyStr.index(iinput[i]) - i += 1 - enc4 = self.keyStr.index(iinput[i]) - i += 1 - - chr1 = (enc1 << 2) | (enc2 >> 4) - chr2 = ((enc2 & 15) << 4) | (enc3 >> 2) - chr3 = ((enc3 & 3) << 6) | enc4 - - if (ol % 2) == 0: - output_ = chr1 << 8 - - if enc3 != 64: - output += chr(output_ | chr2) - - if enc4 != 64: - output_ = chr3 << 8 - else: - output = output + chr(output_ | chr1) - - if enc3 != 64: - output_ = chr2 << 8 - - if enc4 != 64: - output += chr(output_ | chr3) - - ol += 3 - - return self.decompress(output) + #!/usr/bin/python + # -*- coding: utf-8 -*- + +from __future__ import division +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import absolute_import +from builtins import range +from builtins import int +from builtins import chr +from future import standard_library +standard_library.install_aliases() +from builtins import object +import math +import re + + +class LZString(object): + + def __init__(self): + self.keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" + + def compress(self, uncompressed): + + if uncompressed is None: + return '' + + value = 0 + context_dictionary = {} + context_dictionaryToCreate = {} + context_c = '' + context_wc = '' + context_w = '' + context_enlargeIn = 2 + + context_dictSize = 3 + context_numBits = 2 + context_data_string = '' + context_data_val = 0 + context_data_position = 0 + + uncompressed = uncompressed + + for ii in range(len(uncompressed)): + context_c = uncompressed[ii] + + if not context_c in 
context_dictionary: + context_dictionary[context_c] = context_dictSize + context_dictSize += 1 + context_dictionaryToCreate[context_c] = True + + context_wc = context_w + context_c + + if context_wc in context_dictionary: + context_w = context_wc + else: + if context_w in context_dictionaryToCreate: + if ord(context_w[0]) < 256: + for i in range(context_numBits): + context_data_val = (context_data_val << 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = ord(context_w[0]) + + for i in range(8): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = value >> 1 + else: + value = 1 + + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | value + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = 0 + + value = ord(context_w[0]) + + for i in range(16): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = value >> 1 + + context_enlargeIn -= 1 + + if context_enlargeIn == 0: + context_enlargeIn = pow(2, context_numBits) + context_numBits += 1 + + context_dictionaryToCreate.pop(context_w, None) + #del context_dictionaryToCreate[context_w] + else: + value = context_dictionary[context_w] + + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: 
+ context_data_position += 1 + + value = value >> 1 + + context_enlargeIn -= 1 + + if context_enlargeIn == 0: + context_enlargeIn = pow(2, context_numBits) + context_numBits += 1 + + context_dictionary[context_wc] = context_dictSize + context_dictSize += 1 + context_w = context_c + if context_w != '': + if context_w in context_dictionaryToCreate: + if ord(context_w[0]) < 256: + for i in range(context_numBits): + context_data_val = (context_data_val << 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = ord(context_w[0]) + + for i in range(8): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = value >> 1 + else: + value = 1 + + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | value + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = 0 + + value = ord(context_w[0]) + + for i in range(16): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = value >> 1 + + context_enlargeIn -= 1 + + if context_enlargeIn == 0: + context_enlargeIn = pow(2, context_numBits) + context_numBits += 1 + + context_dictionaryToCreate.pop(context_w, None) + #del context_dictionaryToCreate[context_w] + else: + value = context_dictionary[context_w] + + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += 
chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = value >> 1 + + context_enlargeIn -= 1 + + if context_enlargeIn == 0: + context_enlargeIn = pow(2, context_numBits) + context_numBits += 1 + + value = 2 + + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + + if context_data_position == 15: + context_data_position = 0 + context_data_string += chr(context_data_val) + context_data_val = 0 + else: + context_data_position += 1 + + value = value >> 1 + + while True: + context_data_val = (context_data_val << 1) + + if context_data_position == 15: + context_data_string += chr(context_data_val) + break + else: + context_data_position += 1 + + return context_data_string + + def compressToBase64(self, string): + if string is None: + return '' + + output = '' + + chr1 = float('NaN') + chr2 = float('NaN') + chr3 = float('NaN') + enc1 = 0 + enc2 = 0 + enc3 = 0 + enc4 = 0 + + i = 0 + + string = self.compress(string) + strlen = len(string) + + while i < (strlen * 2): + if (i % 2) == 0: + chr1 = ord(string[int(i / 2)]) >> 8 + chr2 = ord(string[int(i / 2)]) & 255 + + if (i / 2) + 1 < strlen: + chr3 = ord(string[int((i / 2) + 1)]) >> 8 + else: + chr3 = float('NaN') + else: + chr1 = ord(string[int((i - 1) / 2)]) & 255 + if (i + 1) / 2 < strlen: + chr2 = ord(string[int((i + 1) / 2)]) >> 8 + chr3 = ord(string[int((i + 1) / 2)]) & 255 + else: + chr2 = float('NaN') + chr3 = float('NaN') + + i += 3 + + # python dont support bit operation with NaN like javascript + enc1 = chr1 >> 2 + enc2 = ((chr1 & 3) << 4) | (chr2 >> 4 if not math.isnan(chr2) else 0) + enc3 = ((chr2 & 15 if not math.isnan(chr2) else 0) << 2) | (chr3 >> 6 if not math.isnan(chr3) else 0) + enc4 = (chr3 if not math.isnan(chr3) else 0) & 63 + + if math.isnan(chr2): + enc3 = 64 + enc4 = 64 + elif math.isnan(chr3): + enc4 = 64 + + output += self.keyStr[enc1] + self.keyStr[enc2] + self.keyStr[enc3] + self.keyStr[enc4] + + return output + + def 
compressToUTF16(self, string): + + if string is None: + return '' + + output = '' + c = 0 + current = 0 + status = 0 + + string = self.compress(string) + + for i in range(len(string)): + c = ord(string[i]) + + if status == 0: + status += 1 + output += chr(((c >> 1) + 32)) + current = (c & 1) << 14 + elif status == 1: + status += 1 + output += chr(((current + (c >> 2)) + 32)) + current = (c & 3) << 13 + elif status == 2: + status += 1 + output += chr(((current + (c >> 3)) + 32)) + current = (c & 7) << 12 + elif status == 3: + status += 1 + output += chr(((current + (c >> 4)) + 32)) + current = (c & 15) << 11 + elif status == 4: + status += 1 + output += chr(((current + (c >> 5)) + 32)) + current = (c & 31) << 10 + elif status == 5: + status += 1 + output += chr(((current + (c >> 6)) + 32)) + current = (c & 63) << 9 + elif status == 6: + status += 1 + output += chr(((current + (c >> 7)) + 32)) + current = (c & 127) << 8 + elif status == 7: + status += 1 + output += chr(((current + (c >> 8)) + 32)) + current = (c & 255) << 7 + elif status == 8: + status += 1 + output += chr(((current + (c >> 9)) + 32)) + current = (c & 511) << 6 + elif status == 9: + status += 1 + output += chr(((current + (c >> 10)) + 32)) + current = (c & 1023) << 5 + elif status == 10: + status += 1 + output += chr(((current + (c >> 11)) + 32)) + current = (c & 2047) << 4 + elif status == 11: + status += 1 + output += chr(((current + (c >> 12)) + 32)) + current = (c & 4095) << 3 + elif status == 12: + status += 1 + output += chr(((current + (c >> 13)) + 32)) + current = (c & 8191) << 2 + elif status == 13: + status += 1 + output += chr(((current + (c >> 14)) + 32)) + current = (c & 16383) << 1 + elif status == 14: + status += 1 + output += chr(((current + (c >> 15)) + 32)) + output += chr((c & 32767) + 32) + + status = 0 + + output += chr(current + 32) + + return output + + #written by https://github.com/v-python + def decompressFromUTF16(self, string): + if not string: + return "" + + output = "" 
+ status = 0 + i = 0 + + while i < len(string): + c = ord(string[i]) - 32 + i += 1 + + if status == 0: + status = 1 + current = c << 1 + elif status == 1: + status = 2 + output += chr(current + (c >> 14)) + current = (c & 16383) << 2 + elif status == 2: + status = 3 + output += chr(current + (c >> 13)) + current = (c & 8191) << 3 + elif status == 3: + status = 4 + output += chr(current + (c >> 12)) + current = (c & 4095) << 4 + elif status == 4: + status = 5 + output += chr(current + (c >> 11)) + current = (c & 2047) << 5 + elif status == 5: + status = 6 + output += chr(current + (c >> 10)) + current = (c & 1023) << 6 + elif status == 6: + status = 7 + output += chr(current + (c >> 9)) + current = (c & 511) << 7 + elif status == 7: + status = 8 + output += chr(current + (c >> 8)) + current = (c & 255) << 8 + elif status == 8: + status = 9 + output += chr(current + (c >> 7)) + current = (c & 127) << 9 + elif status == 9: + status = 10 + output += chr(current + (c >> 6)) + current = (c & 63) << 10 + elif status == 10: + status = 11 + output += chr(current + (c >> 5)) + current = (c & 31) << 11 + elif status == 11: + status = 12 + output += chr(current + (c >> 4)) + current = (c & 15) << 12 + elif status == 12: + status = 13 + output += chr(current + (c >> 3)) + current = (c & 7) << 13 + elif status == 13: + status = 14 + output += chr(current + (c >> 2)) + current = (c & 3) << 14 + elif status == 14: + status = 15 + output += chr(current + (c >> 1)) + current = (c & 1) << 15 + elif status == 15: + status = 0 + output += chr(current + c) + current = (c & 1) << 15 + + return self.decompress(output) + + def decompress(self, compressed): + + if (compressed is None) or (compressed == ''): + return '' + + dictionary = {} + enlargeIn = 4 + dictSize = 4 + numBits = 3 + (entry, result, w, c) = ('', '', '', '') + (i, nnext, bits, resb, maxpower, power) = (0, 0, 0, 0, 0, 0) + + data_string = compressed + data_val = ord(compressed[0]) + data_position = 32768 + data_index = 1 + + 
for i in range(3): + #dictionary[i] = i + dictionary[i] = '' + + bits = 0 + maxpower = pow(2, 2) + power = 1 + + while power != maxpower: + resb = data_val & data_position + data_position >>= 1 + + if data_position == 0: + data_position = 32768 + data_val = ord(data_string[data_index]) + data_index += 1 + + bits |= (1 if resb > 0 else 0) * power + power <<= 1 + + nnext = bits + if nnext == 0: + bits = 0 + maxpower = pow(2, 8) + power = 1 + + while power != maxpower: + resb = data_val & data_position + data_position >>= 1 + + if data_position == 0: + data_position = 32768 + data_val = ord(data_string[data_index]) + data_index += 1 + + bits |= (1 if resb > 0 else 0) * power + power <<= 1 + + c = chr(bits) + elif nnext == 1: + bits = 0 + maxpower = pow(2, 16) + power = 1 + + while power != maxpower: + resb = data_val & data_position + data_position >>= 1 + + if data_position == 0: + data_position = 32768 + data_val = ord(data_string[data_index]) + data_index += 1 + + bits |= (1 if resb > 0 else 0) * power + power <<= 1 + + c = chr(bits) + elif nnext == 2: + return '' + + dictionary[3] = c + result = c + w = result + + while True: + if data_index > len(data_string): + return '' + + bits = 0 + maxpower = pow(2, numBits) + power = 1 + + while power != maxpower: + resb = data_val & data_position + data_position >>= 1 + + if data_position == 0: + data_position = 32768 + data_val = ord(data_string[data_index]) + data_index += 1 + + bits |= (1 if resb > 0 else 0) * power + power <<= 1 + + c = bits + + if c == 0: + bits = 0 + maxpower = pow(2, 8) + power = 1 + + while power != maxpower: + resb = data_val & data_position + data_position >>= 1 + + if data_position == 0: + data_position = 32768 + data_val = ord(data_string[data_index]) + data_index += 1 + + bits |= (1 if resb > 0 else 0) * power + power <<= 1 + + dictionary[dictSize] = chr(bits) + dictSize += 1 + c = dictSize - 1 + enlargeIn -= 1 + elif c == 1: + bits = 0 + maxpower = pow(2, 16) + power = 1 + + while power != 
maxpower: + resb = data_val & data_position + data_position >>= 1 + + if data_position == 0: + data_position = 32768 + data_val = ord(data_string[data_index]) + data_index += 1 + + bits |= (1 if resb > 0 else 0) * power + power <<= 1 + + dictionary[dictSize] = chr(bits) + dictSize += 1 + c = dictSize - 1 + enlargeIn -= 1 + elif c == 2: + return result + + if enlargeIn == 0: + enlargeIn = pow(2, numBits) + numBits += 1 + + if c in dictionary: + entry = dictionary[c] + else: + if c == dictSize: + entry = w + w[0] + else: + return None + + result += entry + + dictionary[dictSize] = w + entry[0] + dictSize += 1 + enlargeIn -= 1 + + w = entry + + if enlargeIn == 0: + enlargeIn = pow(2, numBits) + numBits += 1 + + def decompresFromBase64(self, iinput): + if iinput is None: + return '' + + output = "" + ol = 0 + output_ = '' + + i = 0 + + iinput = re.sub(r'[^A-Za-z0-9\+\/\=]', '', iinput) + + while i < len(iinput): + enc1 = self.keyStr.index(iinput[i]) + i += 1 + enc2 = self.keyStr.index(iinput[i]) + i += 1 + enc3 = self.keyStr.index(iinput[i]) + i += 1 + enc4 = self.keyStr.index(iinput[i]) + i += 1 + + chr1 = (enc1 << 2) | (enc2 >> 4) + chr2 = ((enc2 & 15) << 4) | (enc3 >> 2) + chr3 = ((enc3 & 3) << 6) | enc4 + + if (ol % 2) == 0: + output_ = chr1 << 8 + + if enc3 != 64: + output += chr(output_ | chr2) + + if enc4 != 64: + output_ = chr3 << 8 + else: + output = output + chr(output_ | chr1) + + if enc3 != 64: + output_ = chr2 << 8 + + if enc4 != 64: + output += chr(output_ | chr3) + + ol += 3 + + return self.decompress(output) diff --git a/test.py b/test.py index 57ca491..b558767 100644 --- a/test.py +++ b/test.py @@ -1,45 +1,55 @@ -import json -import lzstring -import pprint - - -if __name__ == '__main__': - x = lzstring.LZString() - - s = 'Žluťoučký kůň úpěl ďábelské ódy!' 
- - # generated with original js lib - jsLzStringBase64 = 'r6ABsK6KaAD2aLCADWBfgBPQ9oCAlAZAvgDobEARlB4QAEOAjAUxAGd4BL5AZ4BMBPAQiAAA' - jsLzStringBase64Json = 'N4Ig5gNg9gzjCGAnAniAXKALgS0xApuiPgB7wC2ADgQASSwIogA0IA4tHACLYBu6WXASIBlFu04wAMthiYBEhgFEAdpiYYQASS6i2AWSniRURJgCCMPYfEcGAFXyJyozPBUATJB5pt8Kp3gIbAAvfB99JABrAFdKGil3MBj4MEJWcwBjRCgVZBc0EBEDIwyAIzLEfH5CrREAeRoADiaAdgBONABGdqaANltJLnwAMwVKJHgicxpyfDcAWnJouJoIJJS05hoYmHCaTCgabPx4THxZlfj1lWTU/BgaGBjMgAsaeEeuKEyAISgoFEAHSDBgifD4cwQGBQdAAbXYNlYAA0bABdAC+rDscHBhEKy0QsUoIAxZLJQAAA==' - - print('String for encode: ' + s) - print() - - print('Compress to base64:') - base2 = x.compressToBase64(s) - print('result: ' + base2) - print('result js: ' + jsLzStringBase64) - print('equals: ' + str(base2 == jsLzStringBase64)) - - print() - - print('Decompress from base64:') - print('result: ' + x.decompresFromBase64(base2)) - print('result from js: ' + x.decompresFromBase64(jsLzStringBase64)) - - print() - - jsonString = '{"glossary":{"title":"example glossary","GlossDiv":{"title":"S","GlossList":{"GlossEntry":{"ID":"SGML","SortAs":"SGML","GlossTerm":"Standard Generalized Markup Language","Acronym":"SGML","Abbrev":"ISO 8879:1986","GlossDef":{"para":"A meta-markup language, used to create markup languages such as DocBook.","GlossSeeAlso":["GML","XML"]},"GlossSee":"markup"}}}}}' - - print('Compress json to base64:') - jresult = x.compressToBase64(jsonString) - print('result: ' + jresult) - print() - print('result js: ' + jsLzStringBase64Json) - print() - print('equals: ' + str(jresult == jsLzStringBase64Json)) - - print() - - print('Decompress json from base64:') - pprint.pprint(json.loads(x.decompresFromBase64(jsLzStringBase64Json))) + #!/usr/bin/python + # -*- coding: utf-8 -*- + +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import str +from future import standard_library +standard_library.install_aliases() 
+import json +import lzstring +import pprint + + +if __name__ == '__main__': + x = lzstring.LZString() + + s = 'Žluťoučký kůň úpěl ďábelské ódy!' + + # generated with original js lib + jsLzStringBase64 = 'r6ABsK6KaAD2aLCADWBfgBPQ9oCAlAZAvgDobEARlB4QAEOAjAUxAGd4BL5AZ4BMBPAQiAAA' + jsLzStringBase64Json = 'N4Ig5gNg9gzjCGAnAniAXKALgS0xApuiPgB7wC2ADgQASSwIogA0IA4tHACLYBu6WXASIBlFu04wAMthiYBEhgFEAdpiYYQASS6i2AWSniRURJgCCMPYfEcGAFXyJyozPBUATJB5pt8Kp3gIbAAvfB99JABrAFdKGil3MBj4MEJWcwBjRCgVZBc0EBEDIwyAIzLEfH5CrREAeRoADiaAdgBONABGdqaANltJLnwAMwVKJHgicxpyfDcAWnJouJoIJJS05hoYmHCaTCgabPx4THxZlfj1lWTU/BgaGBjMgAsaeEeuKEyAISgoFEAHSDBgifD4cwQGBQdAAbXYNlYAA0bABdAC+rDscHBhEKy0QsUoIAxZLJQAAA==' + + print('String for encode: ' + s) + print() + + print('Compress to base64:') + base2 = x.compressToBase64(s) + print('result: ' + base2) + print('result js: ' + jsLzStringBase64) + print('equals: ' + str(base2 == jsLzStringBase64)) + + print() + + print('Decompress from base64:') + print('result: ' + x.decompresFromBase64(base2)) + print('result from js: ' + x.decompresFromBase64(jsLzStringBase64)) + + print() + + jsonString = '{"glossary":{"title":"example glossary","GlossDiv":{"title":"S","GlossList":{"GlossEntry":{"ID":"SGML","SortAs":"SGML","GlossTerm":"Standard Generalized Markup Language","Acronym":"SGML","Abbrev":"ISO 8879:1986","GlossDef":{"para":"A meta-markup language, used to create markup languages such as DocBook.","GlossSeeAlso":["GML","XML"]},"GlossSee":"markup"}}}}}' + + print('Compress json to base64:') + jresult = x.compressToBase64(jsonString) + print('result: ' + jresult) + print() + print('result js: ' + jsLzStringBase64Json) + print() + print('equals: ' + str(jresult == jsLzStringBase64Json)) + + print() + + print('Decompress json from base64:') + pprint.pprint(json.loads(x.decompresFromBase64(jsLzStringBase64Json))) From 46c3b9f54d9b9f5afaa578b3825c06b02e0f614f Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Sun, 7 Jun 2015 17:07:14 -0700 Subject: [PATCH 02/13] upload 
to pip --- README.md => README.rst | 0 lzstring.py => lzstring/__init__.py | 0 lzstring/__init__.pyc | Bin 0 -> 10978 bytes setup.cfg | 2 ++ setup.py | 34 ++++++++++++++++++++++++++++ 5 files changed, 36 insertions(+) rename README.md => README.rst (100%) rename lzstring.py => lzstring/__init__.py (100%) create mode 100644 lzstring/__init__.pyc create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/README.md b/README.rst similarity index 100% rename from README.md rename to README.rst diff --git a/lzstring.py b/lzstring/__init__.py similarity index 100% rename from lzstring.py rename to lzstring/__init__.py diff --git a/lzstring/__init__.pyc b/lzstring/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9df4201f3a4ec63f82c08f47ceccd33a5af7e58 GIT binary patch literal 10978 zcmcgyYm8e*6`p&qef?Ok?R_M>X}3+2Qo0b*=GANqO?I2^vQHW<)UhSau5@8 zILk=`i5EtabFMfQr|9aMBxjKBDcv&}>E$1-kVmVvBrdw>Z6c;b& zm%KTV^-Ox+g9sOwP%l56#VmK~L1glZ8-)GAq_H#DHD)Ys7c&8svm_H#xqxJBl?zHH zq;j@o!YVaC83L~*a^}B@|C1bnO9z%Ur83AUEGC=gswa@1m_2#wfzuB@^zb8(p84$D zV~>9>HGlSr&o6x8$#iycDVM)+@se|Sxv=6ES4-uJSG{s|?b`JN2M--Sa?jDJd+*zK z_p#P=m-E*!M_mqpbtPZ8Fo{nkNo5qd!u5)*!rhsS<2qg@bJyxMS)N9johp=08B6@& z0&_QH9(yponk>_DSe28U(>zva-K1iyTasd_47HARLW)H~iWRcddYCJ>9+7-h>Ib;G z>oG|O$#>mQYo{#FP@c)+4f#8D{(!X}XSo(vyTG`HKT9o#RQUx}i!hIx`;o(jOkn^I z6#=PpA#I4cF-TIwAINM~{uA?%s#KEZfK>j~^gskrO|=UHV^5c%g1<4=<`o30MJdgx z^AD)?gsMU0pgIo8YdAb1wQQ>Ll)<@R76ZzId@**PT5rs-^uBbZe1i&wg|@M+}FvC1D^L!>#_C$+Xovq`%SHF zFm}X1e>S=&nCE!8$QCE~?AFKhP4J|-P=lHU)xFR*=1kd}S4|I!2(7>-LO?V@|F{=X zMGXn4%TN;RJm3zP?3P8PLeMoVlZf5e{SVxiW7d|J$|v@Fn~VKbtFPEv^KHN8L({cd zFK{}BTDDZ#wRr@zj{hOprgwE6|6X8=I(~DY-|R|nT09PQ9e>m6R)Ov5_}BZ6)~0s8 zwLBg8P&)pvtq)e$@s|SK6WpGTUk>(L&R!F|Gq|aa|18*7S^kGQzQ_JBV!yt**sk4E z?2>EabkcHc`xw6Rp+tPfURXO$>rtKvZ3gj)P$#%QUV6j!>HHh^0(#6(Mqw`Jsz|iE z?b&iQFBQddE=9*i3aiDd z_0QEwa!C!aC58k}R&iyuoUc^!IhZ4*u4D_~FpS(%(e?6cUMA;=9|m{HyJ^$vv&EC; zeAdf*5nuC?$H_H#TwQ8wyy^}48uM-;TfUfo*!3t|gM`D*JJ<6b7obt;R##>nui}mP 
z>T+2xo6){XI|J5ZI^K@X&a1_WL)Q|{1D4f+xxvR)-JiyKje)3gfTU*32$;6v`)!p> z+uUVD(IblAL6f-%{&yN9QfnJYl;dX795IGbugg($2y`Px+*cFr>Jt}k32+^;q8;?D zHjW!LV)*^2#!QOr{@KLMh|D&Lb&8wfEK>KH&f>4I4_(e8oIF!J0h68pTt<2YIv?=m z;XdNvi`8zqkEm(5kMN`~H+)2RjW`C{UxdfCcG85s#zBO;IZ7Yy=PaYJ6D;Yf-DJ=T zf(U+^G~5hH#ODjDX-d?i)mAg>XwoEb`^GQ^`44pQW22vEP?dfn{+*s^P`S^yj}lVj z5U?o+M-q31jMz{g>D%oi)f@dIVGaG#cl01%3_k8#==MeD3#%Ep zZpVSubd>_!y2{@TWKcWpqJyPou}^L7A(QPB9|r-HvU9 ztil_2(HT1SzTwNuW^Pa}6-Xm8C*3N<byEb~NggZ>?_^wuMY-Wtz16BGbhr~AY-A2K zGKVFTcb9Y%GP((wX48?>9*$k-44bnR(A1!=jm+8NY_^g=dgOh|b_R(8FabOPbo{MD zD9Q;V21rX9w;H#aqsAR(!rW_&8hepLKQn4YjeW+9u^attBVr@~qDM8b@LEmz-yXgz zc?+ik+(3&|lyVvx%~MW|(KR!0evf11(j#>eqRm^~MMPM5y;$t%X0a120DP@Y-61Sq>cwKD zo5dJd4Eb4%2#fc6u}F5a7zc~EpG8tw9J6}p#b7s!U0^ZbXE7)&p67Y(I;zu&@I?CIW-pxv3LiVfk4Eg+=4Y`A(@mBe4z^%@ceJ7`2X{ zMhl!={1NsG4h^)P0K@B|V1gxjK z_ZA;0X)6NSX|&WhErTe_6dgpNC1SeNpSmr9_X8_a+AYm^jY0Dd`B=hcXd6rjJT0I+ ztwO`7ph$~AqtYbR+Wt4XWSyi$Kp!bQFEJ)fQhnBT8^D21QXUc?=^#i67EMxZ0n!Fg zsFO6&PMQQMp{hx$k^gxE7}!ZV+)g?IQUYa@R3rYYMsBo=lu+#xm@qx+Cmod)9x!ub zU8G~}q{P-SKk1l|o;G1RIt3nWCncuRzBC2a#^D(=H{3-^3+EGiV@^Tt5cni-3-T~cp z3qbsw+yt@Zp(U4Zyi05b;Hkr)6`)QXWbP1I;s1leJ$zt za|ir$K5y8@pm_2cy6rlI|NlQnjU5`c8JFUk$_-!wiwy|QDTO#xWnA(=XMpO206DH{ z@XGx_4xa%i4cGV_AX%b6hy(CBVL+WjF{1D(8tK=E1h9t* z((5B??N-7z_(G>nollYn><7Rcm+1RTǡe~fnxpg8^*dO&;?vSM5JWo3IE&^%C}-4-obj+ZGo1-bdJi4kbxE zjraWDi_{uHUNGCFMr^5<)Za1tFLBA-?h+$F)ko^RrUVC?Qe)fj9;L5hFPn5V7`PPe zVc-%^CkWC&A=ge=a6HRnHue_ISS?BA;*P#6ME>c*S?D?4=dAwIeahPIbgRVqo*Fe| ziAG^cI8a(?3jx2da720v`{_V`VFv@-6?Si+pRh0M$dv32Pip`XG&pZONjNT&8;7Rp zEBd{#ZSFfVFRgQ@f^GBaqjST-{_}c!+w*#_@4UXH15(?V7qk-lrM2>G5E?koN$dTq z1vPNh)l5H#aEdPlY3|+z$YCRji(!G6=-8C&HM6H&rPoaSEpQZhFRPn)+EyKXK?c#6r`wFZ*k!(D zi4B&ZO-CZPD;N~g_DHmGrlX*r@;GFQu@H&Xq-f&hJ~l}srl0Y27^NAbVbK?nq2JJo zpHRkT+=Mo;5ig+!f@>Q>ICMS`?x_|$xz%0O4#6JO%1ff_BIF}4fs2f@P(JitlrMg) zWY-W#%9o|1&e5sy$6rK%M%;zdl*VSfMTBRwok6nPvc zSi2y-0z^XKhS2%IGbj8UJagXQOI!1qlh@YY8gzmY2gY3$J%ied(3j4Doa@cnaZKSg 
z^=`+}?As2UXtm2%rj7#vaJHvs;V&`E8!m1KL<-y+X6cq7TMsw@Sme7Xm|YO@8~q@g zHyEbSkDTm_%PG95hYg?zZ6-i`K+V?g9~!IbU0yU8)~&yezG%3Q%hg+xM1^FGZ@EpD z-!y2ob^lIA&9P!_V?j{HPb^iM(YFpPpagx|APM`dstm?{@k=N6r?dO7AJ~82xqavE zK6mWA?;b*R(7^<`hsgax;A#d95;4{h1zuXpIy_vdF6xIL9!WXSawIhj@zBYa_*#Jk z$8ixo(62#cQ!L1HP*H*k8GW_bhx*i~nCxeA7ZaZG)F+sH63G@$yo7d}cMe`Tbj09k zj9Qrik9x#Fj-_qxz`bK9j)?CcM`kC2sW?_RG-LQr;z36rsWHHWL|E!yWOK7C`AkMG zP?^k1F;^|1nBv7vzmFYY?g*2kOt>zoIV7!lNUrDiqB3tdet(AMlY4ZnN}$-a*Eq2qApgQg6Fsu3HYao<~q2~t=7aT!o9@G(ap;i v3Yl!dL9C^c3bXgem^{RUB5hTE8T)>{FvqE7xIy8IegIwnZxkg&Wvu@K$wet& literal 0 HcmV?d00001 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3480374 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[bdist_wheel] +universal=1 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e3829a0 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from setuptools import setup + +import io + +long_description = io.open('README.rst', encoding='utf8').read() + +setup( + name='lzstring', + version='1.0.0', + description='lz-string for python', + author='Geza Kovacs', + author_email='geza0kovacs@gmail.com', + packages=['lzstring'], + package_dir={'lzstring': 'lzstring'}, + package_data={}, + long_description=long_description, + url='https://github.com/gkovacs/lz-string-python', + download_url='https://github.com/gkovacs/lz-string-python', + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'Natural Language :: English', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.1', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + ], + install_requires=['future'], +) \ No newline at end of file From 
39713c36bc2266fb15687ad79b9a52bf7fd44af1 Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Sun, 7 Jun 2015 17:21:27 -0700 Subject: [PATCH 03/13] fix typo in method name, and update README.rst --- .gitignore | 5 +++++ README.rst | 15 +++++++++++++++ lzstring/__init__.py | 2 +- lzstring/__init__.pyc | Bin 10978 -> 0 bytes setup.py | 2 +- test.py | 6 +++--- 6 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 .gitignore delete mode 100644 lzstring/__init__.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2528606 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.pyc +__pycache__ +/lzstring.egg-info +/build/ +/dist/ diff --git a/README.rst b/README.rst index fcbd151..0314655 100644 --- a/README.rst +++ b/README.rst @@ -4,3 +4,18 @@ lz-string-python lz-string for python 2/3 Based on the LZ-String javascript found here: http://pieroxy.net/blog/pages/lz-string/index.html + +Example +------- +:: + + >>> import lzstring + >>> x = lzstring.LZString() + >>> compressed = x.compressToBase64(u"你好") # u'gbyl9NIA' + >>> x.decompressFromBase64(compressed) # u'\u4f60\u597d' + +Installation +------------ +:: + + $ pip install lzstring diff --git a/lzstring/__init__.py b/lzstring/__init__.py index 1b191c5..8a9a891 100644 --- a/lzstring/__init__.py +++ b/lzstring/__init__.py @@ -637,7 +637,7 @@ def decompress(self, compressed): enlargeIn = pow(2, numBits) numBits += 1 - def decompresFromBase64(self, iinput): + def decompressFromBase64(self, iinput): if iinput is None: return '' diff --git a/lzstring/__init__.pyc b/lzstring/__init__.pyc deleted file mode 100644 index b9df4201f3a4ec63f82c08f47ceccd33a5af7e58..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10978 zcmcgyYm8e*6`p&qef?Ok?R_M>X}3+2Qo0b*=GANqO?I2^vQHW<)UhSau5@8 zILk=`i5EtabFMfQr|9aMBxjKBDcv&}>E$1-kVmVvBrdw>Z6c;b& zm%KTV^-Ox+g9sOwP%l56#VmK~L1glZ8-)GAq_H#DHD)Ys7c&8svm_H#xqxJBl?zHH zq;j@o!YVaC83L~*a^}B@|C1bnO9z%Ur83AUEGC=gswa@1m_2#wfzuB@^zb8(p84$D 
zV~>9>HGlSr&o6x8$#iycDVM)+@se|Sxv=6ES4-uJSG{s|?b`JN2M--Sa?jDJd+*zK z_p#P=m-E*!M_mqpbtPZ8Fo{nkNo5qd!u5)*!rhsS<2qg@bJyxMS)N9johp=08B6@& z0&_QH9(yponk>_DSe28U(>zva-K1iyTasd_47HARLW)H~iWRcddYCJ>9+7-h>Ib;G z>oG|O$#>mQYo{#FP@c)+4f#8D{(!X}XSo(vyTG`HKT9o#RQUx}i!hIx`;o(jOkn^I z6#=PpA#I4cF-TIwAINM~{uA?%s#KEZfK>j~^gskrO|=UHV^5c%g1<4=<`o30MJdgx z^AD)?gsMU0pgIo8YdAb1wQQ>Ll)<@R76ZzId@**PT5rs-^uBbZe1i&wg|@M+}FvC1D^L!>#_C$+Xovq`%SHF zFm}X1e>S=&nCE!8$QCE~?AFKhP4J|-P=lHU)xFR*=1kd}S4|I!2(7>-LO?V@|F{=X zMGXn4%TN;RJm3zP?3P8PLeMoVlZf5e{SVxiW7d|J$|v@Fn~VKbtFPEv^KHN8L({cd zFK{}BTDDZ#wRr@zj{hOprgwE6|6X8=I(~DY-|R|nT09PQ9e>m6R)Ov5_}BZ6)~0s8 zwLBg8P&)pvtq)e$@s|SK6WpGTUk>(L&R!F|Gq|aa|18*7S^kGQzQ_JBV!yt**sk4E z?2>EabkcHc`xw6Rp+tPfURXO$>rtKvZ3gj)P$#%QUV6j!>HHh^0(#6(Mqw`Jsz|iE z?b&iQFBQddE=9*i3aiDd z_0QEwa!C!aC58k}R&iyuoUc^!IhZ4*u4D_~FpS(%(e?6cUMA;=9|m{HyJ^$vv&EC; zeAdf*5nuC?$H_H#TwQ8wyy^}48uM-;TfUfo*!3t|gM`D*JJ<6b7obt;R##>nui}mP z>T+2xo6){XI|J5ZI^K@X&a1_WL)Q|{1D4f+xxvR)-JiyKje)3gfTU*32$;6v`)!p> z+uUVD(IblAL6f-%{&yN9QfnJYl;dX795IGbugg($2y`Px+*cFr>Jt}k32+^;q8;?D zHjW!LV)*^2#!QOr{@KLMh|D&Lb&8wfEK>KH&f>4I4_(e8oIF!J0h68pTt<2YIv?=m z;XdNvi`8zqkEm(5kMN`~H+)2RjW`C{UxdfCcG85s#zBO;IZ7Yy=PaYJ6D;Yf-DJ=T zf(U+^G~5hH#ODjDX-d?i)mAg>XwoEb`^GQ^`44pQW22vEP?dfn{+*s^P`S^yj}lVj z5U?o+M-q31jMz{g>D%oi)f@dIVGaG#cl01%3_k8#==MeD3#%Ep zZpVSubd>_!y2{@TWKcWpqJyPou}^L7A(QPB9|r-HvU9 ztil_2(HT1SzTwNuW^Pa}6-Xm8C*3N<byEb~NggZ>?_^wuMY-Wtz16BGbhr~AY-A2K zGKVFTcb9Y%GP((wX48?>9*$k-44bnR(A1!=jm+8NY_^g=dgOh|b_R(8FabOPbo{MD zD9Q;V21rX9w;H#aqsAR(!rW_&8hepLKQn4YjeW+9u^attBVr@~qDM8b@LEmz-yXgz zc?+ik+(3&|lyVvx%~MW|(KR!0evf11(j#>eqRm^~MMPM5y;$t%X0a120DP@Y-61Sq>cwKD zo5dJd4Eb4%2#fc6u}F5a7zc~EpG8tw9J6}p#b7s!U0^ZbXE7)&p67Y(I;zu&@I?CIW-pxv3LiVfk4Eg+=4Y`A(@mBe4z^%@ceJ7`2X{ zMhl!={1NsG4h^)P0K@B|V1gxjK z_ZA;0X)6NSX|&WhErTe_6dgpNC1SeNpSmr9_X8_a+AYm^jY0Dd`B=hcXd6rjJT0I+ ztwO`7ph$~AqtYbR+Wt4XWSyi$Kp!bQFEJ)fQhnBT8^D21QXUc?=^#i67EMxZ0n!Fg zsFO6&PMQQMp{hx$k^gxE7}!ZV+)g?IQUYa@R3rYYMsBo=lu+#xm@qx+Cmod)9x!ub 
zU8G~}q{P-SKk1l|o;G1RIt3nWCncuRzBC2a#^D(=H{3-^3+EGiV@^Tt5cni-3-T~cp z3qbsw+yt@Zp(U4Zyi05b;Hkr)6`)QXWbP1I;s1leJ$zt za|ir$K5y8@pm_2cy6rlI|NlQnjU5`c8JFUk$_-!wiwy|QDTO#xWnA(=XMpO206DH{ z@XGx_4xa%i4cGV_AX%b6hy(CBVL+WjF{1D(8tK=E1h9t* z((5B??N-7z_(G>nollYn><7Rcm+1RTǡe~fnxpg8^*dO&;?vSM5JWo3IE&^%C}-4-obj+ZGo1-bdJi4kbxE zjraWDi_{uHUNGCFMr^5<)Za1tFLBA-?h+$F)ko^RrUVC?Qe)fj9;L5hFPn5V7`PPe zVc-%^CkWC&A=ge=a6HRnHue_ISS?BA;*P#6ME>c*S?D?4=dAwIeahPIbgRVqo*Fe| ziAG^cI8a(?3jx2da720v`{_V`VFv@-6?Si+pRh0M$dv32Pip`XG&pZONjNT&8;7Rp zEBd{#ZSFfVFRgQ@f^GBaqjST-{_}c!+w*#_@4UXH15(?V7qk-lrM2>G5E?koN$dTq z1vPNh)l5H#aEdPlY3|+z$YCRji(!G6=-8C&HM6H&rPoaSEpQZhFRPn)+EyKXK?c#6r`wFZ*k!(D zi4B&ZO-CZPD;N~g_DHmGrlX*r@;GFQu@H&Xq-f&hJ~l}srl0Y27^NAbVbK?nq2JJo zpHRkT+=Mo;5ig+!f@>Q>ICMS`?x_|$xz%0O4#6JO%1ff_BIF}4fs2f@P(JitlrMg) zWY-W#%9o|1&e5sy$6rK%M%;zdl*VSfMTBRwok6nPvc zSi2y-0z^XKhS2%IGbj8UJagXQOI!1qlh@YY8gzmY2gY3$J%ied(3j4Doa@cnaZKSg z^=`+}?As2UXtm2%rj7#vaJHvs;V&`E8!m1KL<-y+X6cq7TMsw@Sme7Xm|YO@8~q@g zHyEbSkDTm_%PG95hYg?zZ6-i`K+V?g9~!IbU0yU8)~&yezG%3Q%hg+xM1^FGZ@EpD z-!y2ob^lIA&9P!_V?j{HPb^iM(YFpPpagx|APM`dstm?{@k=N6r?dO7AJ~82xqavE zK6mWA?;b*R(7^<`hsgax;A#d95;4{h1zuXpIy_vdF6xIL9!WXSawIhj@zBYa_*#Jk z$8ixo(62#cQ!L1HP*H*k8GW_bhx*i~nCxeA7ZaZG)F+sH63G@$yo7d}cMe`Tbj09k zj9Qrik9x#Fj-_qxz`bK9j)?CcM`kC2sW?_RG-LQr;z36rsWHHWL|E!yWOK7C`AkMG zP?^k1F;^|1nBv7vzmFYY?g*2kOt>zoIV7!lNUrDiqB3tdet(AMlY4ZnN}$-a*Eq2qApgQg6Fsu3HYao<~q2~t=7aT!o9@G(ap;i v3Yl!dL9C^c3bXgem^{RUB5hTE8T)>{FvqE7xIy8IegIwnZxkg&Wvu@K$wet& diff --git a/setup.py b/setup.py index e3829a0..6bfe6cc 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='lzstring', - version='1.0.0', + version='1.0.2', description='lz-string for python', author='Geza Kovacs', author_email='geza0kovacs@gmail.com', diff --git a/test.py b/test.py index b558767..b3f6f9e 100644 --- a/test.py +++ b/test.py @@ -34,8 +34,8 @@ print() print('Decompress from base64:') - print('result: ' + x.decompresFromBase64(base2)) - print('result from js: ' + 
x.decompresFromBase64(jsLzStringBase64)) + print('result: ' + x.decompressFromBase64(base2)) + print('result from js: ' + x.decompressFromBase64(jsLzStringBase64)) print() @@ -52,4 +52,4 @@ print() print('Decompress json from base64:') - pprint.pprint(json.loads(x.decompresFromBase64(jsLzStringBase64Json))) + pprint.pprint(json.loads(x.decompressFromBase64(jsLzStringBase64Json))) From 1d72a1172f1f3ea1e858794b96e9d37c8882b82e Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Sun, 7 Jun 2015 17:32:28 -0700 Subject: [PATCH 04/13] updated example in readme --- README.rst | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 0314655..887fe91 100644 --- a/README.rst +++ b/README.rst @@ -11,8 +11,8 @@ Example >>> import lzstring >>> x = lzstring.LZString() - >>> compressed = x.compressToBase64(u"你好") # u'gbyl9NIA' - >>> x.decompressFromBase64(compressed) # u'\u4f60\u597d' + >>> compressed = x.compressToBase64(u'你好') # 'gbyl9NIA' + >>> x.decompressFromBase64(compressed) # '你好' Installation ------------ diff --git a/setup.py b/setup.py index 6bfe6cc..01b149c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='lzstring', - version='1.0.2', + version='1.0.3', description='lz-string for python', author='Geza Kovacs', author_email='geza0kovacs@gmail.com', From 377923c3144e138b181c5f86ed01c06ca79f7fd0 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 11 May 2017 10:02:23 +0200 Subject: [PATCH 05/13] Require at least v0.14.0 of future. Added travis config. 
--- .travis.yml | 14 ++++++++++++++ setup.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..44a0774 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.3" + - "3.4" + - "3.5" + - "3.6" +# commands to install dependencies +install: + - python setup.py -q install +# commands to run tests +script: + - python test.py diff --git a/setup.py b/setup.py index 01b149c..b1c18d0 100644 --- a/setup.py +++ b/setup.py @@ -30,5 +30,5 @@ 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', ], - install_requires=['future'], + install_requires=['future>=0.14.0'], ) \ No newline at end of file From 4d7c9983e7cfd22ed7615cc9bb161399f3cb0b38 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 18 May 2017 18:20:51 +0200 Subject: [PATCH 06/13] Add licence file. --- LICENSE.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 LICENSE.md diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..5f9ee7d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + From e29a0e0593be027995816a194e31941cf8a9f988 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Fri, 19 May 2017 09:39:55 +0200 Subject: [PATCH 07/13] Added MANIFEST.in file for PyPI packaging of licence and readme. --- MANIFEST.in | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..64c63b3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE.md +include README.rst \ No newline at end of file From 0f1867db461607811dff8319d9ecc9395669d0a1 Mon Sep 17 00:00:00 2001 From: peter-hams Date: Tue, 13 Jun 2017 15:01:01 +0200 Subject: [PATCH 08/13] Fixes decompression of base64 strings originated from JS compressor in Python. The original Python code simply copies the structure of the original JS code. However, the JS code only works because at the end of the array, when it increases the data.index value beyond its length, the str.charCodeAt(index) function returns NaN. The NaN value is in the next iteration combined using the bitwise AND operator with data.position to produce numerical 0. This change reproduces this ingenious /s JS behaviour in Python.
--- lzstring/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lzstring/__init__.py b/lzstring/__init__.py index 8a9a891..657a3f2 100644 --- a/lzstring/__init__.py +++ b/lzstring/__init__.py @@ -527,12 +527,18 @@ def decompress(self, compressed): power = 1 while power != maxpower: - resb = data_val & data_position + if data_val is not None: + resb = data_val & data_position + else: + resb = 0 data_position >>= 1 if data_position == 0: data_position = 32768 - data_val = ord(data_string[data_index]) + if data_index < len(data_string): + data_val = ord(data_string[data_index]) + else: + data_val = None data_index += 1 bits |= (1 if resb > 0 else 0) * power @@ -555,12 +561,18 @@ def decompress(self, compressed): power = 1 while power != maxpower: - resb = data_val & data_position + if data_val is not None: + resb = data_val & data_position + else: + resb = 0 data_position >>= 1 if data_position == 0: data_position = 32768 - data_val = ord(data_string[data_index]) + if data_index < len(data_string): + data_val = ord(data_string[data_index]) + else: + data_val = None data_index += 1 bits |= (1 if resb > 0 else 0) * power From 916183026bdc80c33913f97b4927bd20df109987 Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Thu, 31 May 2018 18:45:48 -0700 Subject: [PATCH 09/13] switch implementation to that of https://github.com/marcel-dancak/lz-string-python which better matches recent versions of lzstring --- lzstring/__init__.py | 929 +++++++++++++++---------------------------- 1 file changed, 331 insertions(+), 598 deletions(-) diff --git a/lzstring/__init__.py b/lzstring/__init__.py index 657a3f2..a264411 100644 --- a/lzstring/__init__.py +++ b/lzstring/__init__.py @@ -15,683 +15,416 @@ import re -class LZString(object): - - def __init__(self): - self.keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" - - def compress(self, uncompressed): - - if uncompressed is None: - return '' - - value = 0 - 
context_dictionary = {} - context_dictionaryToCreate = {} - context_c = '' - context_wc = '' - context_w = '' - context_enlargeIn = 2 - - context_dictSize = 3 - context_numBits = 2 - context_data_string = '' - context_data_val = 0 - context_data_position = 0 - - uncompressed = uncompressed - - for ii in range(len(uncompressed)): - context_c = uncompressed[ii] - - if not context_c in context_dictionary: - context_dictionary[context_c] = context_dictSize - context_dictSize += 1 - context_dictionaryToCreate[context_c] = True - - context_wc = context_w + context_c - - if context_wc in context_dictionary: - context_w = context_wc - else: - if context_w in context_dictionaryToCreate: - if ord(context_w[0]) < 256: - for i in range(context_numBits): - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = ord(context_w[0]) - - for i in range(8): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - else: - value = 1 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | value - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = 0 - - value = ord(context_w[0]) - - for i in range(16): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits 
+= 1 - - context_dictionaryToCreate.pop(context_w, None) - #del context_dictionaryToCreate[context_w] - else: - value = context_dictionary[context_w] - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - context_dictionary[context_wc] = context_dictSize - context_dictSize += 1 - context_w = context_c - if context_w != '': +keyStrBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" +keyStrUriSafe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$" +baseReverseDic = {}; + +class Object(object): + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +def getBaseValue(alphabet, character): + if alphabet not in baseReverseDic: + baseReverseDic[alphabet] = {} + for i in range(len(alphabet)): + baseReverseDic[alphabet][alphabet[i]] = i + return baseReverseDic[alphabet][character] + + +def _compress(uncompressed, bitsPerChar, getCharFromInt): + if (uncompressed is None): + return "" + + context_dictionary = {} + context_dictionaryToCreate= {} + context_c = "" + context_wc = "" + context_w = "" + context_enlargeIn = 2 # Compensate for the first entry which should not count + context_dictSize = 3 + context_numBits = 2 + context_data = [] + context_data_val = 0 + context_data_position = 0 + + for ii in range(len(uncompressed)): + context_c = uncompressed[ii] + if context_c not in context_dictionary: + context_dictionary[context_c] = context_dictSize + context_dictSize += 1 + context_dictionaryToCreate[context_c] = True + + context_wc = context_w + context_c + if context_wc in context_dictionary: + context_w = context_wc + else: if context_w 
in context_dictionaryToCreate: if ord(context_w[0]) < 256: for i in range(context_numBits): context_data_val = (context_data_val << 1) - - if context_data_position == 15: + if context_data_position == bitsPerChar-1: context_data_position = 0 - context_data_string += chr(context_data_val) + context_data.append(getCharFromInt(context_data_val)) context_data_val = 0 else: context_data_position += 1 - value = ord(context_w[0]) - for i in range(8): context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: + if context_data_position == bitsPerChar - 1: context_data_position = 0 - context_data_string += chr(context_data_val) + context_data.append(getCharFromInt(context_data_val)) context_data_val = 0 else: context_data_position += 1 - value = value >> 1 + else: value = 1 - for i in range(context_numBits): context_data_val = (context_data_val << 1) | value - - if context_data_position == 15: + if context_data_position == bitsPerChar - 1: context_data_position = 0 - context_data_string += chr(context_data_val) + context_data.append(getCharFromInt(context_data_val)) context_data_val = 0 else: context_data_position += 1 - value = 0 - value = ord(context_w[0]) - for i in range(16): context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: + if context_data_position == bitsPerChar - 1: context_data_position = 0 - context_data_string += chr(context_data_val) + context_data.append(getCharFromInt(context_data_val)) context_data_val = 0 else: context_data_position += 1 - value = value >> 1 - context_enlargeIn -= 1 - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) + context_enlargeIn = math.pow(2, context_numBits) context_numBits += 1 - - context_dictionaryToCreate.pop(context_w, None) - #del context_dictionaryToCreate[context_w] + del context_dictionaryToCreate[context_w] else: value = context_dictionary[context_w] - for i in range(context_numBits): context_data_val = (context_data_val << 
1) | (value & 1) - - if context_data_position == 15: + if context_data_position == bitsPerChar - 1: context_data_position = 0 - context_data_string += chr(context_data_val) + context_data.append(getCharFromInt(context_data_val)) context_data_val = 0 else: context_data_position += 1 - value = value >> 1 context_enlargeIn -= 1 - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) + context_enlargeIn = math.pow(2, context_numBits) context_numBits += 1 - - value = 2 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - while True: - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_string += chr(context_data_val) - break - else: - context_data_position += 1 - - return context_data_string - - def compressToBase64(self, string): - if string is None: - return '' - - output = '' - - chr1 = float('NaN') - chr2 = float('NaN') - chr3 = float('NaN') - enc1 = 0 - enc2 = 0 - enc3 = 0 - enc4 = 0 - - i = 0 - - string = self.compress(string) - strlen = len(string) - - while i < (strlen * 2): - if (i % 2) == 0: - chr1 = ord(string[int(i / 2)]) >> 8 - chr2 = ord(string[int(i / 2)]) & 255 - - if (i / 2) + 1 < strlen: - chr3 = ord(string[int((i / 2) + 1)]) >> 8 - else: - chr3 = float('NaN') + + # Add wc to the dictionary. + context_dictionary[context_wc] = context_dictSize + context_dictSize += 1 + context_w = str(context_c) + + # Output the code for w. 
+ if context_w != "": + if context_w in context_dictionaryToCreate: + if ord(context_w[0]) < 256: + for i in range(context_numBits): + context_data_val = (context_data_val << 1) + if context_data_position == bitsPerChar-1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = ord(context_w[0]) + for i in range(8): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 else: - chr1 = ord(string[int((i - 1) / 2)]) & 255 - if (i + 1) / 2 < strlen: - chr2 = ord(string[int((i + 1) / 2)]) >> 8 - chr3 = ord(string[int((i + 1) / 2)]) & 255 + value = 1 + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | value + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = 0 + value = ord(context_w[0]) + for i in range(16): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + context_enlargeIn -= 1 + if context_enlargeIn == 0: + context_enlargeIn = math.pow(2, context_numBits) + context_numBits += 1 + del context_dictionaryToCreate[context_w] + else: + value = context_dictionary[context_w] + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 else: - chr2 = float('NaN') - chr3 = float('NaN') - - 
i += 3 - - # python dont support bit operation with NaN like javascript - enc1 = chr1 >> 2 - enc2 = ((chr1 & 3) << 4) | (chr2 >> 4 if not math.isnan(chr2) else 0) - enc3 = ((chr2 & 15 if not math.isnan(chr2) else 0) << 2) | (chr3 >> 6 if not math.isnan(chr3) else 0) - enc4 = (chr3 if not math.isnan(chr3) else 0) & 63 - - if math.isnan(chr2): - enc3 = 64 - enc4 = 64 - elif math.isnan(chr3): - enc4 = 64 - - output += self.keyStr[enc1] + self.keyStr[enc2] + self.keyStr[enc3] + self.keyStr[enc4] - - return output - - def compressToUTF16(self, string): - - if string is None: - return '' - - output = '' - c = 0 - current = 0 - status = 0 - - string = self.compress(string) - - for i in range(len(string)): - c = ord(string[i]) - - if status == 0: - status += 1 - output += chr(((c >> 1) + 32)) - current = (c & 1) << 14 - elif status == 1: - status += 1 - output += chr(((current + (c >> 2)) + 32)) - current = (c & 3) << 13 - elif status == 2: - status += 1 - output += chr(((current + (c >> 3)) + 32)) - current = (c & 7) << 12 - elif status == 3: - status += 1 - output += chr(((current + (c >> 4)) + 32)) - current = (c & 15) << 11 - elif status == 4: - status += 1 - output += chr(((current + (c >> 5)) + 32)) - current = (c & 31) << 10 - elif status == 5: - status += 1 - output += chr(((current + (c >> 6)) + 32)) - current = (c & 63) << 9 - elif status == 6: - status += 1 - output += chr(((current + (c >> 7)) + 32)) - current = (c & 127) << 8 - elif status == 7: - status += 1 - output += chr(((current + (c >> 8)) + 32)) - current = (c & 255) << 7 - elif status == 8: - status += 1 - output += chr(((current + (c >> 9)) + 32)) - current = (c & 511) << 6 - elif status == 9: - status += 1 - output += chr(((current + (c >> 10)) + 32)) - current = (c & 1023) << 5 - elif status == 10: - status += 1 - output += chr(((current + (c >> 11)) + 32)) - current = (c & 2047) << 4 - elif status == 11: - status += 1 - output += chr(((current + (c >> 12)) + 32)) - current = (c & 4095) << 3 - elif 
status == 12: - status += 1 - output += chr(((current + (c >> 13)) + 32)) - current = (c & 8191) << 2 - elif status == 13: - status += 1 - output += chr(((current + (c >> 14)) + 32)) - current = (c & 16383) << 1 - elif status == 14: - status += 1 - output += chr(((current + (c >> 15)) + 32)) - output += chr((c & 32767) + 32) - - status = 0 - - output += chr(current + 32) - - return output - - #written by https://github.com/v-python - def decompressFromUTF16(self, string): - if not string: + context_data_position += 1 + value = value >> 1 + + context_enlargeIn -= 1 + if context_enlargeIn == 0: + context_enlargeIn = math.pow(2, context_numBits) + context_numBits += 1 + + # Mark the end of the stream + value = 2 + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + + # Flush the last char + while True: + context_data_val = (context_data_val << 1) + if context_data_position == bitsPerChar - 1: + context_data.append(getCharFromInt(context_data_val)) + break + else: + context_data_position += 1 + + return "".join(context_data) + + +def _decompress(length, resetValue, getNextValue): + dictionary = {} + enlargeIn = 4 + dictSize = 4 + numBits = 3 + entry = "" + result = [] + + data = Object( + val=getNextValue(0), + position=resetValue, + index=1 + ) + + for i in range(3): + dictionary[i] = i + + bits = 0 + maxpower = math.pow(2, 2) + power = 1 + + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue + data.val = getNextValue(data.index) + data.index += 1 + + bits |= power if resb > 0 else 0 + power <<= 1; + + next = bits + if next == 0: + bits = 0 + maxpower = math.pow(2, 8) + power = 1 + while power != maxpower: + resb = data.val & 
data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + c = chr(bits) + elif next == 1: + bits = 0 + maxpower = math.pow(2, 16) + power = 1 + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue; + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + c = chr(bits) + elif next == 2: + return "" + + dictionary[3] = c + w = c + result.append(c) + counter = 0 + while True: + counter += 1 + if data.index > length: return "" - output = "" - status = 0 - i = 0 - - while i < len(string): - c = ord(string[i]) - 32 - i += 1 - - if status == 0: - status = 1 - current = c << 1 - elif status == 1: - status = 2 - output += chr(current + (c >> 14)) - current = (c & 16383) << 2 - elif status == 2: - status = 3 - output += chr(current + (c >> 13)) - current = (c & 8191) << 3 - elif status == 3: - status = 4 - output += chr(current + (c >> 12)) - current = (c & 4095) << 4 - elif status == 4: - status = 5 - output += chr(current + (c >> 11)) - current = (c & 2047) << 5 - elif status == 5: - status = 6 - output += chr(current + (c >> 10)) - current = (c & 1023) << 6 - elif status == 6: - status = 7 - output += chr(current + (c >> 9)) - current = (c & 511) << 7 - elif status == 7: - status = 8 - output += chr(current + (c >> 8)) - current = (c & 255) << 8 - elif status == 8: - status = 9 - output += chr(current + (c >> 7)) - current = (c & 127) << 9 - elif status == 9: - status = 10 - output += chr(current + (c >> 6)) - current = (c & 63) << 10 - elif status == 10: - status = 11 - output += chr(current + (c >> 5)) - current = (c & 31) << 11 - elif status == 11: - status = 12 - output += chr(current + (c >> 4)) - current = (c & 15) << 12 - elif status == 12: - status = 13 - output += chr(current + (c >> 3)) - current 
= (c & 7) << 13 - elif status == 13: - status = 14 - output += chr(current + (c >> 2)) - current = (c & 3) << 14 - elif status == 14: - status = 15 - output += chr(current + (c >> 1)) - current = (c & 1) << 15 - elif status == 15: - status = 0 - output += chr(current + c) - current = (c & 1) << 15 - - return self.decompress(output) - - def decompress(self, compressed): - - if (compressed is None) or (compressed == ''): - return '' - - dictionary = {} - enlargeIn = 4 - dictSize = 4 - numBits = 3 - (entry, result, w, c) = ('', '', '', '') - (i, nnext, bits, resb, maxpower, power) = (0, 0, 0, 0, 0, 0) - - data_string = compressed - data_val = ord(compressed[0]) - data_position = 32768 - data_index = 1 - - for i in range(3): - #dictionary[i] = i - dictionary[i] = '' - bits = 0 - maxpower = pow(2, 2) + maxpower = math.pow(2, numBits) power = 1 - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue; + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 power <<= 1 - nnext = bits - if nnext == 0: + c = bits + if c == 0: bits = 0 - maxpower = pow(2, 8) + maxpower = math.pow(2, 8) power = 1 - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 power <<= 1 - c = chr(bits) - elif nnext == 1: - bits = 0 - maxpower = pow(2, 16) - power = 1 - - while power != maxpower: - if 
data_val is not None: - resb = data_val & data_position - else: - resb = 0 - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - if data_index < len(data_string): - data_val = ord(data_string[data_index]) - else: - data_val = None - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = chr(bits) - elif nnext == 2: - return '' - - dictionary[3] = c - result = c - w = result - - while True: - if data_index > len(data_string): - return '' - + dictionary[dictSize] = chr(bits) + dictSize += 1 + c = dictSize - 1 + enlargeIn -= 1 + elif c == 1: bits = 0 - maxpower = pow(2, numBits) + maxpower = math.pow(2, 16) power = 1 - while power != maxpower: - if data_val is not None: - resb = data_val & data_position - else: - resb = 0 - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - if data_index < len(data_string): - data_val = ord(data_string[data_index]) - else: - data_val = None - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue; + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 power <<= 1 - - c = bits - - if c == 0: - bits = 0 - maxpower = pow(2, 8) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - dictionary[dictSize] = chr(bits) - dictSize += 1 - c = dictSize - 1 - enlargeIn -= 1 - elif c == 1: - bits = 0 - maxpower = pow(2, 16) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - dictionary[dictSize] = chr(bits) - 
dictSize += 1 - c = dictSize - 1 - enlargeIn -= 1 - elif c == 2: - return result - - if enlargeIn == 0: - enlargeIn = pow(2, numBits) - numBits += 1 - - if c in dictionary: - entry = dictionary[c] - else: - if c == dictSize: - entry = w + w[0] - else: - return None - - result += entry - - dictionary[dictSize] = w + entry[0] + dictionary[dictSize] = chr(bits) dictSize += 1 + c = dictSize - 1 enlargeIn -= 1 + elif c == 2: + return "".join(result) - w = entry - - if enlargeIn == 0: - enlargeIn = pow(2, numBits) - numBits += 1 - def decompressFromBase64(self, iinput): - if iinput is None: - return '' + if enlargeIn == 0: + enlargeIn = math.pow(2, numBits) + numBits += 1 - output = "" - ol = 0 - output_ = '' - - i = 0 + if c in dictionary: + entry = dictionary[c] + else: + if c == dictSize: + entry = w + w[0] + else: + return None + result.append(entry) - iinput = re.sub(r'[^A-Za-z0-9\+\/\=]', '', iinput) + # Add w+entry[0] to the dictionary. + dictionary[dictSize] = w + entry[0] + dictSize += 1 + enlargeIn -= 1 - while i < len(iinput): - enc1 = self.keyStr.index(iinput[i]) - i += 1 - enc2 = self.keyStr.index(iinput[i]) - i += 1 - enc3 = self.keyStr.index(iinput[i]) - i += 1 - enc4 = self.keyStr.index(iinput[i]) - i += 1 + w = entry + if enlargeIn == 0: + enlargeIn = math.pow(2, numBits) + numBits += 1 - chr1 = (enc1 << 2) | (enc2 >> 4) - chr2 = ((enc2 & 15) << 4) | (enc3 >> 2) - chr3 = ((enc3 & 3) << 6) | enc4 - if (ol % 2) == 0: - output_ = chr1 << 8 +class LZString(object): + @staticmethod + def compress(uncompressed): + return _compress(uncompressed, 16, chr) - if enc3 != 64: - output += chr(output_ | chr2) + @staticmethod + def compressToUTF16(uncompressed): + if uncompressed is None: + return "" + return _compress(uncompressed, 15, lambda a: chr(a+32)) + " " - if enc4 != 64: - output_ = chr3 << 8 - else: - output = output + chr(output_ | chr1) + @staticmethod + def compressToBase64(uncompressed): + if uncompressed is None: + return "" + res = 
_compress(uncompressed, 6, lambda a: keyStrBase64[a]) + # To produce valid Base64 + end = len(res) % 4 + if end > 0: + res += "="*(4 - end) + return res + + @staticmethod + def compressToEncodedURIComponent(uncompressed): + if uncompressed is None: + return "" + return _compress(uncompressed, 6, lambda a: keyStrUriSafe[a]) - if enc3 != 64: - output_ = chr2 << 8 + @staticmethod + def decompress(compressed): + if compressed is None: + return "" + if compressed == "": + return None + return _decompress(len(compressed), 32768, lambda index: ord(compressed[index])) - if enc4 != 64: - output += chr(output_ | chr3) + @staticmethod + def decompressFromUTF16(compressed): + if compressed is None: + return "" + if compressed == "": + return None + return _decompress(len(compressed), 16384, lambda index: compressed[index] - 32) - ol += 3 + @staticmethod + def decompressFromBase64(compressed): + if compressed is None: + return "" + if compressed == "": + return None + return _decompress(len(compressed), 32, lambda index: getBaseValue(keyStrBase64, compressed[index])) - return self.decompress(output) + @staticmethod + def decompressFromEncodedURIComponent(compressed): + if compressed is None: + return "" + if compressed == "": + return None + compressed = compressed.replace(" ", "+") + return _decompress(len(compressed), 32, lambda index: getBaseValue(keyStrUriSafe, compressed[index])) From 69fbb923f8f181c755f4f4656ea978caade077b3 Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Thu, 31 May 2018 18:47:28 -0700 Subject: [PATCH 10/13] updated reference output in example to match current lzstring versions --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 887fe91..4c586b8 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ Example >>> import lzstring >>> x = lzstring.LZString() - >>> compressed = x.compressToBase64(u'你好') # 'gbyl9NIA' + >>> compressed = x.compressToBase64(u'你好') # 'gbyl9NI=' >>> 
x.decompressFromBase64(compressed) # '你好' Installation From c7590be64f1afdd29c6b6f049785ea73fe7848e7 Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Thu, 31 May 2018 19:02:08 -0700 Subject: [PATCH 11/13] added keywords --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index b1c18d0..b2efb4f 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ long_description=long_description, url='https://github.com/gkovacs/lz-string-python', download_url='https://github.com/gkovacs/lz-string-python', + keywords=['lz-string', 'lzstring', 'compression'], classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', From 0b2773ede157fb69f0c837b853ccc5ab9c236c58 Mon Sep 17 00:00:00 2001 From: Geza Kovacs Date: Thu, 31 May 2018 19:31:56 -0700 Subject: [PATCH 12/13] bump version to 1.0.4 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b2efb4f..dca116d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='lzstring', - version='1.0.3', + version='1.0.4', description='lz-string for python', author='Geza Kovacs', author_email='geza0kovacs@gmail.com', From 9e5e63c2c62f24f0d633b8c380207a14d9011add Mon Sep 17 00:00:00 2001 From: Marc Olivier Chouinard Date: Sun, 10 May 2020 15:33:49 -0400 Subject: [PATCH 13/13] Implement Uint8 encoding compressToUint8Array decompressToUint8Array This is the best I could do without doing a bigger rewrite so lzstringn doesn't use the str() type and use bytes() (or another type). str() have lot of issues with conversions. 
--- lzstring/__init__.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lzstring/__init__.py b/lzstring/__init__.py index a264411..cf91d62 100644 --- a/lzstring/__init__.py +++ b/lzstring/__init__.py @@ -50,7 +50,10 @@ def _compress(uncompressed, bitsPerChar, getCharFromInt): context_data_position = 0 for ii in range(len(uncompressed)): - context_c = uncompressed[ii] + if isinstance(uncompressed, (bytes)): + context_c = chr(uncompressed[ii]) + else: + context_c = uncompressed[ii] if context_c not in context_dictionary: context_dictionary[context_c] = context_dictSize context_dictSize += 1 @@ -373,6 +376,11 @@ class LZString(object): def compress(uncompressed): return _compress(uncompressed, 16, chr) + @staticmethod + def compressToUint8Array(uncompressed): + return bytes([ord(x) for x in _compress(uncompressed, 8, chr)]) + + @staticmethod def compressToUTF16(uncompressed): if uncompressed is None: @@ -404,6 +412,14 @@ def decompress(compressed): return None return _decompress(len(compressed), 32768, lambda index: ord(compressed[index])) + @staticmethod + def decompressFromUint8Array(compressed): + if compressed is None: + return "" + if compressed == "": + return None + return _decompress(len(compressed), 128, lambda index: compressed[index]) + @staticmethod def decompressFromUTF16(compressed): if compressed is None: