Remove Python 2 compatibility shims #56

Merged: 10 commits, Feb 17, 2020

4 changes: 2 additions & 2 deletions gitdb/db/loose.py
@@ -50,11 +50,11 @@
stream_copy
)

from gitdb.utils.compat import MAXSIZE
from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys


__all__ = ('LooseObjectDB', )
@@ -196,7 +196,7 @@ def store(self, istream):
if istream.binsha is not None:
# copy as much as possible, the actual uncompressed item size might
# be smaller than the compressed version
stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
else:
# write object with header, we have to make a new one
write_object(istream.type, istream.size, istream.read, writer.write,
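
Note on the MAXSIZE change: the deleted compat shim resolved to sys.maxsize on Python 3, so passing sys.maxsize directly preserves behavior; here it simply means "copy until the source stream is exhausted". A minimal standalone sketch of that pattern (illustrative only, not gitdb's stream_copy):

    import sys
    from io import BytesIO

    def stream_copy_sketch(read, write, size=sys.maxsize, chunk_size=65536):
        # Copy up to `size` bytes in chunks; sys.maxsize effectively
        # means "until read() returns no more data".
        copied = 0
        while copied < size:
            chunk = read(min(size - copied, chunk_size))
            if not chunk:
                break
            write(chunk)
            copied += len(chunk)
        return copied

    src, dst = BytesIO(b"x" * 100000), BytesIO()
    assert stream_copy_sketch(src.read, dst.write) == 100000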
3 changes: 1 addition & 2 deletions gitdb/db/pack.py
@@ -18,7 +18,6 @@
)

from gitdb.pack import PackEntity
from gitdb.utils.compat import xrange

from functools import reduce

@@ -107,7 +106,7 @@ def sha_iter(self):
for entity in self.entities():
index = entity.index()
sha_by_index = index.sha
for index in xrange(index.size()):
for index in range(index.size()):
yield sha_by_index(index)
# END for each index
# END for each entity
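
Note: Python 3's range is a lazy sequence with the same semantics xrange had, so dropping the shim keeps memory use constant even when a pack index holds millions of entries. A quick illustration (the reported size is CPython-specific):

    import sys

    r = range(10**9)           # no billion-element list is materialized
    print(sys.getsizeof(r))    # small constant size (48 bytes on CPython)
    print(r[123_456_789])      # O(1) indexing, just like xrange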
217 changes: 70 additions & 147 deletions gitdb/fun.py
@@ -16,7 +16,6 @@

from gitdb.const import NULL_BYTE, BYTE_SPACE
from gitdb.utils.encoding import force_text
from gitdb.utils.compat import izip, buffer, xrange, PY3
from gitdb.typ import (
str_blob_type,
str_commit_type,
@@ -101,7 +100,7 @@ def delta_chunk_apply(dc, bbuf, write):
:param write: write method to call with data to write"""
if dc.data is None:
# COPY DATA FROM SOURCE
write(buffer(bbuf, dc.so, dc.ts))
write(bbuf[dc.so:dc.so + dc.ts])
else:
# APPEND DATA
# what's faster: if + 4 function calls or just a write with a slice?
@@ -264,7 +263,7 @@ def compress(self):
# if first_data_index is not None:
nd = StringIO() # new data
so = self[first_data_index].to # start offset in target buffer
for x in xrange(first_data_index, i - 1):
for x in range(first_data_index, i - 1):
xdc = self[x]
nd.write(xdc.data[:xdc.ts])
# END collect data
@@ -314,7 +313,7 @@ def check_integrity(self, target_size=-1):
right.next()
# this is very pythonic - we might have just used index-based access here,
# but this could actually be faster
for lft, rgt in izip(left, right):
for lft, rgt in zip(left, right):
assert lft.rbound() == rgt.to
assert lft.to + lft.ts == rgt.to
# END for each pair
@@ -424,20 +423,12 @@ def pack_object_header_info(data):
type_id = (c >> 4) & 7 # numeric type
size = c & 15 # starting size
s = 4 # starting bit-shift size
if PY3:
while c & 0x80:
c = byte_ord(data[i])
i += 1
size += (c & 0x7f) << s
s += 7
# END character loop
else:
while c & 0x80:
c = ord(data[i])
i += 1
size += (c & 0x7f) << s
s += 7
# END character loop
while c & 0x80:
c = byte_ord(data[i])
i += 1
size += (c & 0x7f) << s
s += 7
# END character loop
# end performance at expense of maintenance ...
return (type_id, size, i)
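
For reference, the unified loop decodes git's variable-length pack entry header: the first byte carries a 3-bit type and the low 4 bits of the size, and each continuation byte (MSB set) contributes 7 more size bits, least significant first. A standalone restatement with a worked example (not the library function itself):

    def parse_pack_header(data):
        # data[0]: 1 continuation bit, 3 type bits, 4 size bits
        c = data[0]
        type_id = (c >> 4) & 7
        size = c & 15
        s = 4
        i = 1
        while c & 0x80:              # continuation bit set
            c = data[i]
            i += 1
            size += (c & 0x7f) << s  # 7 payload bits per extra byte
            s += 7
        return type_id, size, i

    # 0x95 = MSB set, type 1 (commit), size nibble 5; 0x0a adds 10 << 4 = 160
    assert parse_pack_header(b"\x95\x0a") == (1, 165, 2)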

@@ -450,28 +441,16 @@ def create_pack_object_header(obj_type, obj_size):
:param obj_type: pack type_id of the object
:param obj_size: uncompressed size in bytes of the following object stream"""
c = 0 # 1 byte
if PY3:
hdr = bytearray() # output string

c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
while obj_size:
hdr.append(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
# END until size is consumed
hdr.append(c)
else:
hdr = bytes() # output string

c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
while obj_size:
hdr += chr(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
# END until size is consumed
hdr += chr(c)
hdr = bytearray() # output string

c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
while obj_size:
hdr.append(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
# END until size is consumed
hdr.append(c)
# end handle interpreter
return hdr
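
bytearray.append takes integers, which is why the Python 3 branch was the keeper; concatenating chr() onto bytes only worked on Python 2. A quick round-trip against the parser sketched above confirms the layout:

    hdr = bytearray()
    obj_type, obj_size = 1, 165          # commit, 165 bytes
    c = (obj_type << 4) | (obj_size & 0xf)
    obj_size >>= 4
    while obj_size:
        hdr.append(c | 0x80)             # set the continuation bit
        c = obj_size & 0x7f
        obj_size >>= 7
    hdr.append(c)
    assert bytes(hdr) == b"\x95\x0a"     # parse_pack_header returns (1, 165, 2)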

@@ -484,26 +463,15 @@ def msb_size(data, offset=0):
i = 0
l = len(data)
hit_msb = False
if PY3:
while i < l:
c = data[i + offset]
size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
break
# END check msb bit
# END while in range
else:
while i < l:
c = ord(data[i + offset])
size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
break
# END check msb bit
# END while in range
while i < l:
c = data[i + offset]
size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
break
# END check msb bit
# END while in range
# end performance ...
if not hit_msb:
raise AssertionError("Could not find terminating MSB byte in data stream")
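
msb_size reads the little-endian base-128 sizes that prefix a delta stream: seven payload bits per byte, with bit 7 flagging that more bytes follow. A condensed sketch of the same decoding (it omits the terminator check the real function performs):

    def msb_size_sketch(data, offset=0):
        size, i = 0, 0
        while True:
            c = data[i + offset]
            size |= (c & 0x7f) << (i * 7)   # 7 bits, least significant first
            i += 1
            if not c & 0x80:                # MSB clear: last byte
                return size, i + offset

    # 0x90 contributes 0x10, 0x01 contributes 1 << 7 = 128: total 144
    assert msb_size_sketch(b"\x90\x01") == (144, 2)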
@@ -663,93 +631,48 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
**Note:** transcribed to python from the similar routine in patch-delta.c"""
i = 0
db = delta_buf
if PY3:
while i < delta_buf_size:
c = db[i]
i += 1
if c & 0x80:
cp_off, cp_size = 0, 0
if (c & 0x01):
cp_off = db[i]
i += 1
if (c & 0x02):
cp_off |= (db[i] << 8)
i += 1
if (c & 0x04):
cp_off |= (db[i] << 16)
i += 1
if (c & 0x08):
cp_off |= (db[i] << 24)
i += 1
if (c & 0x10):
cp_size = db[i]
i += 1
if (c & 0x20):
cp_size |= (db[i] << 8)
i += 1
if (c & 0x40):
cp_size |= (db[i] << 16)
i += 1

if not cp_size:
cp_size = 0x10000

rbound = cp_off + cp_size
if (rbound < cp_size or
rbound > src_buf_size):
break
write(buffer(src_buf, cp_off, cp_size))
elif c:
write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
# END handle command byte
# END while processing delta data
else:
while i < delta_buf_size:
c = ord(db[i])
i += 1
if c & 0x80:
cp_off, cp_size = 0, 0
if (c & 0x01):
cp_off = ord(db[i])
i += 1
if (c & 0x02):
cp_off |= (ord(db[i]) << 8)
i += 1
if (c & 0x04):
cp_off |= (ord(db[i]) << 16)
i += 1
if (c & 0x08):
cp_off |= (ord(db[i]) << 24)
i += 1
if (c & 0x10):
cp_size = ord(db[i])
i += 1
if (c & 0x20):
cp_size |= (ord(db[i]) << 8)
i += 1
if (c & 0x40):
cp_size |= (ord(db[i]) << 16)
i += 1

if not cp_size:
cp_size = 0x10000

rbound = cp_off + cp_size
if (rbound < cp_size or
rbound > src_buf_size):
break
write(buffer(src_buf, cp_off, cp_size))
elif c:
write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
# END handle command byte
# END while processing delta data
# end save byte_ord call and prevent performance regression in py2
while i < delta_buf_size:
c = db[i]
i += 1
if c & 0x80:
cp_off, cp_size = 0, 0
if (c & 0x01):
cp_off = db[i]
i += 1
if (c & 0x02):
cp_off |= (db[i] << 8)
i += 1
if (c & 0x04):
cp_off |= (db[i] << 16)
i += 1
if (c & 0x08):
cp_off |= (db[i] << 24)
i += 1
if (c & 0x10):
cp_size = db[i]
i += 1
if (c & 0x20):
cp_size |= (db[i] << 8)
i += 1
if (c & 0x40):
cp_size |= (db[i] << 16)
i += 1

if not cp_size:
cp_size = 0x10000

rbound = cp_off + cp_size
if (rbound < cp_size or
rbound > src_buf_size):
break
write(src_buf[cp_off:cp_off + cp_size])
elif c:
write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
# END handle command byte
# END while processing delta data

# yes, lets use the exact same error message that git uses :)
assert i == delta_buf_size, "delta replay has gone wild"
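
To make the opcode handling concrete: a command byte with bit 7 set copies a span out of the source buffer (its low bits select which offset/size bytes follow), a nonzero byte without bit 7 inserts that many literal bytes, and 0x00 is reserved. A toy delta exercising both paths through the function above (real call sites first strip the source/target size headers with msb_size):

    from io import BytesIO

    src = b"hello world"
    # 0x91 = copy: bit 0 -> one offset byte (0x00), bit 4 -> one size byte (0x05)
    # 0x01 = insert the next single literal byte ("!")
    delta = b"\x91\x00\x05\x01!"
    out = BytesIO()
    apply_delta_data(src, len(src), delta, len(delta), out.write)
    assert out.getvalue() == b"hello!"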
28 changes: 13 additions & 15 deletions gitdb/pack.py
@@ -62,12 +62,6 @@
from binascii import crc32

from gitdb.const import NULL_BYTE
from gitdb.utils.compat import (
izip,
buffer,
xrange,
to_bytes
)

import tempfile
import array
@@ -119,7 +113,7 @@ def pack_object_at(cursor, offset, as_stream):
# END handle type id
abs_data_offset = offset + total_rela_offset
if as_stream:
stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
stream = DecompressMemMapReader(data[total_rela_offset:], False, uncomp_size)
if delta_info is None:
return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream)
else:
@@ -207,7 +201,7 @@ def write(self, pack_sha, write):
for t in self._objs:
tmplist[byte_ord(t[0][0])] += 1
# END prepare fanout
for i in xrange(255):
for i in range(255):
v = tmplist[i]
sha_write(pack('>L', v))
tmplist[i + 1] += v
@@ -376,7 +370,7 @@ def _read_fanout(self, byte_offset):
d = self._cursor.map()
out = list()
append = out.append
for i in xrange(256):
for i in range(256):
append(unpack_from('>L', d, byte_offset + i * 4)[0])
# END for each entry
return out
@@ -410,14 +404,14 @@ def offsets(self):
if self._version == 2:
# read stream to array, convert to tuple
a = array.array('I') # 4 byte unsigned int, long are 8 byte on 64 bit it appears
a.frombytes(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))
a.frombytes(self._cursor.map()[self._pack_offset:self._pack_64_offset])

# network byte order to something the array module likes more
if sys.byteorder == 'little':
a.byteswap()
return a
else:
return tuple(self.offset(index) for index in xrange(self.size()))
return tuple(self.offset(index) for index in range(self.size()))
# END handle version

def sha_to_index(self, sha):
@@ -696,7 +690,7 @@ def _set_cache_(self, attr):
iter_offsets = iter(offsets_sorted)
iter_offsets_plus_one = iter(offsets_sorted)
next(iter_offsets_plus_one)
consecutive = izip(iter_offsets, iter_offsets_plus_one)
consecutive = zip(iter_offsets, iter_offsets_plus_one)

offset_map = dict(consecutive)
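
The two-iterator construction above is the standard pairwise idiom: advancing one copy by a single element and zipping yields (offset, next_offset) pairs, and because zip stops at the shorter iterator the final offset pairs with nothing; the surrounding code handles that last entry separately. Equivalent in miniature:

    offsets_sorted = [0, 12, 40, 95]
    a, b = iter(offsets_sorted), iter(offsets_sorted)
    next(b)
    assert dict(zip(a, b)) == {0: 12, 12: 40, 40: 95}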

@@ -716,7 +710,7 @@ def _iter_objects(self, as_stream):
"""Iterate over all objects in our index and yield their OInfo or OStream instences"""
_sha = self._index.sha
_object = self._object
for index in xrange(self._index.size()):
for index in range(self._index.size()):
yield _object(_sha(index), as_stream, index)
# END for each index

@@ -838,7 +832,7 @@ def is_valid_stream(self, sha, use_crc=False):
while cur_pos < next_offset:
rbound = min(cur_pos + chunk_size, next_offset)
size = rbound - cur_pos
this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value)
this_crc_value = crc_update(pack_data[cur_pos:cur_pos + size], this_crc_value)
cur_pos += size
# END window size loop

@@ -882,7 +876,11 @@ def collect_streams_at_offset(self, offset):
stream = streams[-1]
while stream.type_id in delta_types:
if stream.type_id == REF_DELTA:
sindex = self._index.sha_to_index(to_bytes(stream.delta_info))
# smmap can return memory view objects, which can't be compared as buffers/bytes can ...
if isinstance(stream.delta_info, memoryview):
sindex = self._index.sha_to_index(stream.delta_info.tobytes())
else:
sindex = self._index.sha_to_index(stream.delta_info)
if sindex is None:
break
stream = self._pack.stream(self._index.offset(sindex))
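
On the memoryview special case: a memoryview compares equal to bytes element-wise, but it does not support the ordering comparisons a sorted-index lookup needs, which is presumably what the inline comment alludes to; .tobytes() produces a real bytes object with full bytes semantics. Illustration:

    mv = memoryview(b"abc")
    assert mv == b"abc"            # equality against bytes works
    try:
        mv < b"abd"                # ordering does not: TypeError
    except TypeError:
        pass
    assert mv.tobytes() < b"abd"   # converting restores ordering comparisons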
5 changes: 2 additions & 3 deletions gitdb/stream.py
@@ -27,7 +27,6 @@
)

from gitdb.const import NULL_BYTE, BYTE_SPACE
from gitdb.utils.compat import buffer
from gitdb.utils.encoding import force_bytes

has_perf_mod = False
@@ -278,7 +277,7 @@ def read(self, size=-1):
# END adjust winsize

# takes a slice, but doesn't copy the data, it says ...
indata = buffer(self._m, self._cws, self._cwe - self._cws)
indata = self._m[self._cws:self._cwe]

# get the actual window end to be sure we don't use it for computations
self._cwe = self._cws + len(indata)
@@ -414,7 +413,7 @@ def _set_cache_brute_(self, attr):
buf = dstream.read(512) # read the header information + X
offset, src_size = msb_size(buf)
offset, target_size = msb_size(buf, offset)
buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
buffer_info_list.append((buf[offset:], offset, src_size, target_size))
max_target_size = max(max_target_size, target_size)
# END for each delta stream
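
One trade-off worth noting in these two hunks: Python 2's buffer() was a zero-copy view, while slicing bytes allocates a copy, so the old "doesn't copy the data" comment no longer holds. If the copies ever matter in profiles, memoryview is the zero-copy equivalent on Python 3, e.g.:

    data = bytes(10**6)
    view = memoryview(data)[100:]   # zero-copy view, like Python 2's buffer()
    copy = data[100:]               # a bytes slice allocates a new object
    assert bytes(view) == copy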

3 changes: 1 addition & 2 deletions gitdb/test/db/lib.py
@@ -23,7 +23,6 @@

from gitdb.exc import BadObject
from gitdb.typ import str_blob_type
from gitdb.utils.compat import xrange

from io import BytesIO

@@ -45,7 +44,7 @@ def _assert_object_writing_simple(self, db):
# write a bunch of objects and query their streams and info
null_objs = db.size()
ni = 250
for i in xrange(ni):
for i in range(ni):
data = pack(">L", i)
istream = IStream(str_blob_type, len(data), BytesIO(data))
new_istream = db.store(istream)
3 changes: 1 addition & 2 deletions gitdb/test/lib.py
@@ -4,7 +4,6 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Utilities used in ODB testing"""
from gitdb import OStream
from gitdb.utils.compat import xrange

import sys
import random
@@ -151,7 +150,7 @@ def make_bytes(size_in_bytes, randomize=False):
""":return: string with given size in bytes
:param randomize: try to produce a very random stream"""
actual_size = size_in_bytes // 4
producer = xrange(actual_size)
producer = range(actual_size)
if randomize:
producer = list(producer)
random.shuffle(producer)
3 changes: 1 addition & 2 deletions gitdb/test/performance/test_pack.py
@@ -17,7 +17,6 @@
from gitdb.typ import str_blob_type
from gitdb.exc import UnsupportedOperation
from gitdb.db.pack import PackedDB
from gitdb.utils.compat import xrange
from gitdb.test.lib import skip_on_travis_ci

import sys
@@ -118,7 +117,7 @@ def test_correctness(self):
for entity in pdb.entities():
pack_verify = entity.is_valid_stream
sha_by_index = entity.index().sha
for index in xrange(entity.index().size()):
for index in range(entity.index().size()):
try:
assert pack_verify(sha_by_index(index), use_crc=crc)
count += 1
10 changes: 2 additions & 8 deletions gitdb/test/test_pack.py
@@ -25,12 +25,6 @@
from gitdb.fun import delta_types
from gitdb.exc import UnsupportedOperation
from gitdb.util import to_bin_sha
from gitdb.utils.compat import xrange

try:
from itertools import izip
except ImportError:
izip = zip

from nose import SkipTest

@@ -63,7 +57,7 @@ def _assert_index_file(self, index, version, size):
assert len(index.offsets()) == size

# get all data of all objects
for oidx in xrange(index.size()):
for oidx in range(index.size()):
sha = index.sha(oidx)
assert oidx == index.sha_to_index(sha)

@@ -155,7 +149,7 @@ def test_pack_entity(self, rw_dir):
pack_objs.extend(entity.stream_iter())

count = 0
for info, stream in izip(entity.info_iter(), entity.stream_iter()):
for info, stream in zip(entity.info_iter(), entity.stream_iter()):
count += 1
assert info.binsha == stream.binsha
assert len(info.binsha) == 20
43 changes: 0 additions & 43 deletions gitdb/utils/compat.py

This file was deleted.

19 changes: 3 additions & 16 deletions gitdb/utils/encoding.py
@@ -1,31 +1,18 @@
from gitdb.utils import compat

if compat.PY3:
string_types = (str, )
text_type = str
else:
string_types = (basestring, )
text_type = unicode


def force_bytes(data, encoding="ascii"):
if isinstance(data, bytes):
return data

if isinstance(data, string_types):
if isinstance(data, str):
return data.encode(encoding)

return data


def force_text(data, encoding="utf-8"):
if isinstance(data, text_type):
if isinstance(data, str):
return data

if isinstance(data, bytes):
return data.decode(encoding)

if compat.PY3:
return text_type(data, encoding)
else:
return text_type(data)
return str(data, encoding)
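
With Python 2's basestring/unicode gone, the simplified helpers are easy to characterize: force_bytes encodes str and passes bytes through untouched; force_text decodes bytes, passes str through, and converts anything else via str. Expected behavior, for illustration:

    assert force_bytes("sha") == b"sha"               # str -> encoded bytes
    assert force_bytes(b"sha") == b"sha"              # bytes pass through
    assert force_text(b"caf\xc3\xa9") == "café"       # bytes -> decoded str
    assert force_text("café") == "café"               # str passes through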