Remove Python 2 compatibility shims #56

Merged: 10 commits, Feb 17, 2020

4 changes: 2 additions & 2 deletions gitdb/db/loose.py
@@ -50,11 +50,11 @@
stream_copy
)

from gitdb.utils.compat import MAXSIZE
from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys


__all__ = ('LooseObjectDB', )
@@ -196,7 +196,7 @@ def store(self, istream):
if istream.binsha is not None:
# copy as much as possible, the actual uncompressed item size might
# be smaller than the compressed version
stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
else:
# write object with header, we have to make a new one
write_object(istream.type, istream.size, istream.read, writer.write,
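
Note on the MAXSIZE change: the deleted compat shim resolved to sys.maxsize on Python 3, so passing sys.maxsize directly preserves behavior; here it simply means "copy until the source stream is exhausted". A minimal standalone sketch of that pattern (illustrative only, not gitdb's stream_copy):

    import sys
    from io import BytesIO

    def stream_copy_sketch(read, write, size=sys.maxsize, chunk_size=65536):
        # Copy up to `size` bytes in chunks; sys.maxsize effectively
        # means "until read() returns no more data".
        copied = 0
        while copied < size:
            chunk = read(min(size - copied, chunk_size))
            if not chunk:
                break
            write(chunk)
            copied += len(chunk)
        return copied

    src, dst = BytesIO(b"x" * 100000), BytesIO()
    assert stream_copy_sketch(src.read, dst.write) == 100000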
3 changes: 1 addition & 2 deletions gitdb/db/pack.py
@@ -18,7 +18,6 @@
)

from gitdb.pack import PackEntity
from gitdb.utils.compat import xrange

from functools import reduce

@@ -107,7 +106,7 @@ def sha_iter(self):
for entity in self.entities():
index = entity.index()
sha_by_index = index.sha
for index in xrange(index.size()):
for index in range(index.size()):
yield sha_by_index(index)
# END for each index
# END for each entity
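
Note: Python 3's range is a lazy sequence with the same semantics xrange had, so dropping the shim keeps memory use constant even when a pack index holds millions of entries. A quick illustration (the reported size is CPython-specific):

    import sys

    r = range(10**9)           # no billion-element list is materialized
    print(sys.getsizeof(r))    # small constant size (48 bytes on CPython)
    print(r[123_456_789])      # O(1) indexing, just like xrange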
217 changes: 70 additions & 147 deletions gitdb/fun.py
@@ -16,7 +16,6 @@

from gitdb.const import NULL_BYTE, BYTE_SPACE
from gitdb.utils.encoding import force_text
from gitdb.utils.compat import izip, buffer, xrange, PY3
from gitdb.typ import (
str_blob_type,
str_commit_type,
@@ -101,7 +100,7 @@ def delta_chunk_apply(dc, bbuf, write):
:param write: write method to call with data to write"""
if dc.data is None:
# COPY DATA FROM SOURCE
write(buffer(bbuf, dc.so, dc.ts))
write(bbuf[dc.so:dc.so + dc.ts])
else:
# APPEND DATA
# what's faster: if + 4 function calls or just a write with a slice?
@@ -264,7 +263,7 @@ def compress(self):
# if first_data_index is not None:
nd = StringIO() # new data
so = self[first_data_index].to # start offset in target buffer
for x in xrange(first_data_index, i - 1):
for x in range(first_data_index, i - 1):
xdc = self[x]
nd.write(xdc.data[:xdc.ts])
# END collect data
@@ -314,7 +313,7 @@ def check_integrity(self, target_size=-1):
right.next()
# this is very pythonic - we might have just used index-based access here,
# but this could actually be faster
for lft, rgt in izip(left, right):
for lft, rgt in zip(left, right):
assert lft.rbound() == rgt.to
assert lft.to + lft.ts == rgt.to
# END for each pair
@@ -424,20 +423,12 @@ def pack_object_header_info(data):
type_id = (c >> 4) & 7 # numeric type
size = c & 15 # starting size
s = 4 # starting bit-shift size
if PY3:
while c & 0x80:
c = byte_ord(data[i])
i += 1
size += (c & 0x7f) << s
s += 7
# END character loop
else:
while c & 0x80:
c = ord(data[i])
i += 1
size += (c & 0x7f) << s
s += 7
# END character loop
while c & 0x80:
c = byte_ord(data[i])
i += 1
size += (c & 0x7f) << s
s += 7
# END character loop
# end performance at expense of maintenance ...
return (type_id, size, i)
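
For reference, the unified loop decodes git's variable-length pack entry header: the first byte carries a 3-bit type and the low 4 bits of the size, and each continuation byte (MSB set) contributes 7 more size bits, least significant first. A standalone restatement with a worked example (not the library function itself):

    def parse_pack_header(data):
        # data[0]: 1 continuation bit, 3 type bits, 4 size bits
        c = data[0]
        type_id = (c >> 4) & 7
        size = c & 15
        s = 4
        i = 1
        while c & 0x80:              # continuation bit set
            c = data[i]
            i += 1
            size += (c & 0x7f) << s  # 7 payload bits per extra byte
            s += 7
        return type_id, size, i

    # 0x95 = MSB set, type 1 (commit), size nibble 5; 0x0a adds 10 << 4 = 160
    assert parse_pack_header(b"\x95\x0a") == (1, 165, 2)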

@@ -450,28 +441,16 @@ def create_pack_object_header(obj_type, obj_size):
:param obj_type: pack type_id of the object
:param obj_size: uncompressed size in bytes of the following object stream"""
c = 0 # 1 byte
if PY3:
hdr = bytearray() # output string

c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
while obj_size:
hdr.append(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
# END until size is consumed
hdr.append(c)
else:
hdr = bytes() # output string

c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
while obj_size:
hdr += chr(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
# END until size is consumed
hdr += chr(c)
hdr = bytearray() # output string

c = (obj_type << 4) | (obj_size & 0xf)
obj_size >>= 4
while obj_size:
hdr.append(c | 0x80)
c = obj_size & 0x7f
obj_size >>= 7
# END until size is consumed
hdr.append(c)
# end handle interpreter
return hdr
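
bytearray.append takes integers, which is why the Python 3 branch was the keeper; concatenating chr() onto bytes only worked on Python 2. A quick round-trip against the parser sketched above confirms the layout:

    hdr = bytearray()
    obj_type, obj_size = 1, 165          # commit, 165 bytes
    c = (obj_type << 4) | (obj_size & 0xf)
    obj_size >>= 4
    while obj_size:
        hdr.append(c | 0x80)             # set the continuation bit
        c = obj_size & 0x7f
        obj_size >>= 7
    hdr.append(c)
    assert bytes(hdr) == b"\x95\x0a"     # parse_pack_header returns (1, 165, 2)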

@@ -484,26 +463,15 @@ def msb_size(data, offset=0):
i = 0
l = len(data)
hit_msb = False
if PY3:
while i < l:
c = data[i + offset]
size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
break
# END check msb bit
# END while in range
else:
while i < l:
c = ord(data[i + offset])
size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
break
# END check msb bit
# END while in range
while i < l:
c = data[i + offset]
size |= (c & 0x7f) << i * 7
i += 1
if not c & 0x80:
hit_msb = True
break
# END check msb bit
# END while in range
# end performance ...
if not hit_msb:
raise AssertionError("Could not find terminating MSB byte in data stream")
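
msb_size reads the little-endian base-128 sizes that prefix a delta stream: seven payload bits per byte, with bit 7 flagging that more bytes follow. A condensed sketch of the same decoding (it omits the terminator check the real function performs):

    def msb_size_sketch(data, offset=0):
        size, i = 0, 0
        while True:
            c = data[i + offset]
            size |= (c & 0x7f) << (i * 7)   # 7 bits, least significant first
            i += 1
            if not c & 0x80:                # MSB clear: last byte
                return size, i + offset

    # 0x90 contributes 0x10, 0x01 contributes 1 << 7 = 128: total 144
    assert msb_size_sketch(b"\x90\x01") == (144, 2)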
@@ -663,93 +631,48 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
**Note:** transcribed to python from the similar routine in patch-delta.c"""
i = 0
db = delta_buf
if PY3:
while i < delta_buf_size:
c = db[i]
i += 1
if c & 0x80:
cp_off, cp_size = 0, 0
if (c & 0x01):
cp_off = db[i]
i += 1
if (c & 0x02):
cp_off |= (db[i] << 8)
i += 1
if (c & 0x04):
cp_off |= (db[i] << 16)
i += 1
if (c & 0x08):
cp_off |= (db[i] << 24)
i += 1
if (c & 0x10):
cp_size = db[i]
i += 1
if (c & 0x20):
cp_size |= (db[i] << 8)
i += 1
if (c & 0x40):
cp_size |= (db[i] << 16)
i += 1

if not cp_size:
cp_size = 0x10000

rbound = cp_off + cp_size
if (rbound < cp_size or
rbound > src_buf_size):
break
write(buffer(src_buf, cp_off, cp_size))
elif c:
write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
# END handle command byte
# END while processing delta data
else:
while i < delta_buf_size:
c = ord(db[i])
i += 1
if c & 0x80:
cp_off, cp_size = 0, 0
if (c & 0x01):
cp_off = ord(db[i])
i += 1
if (c & 0x02):
cp_off |= (ord(db[i]) << 8)
i += 1
if (c & 0x04):
cp_off |= (ord(db[i]) << 16)
i += 1
if (c & 0x08):
cp_off |= (ord(db[i]) << 24)
i += 1
if (c & 0x10):
cp_size = ord(db[i])
i += 1
if (c & 0x20):
cp_size |= (ord(db[i]) << 8)
i += 1
if (c & 0x40):
cp_size |= (ord(db[i]) << 16)
i += 1

if not cp_size:
cp_size = 0x10000

rbound = cp_off + cp_size
if (rbound < cp_size or
rbound > src_buf_size):
break
write(buffer(src_buf, cp_off, cp_size))
elif c:
write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
# END handle command byte
# END while processing delta data
# end save byte_ord call and prevent performance regression in py2
while i < delta_buf_size:
c = db[i]
i += 1
if c & 0x80:
cp_off, cp_size = 0, 0
if (c & 0x01):
cp_off = db[i]
i += 1
if (c & 0x02):
cp_off |= (db[i] << 8)
i += 1
if (c & 0x04):
cp_off |= (db[i] << 16)
i += 1
if (c & 0x08):
cp_off |= (db[i] << 24)
i += 1
if (c & 0x10):
cp_size = db[i]
i += 1
if (c & 0x20):
cp_size |= (db[i] << 8)
i += 1
if (c & 0x40):
cp_size |= (db[i] << 16)
i += 1

if not cp_size:
cp_size = 0x10000

rbound = cp_off + cp_size
if (rbound < cp_size or
rbound > src_buf_size):
break
write(src_buf[cp_off:cp_off + cp_size])
elif c:
write(db[i:i + c])
i += c
else:
raise ValueError("unexpected delta opcode 0")
# END handle command byte
# END while processing delta data

# yes, lets use the exact same error message that git uses :)
assert i == delta_buf_size, "delta replay has gone wild"
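
To make the opcode handling concrete: a command byte with bit 7 set copies a span out of the source buffer (its low bits select which offset/size bytes follow), a nonzero byte without bit 7 inserts that many literal bytes, and 0x00 is reserved. A toy delta exercising both paths through the function above (real call sites first strip the source/target size headers with msb_size):

    from io import BytesIO

    src = b"hello world"
    # 0x91 = copy: bit 0 -> one offset byte (0x00), bit 4 -> one size byte (0x05)
    # 0x01 = insert the next single literal byte ("!")
    delta = b"\x91\x00\x05\x01!"
    out = BytesIO()
    apply_delta_data(src, len(src), delta, len(delta), out.write)
    assert out.getvalue() == b"hello!"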
28 changes: 13 additions & 15 deletions gitdb/pack.py
@@ -62,12 +62,6 @@
from binascii import crc32

from gitdb.const import NULL_BYTE
from gitdb.utils.compat import (
izip,
buffer,
xrange,
to_bytes
)

import tempfile
import array
@@ -119,7 +113,7 @@ def pack_object_at(cursor, offset, as_stream):
# END handle type id
abs_data_offset = offset + total_rela_offset
if as_stream:
stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
stream = DecompressMemMapReader(data[total_rela_offset:], False, uncomp_size)
if delta_info is None:
return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream)
else:
@@ -207,7 +201,7 @@ def write(self, pack_sha, write):
for t in self._objs:
tmplist[byte_ord(t[0][0])] += 1
# END prepare fanout
for i in xrange(255):
for i in range(255):
v = tmplist[i]
sha_write(pack('>L', v))
tmplist[i + 1] += v
@@ -376,7 +370,7 @@ def _read_fanout(self, byte_offset):
d = self._cursor.map()
out = list()
append = out.append
for i in xrange(256):
for i in range(256):
append(unpack_from('>L', d, byte_offset + i * 4)[0])
# END for each entry
return out
@@ -410,14 +404,14 @@ def offsets(self):
if self._version == 2:
# read stream to array, convert to tuple
a = array.array('I') # 4 byte unsigned int, long are 8 byte on 64 bit it appears
a.frombytes(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))
a.frombytes(self._cursor.map()[self._pack_offset:self._pack_64_offset])

# network byte order to something the array module likes more
if sys.byteorder == 'little':
a.byteswap()
return a
else:
return tuple(self.offset(index) for index in xrange(self.size()))
return tuple(self.offset(index) for index in range(self.size()))
# END handle version

def sha_to_index(self, sha):
@@ -696,7 +690,7 @@ def _set_cache_(self, attr):
iter_offsets = iter(offsets_sorted)
iter_offsets_plus_one = iter(offsets_sorted)
next(iter_offsets_plus_one)
consecutive = izip(iter_offsets, iter_offsets_plus_one)
consecutive = zip(iter_offsets, iter_offsets_plus_one)

offset_map = dict(consecutive)
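
The two-iterator construction above is the standard pairwise idiom: advancing one copy by a single element and zipping yields (offset, next_offset) pairs, and because zip stops at the shorter iterator the final offset pairs with nothing; the surrounding code handles that last entry separately. Equivalent in miniature:

    offsets_sorted = [0, 12, 40, 95]
    a, b = iter(offsets_sorted), iter(offsets_sorted)
    next(b)
    assert dict(zip(a, b)) == {0: 12, 12: 40, 40: 95}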

@@ -716,7 +710,7 @@ def _iter_objects(self, as_stream):
"""Iterate over all objects in our index and yield their OInfo or OStream instences"""
_sha = self._index.sha
_object = self._object
for index in xrange(self._index.size()):
for index in range(self._index.size()):
yield _object(_sha(index), as_stream, index)
# END for each index

@@ -838,7 +832,7 @@ def is_valid_stream(self, sha, use_crc=False):
while cur_pos < next_offset:
rbound = min(cur_pos + chunk_size, next_offset)
size = rbound - cur_pos
this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value)
this_crc_value = crc_update(pack_data[cur_pos:cur_pos + size], this_crc_value)
cur_pos += size
# END window size loop

@@ -882,7 +876,11 @@ def collect_streams_at_offset(self, offset):
stream = streams[-1]
while stream.type_id in delta_types:
if stream.type_id == REF_DELTA:
sindex = self._index.sha_to_index(to_bytes(stream.delta_info))
# smmap can return memory view objects, which can't be compared as buffers/bytes can ...
if isinstance(stream.delta_info, memoryview):
sindex = self._index.sha_to_index(stream.delta_info.tobytes())
else:
sindex = self._index.sha_to_index(stream.delta_info)
if sindex is None:
break
stream = self._pack.stream(self._index.offset(sindex))
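
On the memoryview special case: a memoryview compares equal to bytes element-wise, but it does not support the ordering comparisons a sorted-index lookup needs, which is presumably what the inline comment alludes to; .tobytes() produces a real bytes object with full bytes semantics. Illustration:

    mv = memoryview(b"abc")
    assert mv == b"abc"            # equality against bytes works
    try:
        mv < b"abd"                # ordering does not: TypeError
    except TypeError:
        pass
    assert mv.tobytes() < b"abd"   # converting restores ordering comparisons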
5 changes: 2 additions & 3 deletions gitdb/stream.py
@@ -27,7 +27,6 @@
)

from gitdb.const import NULL_BYTE, BYTE_SPACE
from gitdb.utils.compat import buffer
from gitdb.utils.encoding import force_bytes

has_perf_mod = False
@@ -278,7 +277,7 @@ def read(self, size=-1):
# END adjust winsize

# takes a slice, but doesn't copy the data, it says ...
indata = buffer(self._m, self._cws, self._cwe - self._cws)
indata = self._m[self._cws:self._cwe]

# get the actual window end to be sure we don't use it for computations
self._cwe = self._cws + len(indata)
@@ -414,7 +413,7 @@ def _set_cache_brute_(self, attr):
buf = dstream.read(512) # read the header information + X
offset, src_size = msb_size(buf)
offset, target_size = msb_size(buf, offset)
buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
buffer_info_list.append((buf[offset:], offset, src_size, target_size))
max_target_size = max(max_target_size, target_size)
# END for each delta stream
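
One trade-off worth noting in these two hunks: Python 2's buffer() was a zero-copy view, while slicing bytes allocates a copy, so the old "doesn't copy the data" comment no longer holds. If the copies ever matter in profiles, memoryview is the zero-copy equivalent on Python 3, e.g.:

    data = bytes(10**6)
    view = memoryview(data)[100:]   # zero-copy view, like Python 2's buffer()
    copy = data[100:]               # a bytes slice allocates a new object
    assert bytes(view) == copy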

3 changes: 1 addition & 2 deletions gitdb/test/db/lib.py
@@ -23,7 +23,6 @@

from gitdb.exc import BadObject
from gitdb.typ import str_blob_type
from gitdb.utils.compat import xrange

from io import BytesIO

@@ -45,7 +44,7 @@ def _assert_object_writing_simple(self, db):
# write a bunch of objects and query their streams and info
null_objs = db.size()
ni = 250
for i in xrange(ni):
for i in range(ni):
data = pack(">L", i)
istream = IStream(str_blob_type, len(data), BytesIO(data))
new_istream = db.store(istream)
3 changes: 1 addition & 2 deletions gitdb/test/lib.py
@@ -4,7 +4,6 @@
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Utilities used in ODB testing"""
from gitdb import OStream
from gitdb.utils.compat import xrange

import sys
import random
@@ -151,7 +150,7 @@ def make_bytes(size_in_bytes, randomize=False):
""":return: string with given size in bytes
:param randomize: try to produce a very random stream"""
actual_size = size_in_bytes // 4
producer = xrange(actual_size)
producer = range(actual_size)
if randomize:
producer = list(producer)
random.shuffle(producer)
3 changes: 1 addition & 2 deletions gitdb/test/performance/test_pack.py
@@ -17,7 +17,6 @@
from gitdb.typ import str_blob_type
from gitdb.exc import UnsupportedOperation
from gitdb.db.pack import PackedDB
from gitdb.utils.compat import xrange
from gitdb.test.lib import skip_on_travis_ci

import sys
@@ -118,7 +117,7 @@ def test_correctness(self):
for entity in pdb.entities():
pack_verify = entity.is_valid_stream
sha_by_index = entity.index().sha
for index in xrange(entity.index().size()):
for index in range(entity.index().size()):
try:
assert pack_verify(sha_by_index(index), use_crc=crc)
count += 1
10 changes: 2 additions & 8 deletions gitdb/test/test_pack.py
@@ -25,12 +25,6 @@
from gitdb.fun import delta_types
from gitdb.exc import UnsupportedOperation
from gitdb.util import to_bin_sha
from gitdb.utils.compat import xrange

try:
from itertools import izip
except ImportError:
izip = zip

from nose import SkipTest

@@ -63,7 +57,7 @@ def _assert_index_file(self, index, version, size):
assert len(index.offsets()) == size

# get all data of all objects
for oidx in xrange(index.size()):
for oidx in range(index.size()):
sha = index.sha(oidx)
assert oidx == index.sha_to_index(sha)

@@ -155,7 +149,7 @@ def test_pack_entity(self, rw_dir):
pack_objs.extend(entity.stream_iter())

count = 0
for info, stream in izip(entity.info_iter(), entity.stream_iter()):
for info, stream in zip(entity.info_iter(), entity.stream_iter()):
count += 1
assert info.binsha == stream.binsha
assert len(info.binsha) == 20
43 changes: 0 additions & 43 deletions gitdb/utils/compat.py

This file was deleted.

19 changes: 3 additions & 16 deletions gitdb/utils/encoding.py
@@ -1,31 +1,18 @@
from gitdb.utils import compat

if compat.PY3:
string_types = (str, )
text_type = str
else:
string_types = (basestring, )
text_type = unicode


def force_bytes(data, encoding="ascii"):
if isinstance(data, bytes):
return data

if isinstance(data, string_types):
if isinstance(data, str):
return data.encode(encoding)

return data


def force_text(data, encoding="utf-8"):
if isinstance(data, text_type):
if isinstance(data, str):
return data

if isinstance(data, bytes):
return data.decode(encoding)

if compat.PY3:
return text_type(data, encoding)
else:
return text_type(data)
return str(data, encoding)
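
With Python 2's basestring/unicode gone, the simplified helpers are easy to characterize: force_bytes encodes str and passes bytes through untouched; force_text decodes bytes, passes str through, and converts anything else via str. Expected behavior, for illustration:

    assert force_bytes("sha") == b"sha"               # str -> encoded bytes
    assert force_bytes(b"sha") == b"sha"              # bytes pass through
    assert force_text(b"caf\xc3\xa9") == "café"       # bytes -> decoded str
    assert force_text("café") == "café"               # str passes through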