diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py
index 688fa995ef35..44b252da212f 100644
--- a/tests/unit/forklift/test_legacy.py
+++ b/tests/unit/forklift/test_legacy.py
@@ -1654,6 +1654,56 @@ def test_upload_fails_with_existing_filename_diff_content(self,
         assert resp.status_code == 400
         assert resp.status == "400 File already exists."
 
+    def test_upload_fails_with_diff_filename_same_blake2(self,
+                                                         pyramid_config,
+                                                         db_request):
+        pyramid_config.testing_securitypolicy(userid=1)
+
+        user = UserFactory.create()
+        project = ProjectFactory.create()
+        release = ReleaseFactory.create(project=project, version="1.0")
+        RoleFactory.create(user=user, project=project)
+
+        filename = "{}-{}.tar.gz".format(project.name, release.version)
+        file_content = io.BytesIO(b"A fake file.")
+
+        db_request.POST = MultiDict({
+            "metadata_version": "1.2",
+            "name": project.name,
+            "version": release.version,
+            "filetype": "sdist",
+            "md5_digest": hashlib.md5(file_content.getvalue()).hexdigest(),
+            "content": pretend.stub(
+                filename="{}-fake.tar.gz".format(project.name),
+                file=file_content,
+                type="application/tar",
+            ),
+        })
+
+        db_request.db.add(
+            File(
+                release=release,
+                filename=filename,
+                md5_digest=hashlib.md5(file_content.getvalue()).hexdigest(),
+                sha256_digest=hashlib.sha256(
+                    file_content.getvalue()
+                ).hexdigest(),
+                blake2_256_digest=hashlib.blake2b(
+                    file_content.getvalue(),
+                    digest_size=256 // 8
+                ).hexdigest(),
+                path="source/{name[0]}/{name}/{filename}".format(
+                    name=project.name,
+                    filename=filename,
+                ),
+            ),
+        )
+
+        resp = legacy.file_upload(db_request)
+
+        assert resp.status_code == 400
+        assert resp.status == "400 File already exists."
+
     def test_upload_fails_with_wrong_filename(self, pyramid_config,
                                               db_request):
         pyramid_config.testing_securitypolicy(userid=1)
diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py
index 092a3acd2ef7..067872238249 100644
--- a/warehouse/forklift/legacy.py
+++ b/warehouse/forklift/legacy.py
@@ -614,6 +614,19 @@ def _is_valid_dist_file(filename, filetype):
     return True
 
 
+def _blake2_matches(db_session, blake2_hash):
+    """
+    Check to see if a file with the same blake2 hash already exists
+    """
+    file_ = (
+        db_session.query(File)
+        .filter(File.blake2_256_digest == blake2_hash)
+        .first()
+    )
+
+    return file_ is not None
+
+
 def _is_duplicate_file(db_session, filename, hashes):
     """
     Check to see if file already exists, and if it's content matches
@@ -631,9 +644,13 @@ def _is_duplicate_file(db_session, filename, hashes):
     )
 
     if file_ is not None:
-        return (file_.sha256_digest == hashes["sha256"] and
-                file_.md5_digest == hashes["md5"] and
-                file_.blake2_256_digest == hashes["blake2_256"])
+        content_matches = (
+            file_.sha256_digest == hashes["sha256"] and
+            file_.md5_digest == hashes["md5"] and
+            file_.blake2_256_digest == hashes["blake2_256"]
+        )
+        return (content_matches or
+                _blake2_matches(db_session, hashes["blake2_256"]))
 
     return None
 