Skip to content

Commit f09c95f

Browse files
facutuescajavanlacerdawoodruffwdi
authored
Verify release URLs using Trusted Publisher information (#16205)
* Initial implementation Signed-off-by: Javan lacerda <[email protected]> * Refactor URL verification logic Move verification to its own function, and make the verification checks more exhaustive by parsing the URL and adding more test cases. * Verify also URLs of existing releases * Update warehouse/migrations/versions/26455e3712a2_create_verified_field_for_releaseurl.py * Update warehouse/forklift/legacy.py * Linting * Update warehouse/migrations/versions/26455e3712a2_create_verified_field_for_releaseurl.py --------- Signed-off-by: Javan lacerda <[email protected]> Co-authored-by: Javan lacerda <[email protected]> Co-authored-by: William Woodruff <[email protected]> Co-authored-by: Dustin Ingram <[email protected]>
1 parent 84e9d1b commit f09c95f

File tree

13 files changed

+390
-5
lines changed

13 files changed

+390
-5
lines changed

tests/unit/forklift/test_legacy.py

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
Project,
5959
ProjectMacaroonWarningAssociation,
6060
Release,
61+
ReleaseURL,
6162
Role,
6263
)
6364
from warehouse.packaging.tasks import sync_file_to_cache, update_bigquery_release_files
@@ -3831,6 +3832,155 @@ def failing_verify(_self, _verifier, _policy, _dist):
38313832
assert resp.status_code == 400
38323833
assert resp.status.startswith(expected_msg)
38333834

3835+
@pytest.mark.parametrize(
3836+
"url, expected",
3837+
[
3838+
("https://google.com", False), # Totally different
3839+
("https://github.com/foo", False), # Missing parts
3840+
("https://github.com/foo/bar/", True), # Exactly the same
3841+
("https://github.com/foo/bar/readme.md", True), # Additional parts
3842+
("https://github.com/foo/bar", True), # Missing trailing slash
3843+
],
3844+
)
3845+
def test_new_release_url_verified(
3846+
self, monkeypatch, pyramid_config, db_request, metrics, url, expected
3847+
):
3848+
project = ProjectFactory.create()
3849+
publisher = GitHubPublisherFactory.create(projects=[project])
3850+
publisher.repository_owner = "foo"
3851+
publisher.repository_name = "bar"
3852+
claims = {"sha": "somesha"}
3853+
identity = PublisherTokenContext(publisher, SignedClaims(claims))
3854+
db_request.oidc_publisher = identity.publisher
3855+
db_request.oidc_claims = identity.claims
3856+
3857+
db_request.db.add(Classifier(classifier="Environment :: Other Environment"))
3858+
db_request.db.add(Classifier(classifier="Programming Language :: Python"))
3859+
3860+
filename = "{}-{}.tar.gz".format(project.name, "1.0")
3861+
3862+
pyramid_config.testing_securitypolicy(identity=identity)
3863+
db_request.user_agent = "warehouse-tests/6.6.6"
3864+
db_request.POST = MultiDict(
3865+
{
3866+
"metadata_version": "1.2",
3867+
"name": project.name,
3868+
"version": "1.0",
3869+
"summary": "This is my summary!",
3870+
"filetype": "sdist",
3871+
"md5_digest": _TAR_GZ_PKG_MD5,
3872+
"content": pretend.stub(
3873+
filename=filename,
3874+
file=io.BytesIO(_TAR_GZ_PKG_TESTDATA),
3875+
type="application/tar",
3876+
),
3877+
}
3878+
)
3879+
db_request.POST.extend(
3880+
[
3881+
("classifiers", "Environment :: Other Environment"),
3882+
("classifiers", "Programming Language :: Python"),
3883+
("requires_dist", "foo"),
3884+
("requires_dist", "bar (>1.0)"),
3885+
("project_urls", f"Test, {url}"),
3886+
("requires_external", "Cheese (>1.0)"),
3887+
("provides", "testing"),
3888+
]
3889+
)
3890+
3891+
storage_service = pretend.stub(store=lambda path, filepath, meta: None)
3892+
db_request.find_service = lambda svc, name=None, context=None: {
3893+
IFileStorage: storage_service,
3894+
IMetricsService: metrics,
3895+
}.get(svc)
3896+
3897+
legacy.file_upload(db_request)
3898+
release_url = (
3899+
db_request.db.query(ReleaseURL).filter(Release.project == project).one()
3900+
)
3901+
assert release_url is not None
3902+
assert release_url.verified == expected
3903+
3904+
def test_new_publisher_verifies_existing_release_url(
3905+
self,
3906+
monkeypatch,
3907+
pyramid_config,
3908+
db_request,
3909+
metrics,
3910+
):
3911+
repo_name = "my_new_repo"
3912+
verified_url = "https://github.com/foo/bar"
3913+
unverified_url = f"https://github.com/foo/{repo_name}"
3914+
3915+
project = ProjectFactory.create()
3916+
release = ReleaseFactory.create(project=project, version="1.0")
3917+
# We start with an existing release, with one verified URL and one unverified
3918+
# URL. Uploading a new file with a Trusted Publisher that matches the unverified
3919+
# URL should mark it as verified.
3920+
release.project_urls = {
3921+
"verified_url": {"url": verified_url, "verified": True},
3922+
"unverified_url": {"url": unverified_url, "verified": False},
3923+
}
3924+
publisher = GitHubPublisherFactory.create(projects=[project])
3925+
publisher.repository_owner = "foo"
3926+
publisher.repository_name = repo_name
3927+
claims = {"sha": "somesha"}
3928+
identity = PublisherTokenContext(publisher, SignedClaims(claims))
3929+
db_request.oidc_publisher = identity.publisher
3930+
db_request.oidc_claims = identity.claims
3931+
3932+
db_request.db.add(Classifier(classifier="Environment :: Other Environment"))
3933+
db_request.db.add(Classifier(classifier="Programming Language :: Python"))
3934+
3935+
filename = "{}-{}.tar.gz".format(project.name, "1.0")
3936+
3937+
pyramid_config.testing_securitypolicy(identity=identity)
3938+
db_request.user_agent = "warehouse-tests/6.6.6"
3939+
db_request.POST = MultiDict(
3940+
{
3941+
"metadata_version": "1.2",
3942+
"name": project.name,
3943+
"version": "1.0",
3944+
"summary": "This is my summary!",
3945+
"filetype": "sdist",
3946+
"md5_digest": _TAR_GZ_PKG_MD5,
3947+
"content": pretend.stub(
3948+
filename=filename,
3949+
file=io.BytesIO(_TAR_GZ_PKG_TESTDATA),
3950+
type="application/tar",
3951+
),
3952+
}
3953+
)
3954+
db_request.POST.extend(
3955+
[
3956+
("classifiers", "Environment :: Other Environment"),
3957+
("classifiers", "Programming Language :: Python"),
3958+
("requires_dist", "foo"),
3959+
("requires_dist", "bar (>1.0)"),
3960+
("requires_external", "Cheese (>1.0)"),
3961+
("provides", "testing"),
3962+
]
3963+
)
3964+
db_request.POST.add("project_urls", f"verified_url, {verified_url}")
3965+
db_request.POST.add("project_urls", f"unverified_url, {unverified_url}")
3966+
3967+
storage_service = pretend.stub(store=lambda path, filepath, meta: None)
3968+
db_request.find_service = lambda svc, name=None, context=None: {
3969+
IFileStorage: storage_service,
3970+
IMetricsService: metrics,
3971+
}.get(svc)
3972+
3973+
legacy.file_upload(db_request)
3974+
3975+
# After successful upload, the Release should have now both URLs verified
3976+
release_urls = (
3977+
db_request.db.query(ReleaseURL).filter(Release.project == project).all()
3978+
)
3979+
release_urls = {r.name: r.verified for r in release_urls}
3980+
assert "verified_url" in release_urls and "unverified_url" in release_urls
3981+
assert release_urls["verified_url"]
3982+
assert release_urls["unverified_url"]
3983+
38343984
@pytest.mark.parametrize(
38353985
"version, expected_version",
38363986
[
@@ -4640,3 +4790,86 @@ def test_missing_trailing_slash_redirect(pyramid_request):
46404790
"/legacy/ (with a trailing slash)"
46414791
)
46424792
assert resp.headers["Location"] == "/legacy/"
4793+
4794+
4795+
@pytest.mark.parametrize(
4796+
("url", "publisher_url", "expected"),
4797+
[
4798+
( # GitHub trivial case
4799+
"https://github.com/owner/project",
4800+
"https://github.com/owner/project",
4801+
True,
4802+
),
4803+
( # ActiveState trivial case
4804+
"https://platform.activestate.com/owner/project",
4805+
"https://platform.activestate.com/owner/project",
4806+
True,
4807+
),
4808+
( # GitLab trivial case
4809+
"https://gitlab.com/owner/project",
4810+
"https://gitlab.com/owner/project",
4811+
True,
4812+
),
4813+
( # URL is a sub-path of the TP URL
4814+
"https://github.com/owner/project/issues",
4815+
"https://github.com/owner/project",
4816+
True,
4817+
),
4818+
( # Normalization
4819+
"https://GiThUB.com/owner/project/",
4820+
"https://github.com/owner/project",
4821+
True,
4822+
),
4823+
( # TP URL is a prefix, but not a parent of the URL
4824+
"https://github.com/owner/project22",
4825+
"https://github.com/owner/project",
4826+
False,
4827+
),
4828+
( # URL is a parent of the TP URL
4829+
"https://github.com/owner",
4830+
"https://github.com/owner/project",
4831+
False,
4832+
),
4833+
( # Scheme component does not match
4834+
"http://github.com/owner/project",
4835+
"https://github.com/owner/project",
4836+
False,
4837+
),
4838+
( # Host component does not match
4839+
"https://gitlab.com/owner/project",
4840+
"https://github.com/owner/project",
4841+
False,
4842+
),
4843+
( # Host component matches, but contains user and port info
4844+
"https://[email protected]:443/owner/project",
4845+
"https://github.com/owner/project",
4846+
False,
4847+
),
4848+
( # URL path component is empty
4849+
"https://github.com",
4850+
"https://github.com/owner/project",
4851+
False,
4852+
),
4853+
( # TP URL path component is empty
4854+
# (currently no TPs have an empty path, so even if the given URL is a
4855+
# sub-path of the TP URL, we fail the verification)
4856+
"https://github.com/owner/project",
4857+
"https://github.com",
4858+
False,
4859+
),
4860+
( # Both path components are empty
4861+
# (currently no TPs have an empty path, so even if the given URL is the
4862+
# same as the TP URL, we fail the verification)
4863+
"https://github.com",
4864+
"https://github.com",
4865+
False,
4866+
),
4867+
( # Publisher URL is None
4868+
"https://github.com/owner/project",
4869+
None,
4870+
False,
4871+
),
4872+
],
4873+
)
4874+
def test_verify_url(url, publisher_url, expected):
4875+
assert legacy._verify_url(url, publisher_url) == expected

tests/unit/oidc/models/test_activestate.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,18 @@ def test_publisher_name(self):
8484

8585
assert publisher.publisher_name == "ActiveState"
8686

87+
def test_publisher_base_url(self):
88+
org_name = "fakeorg"
89+
project_name = "fakeproject"
90+
publisher = ActiveStatePublisher(
91+
organization=org_name, activestate_project_name=project_name
92+
)
93+
94+
assert (
95+
publisher.publisher_base_url
96+
== f"https://platform.activestate.com/{org_name}/{project_name}"
97+
)
98+
8799
def test_publisher_url(self):
88100
org_name = "fakeorg"
89101
project_name = "fakeproject"

tests/unit/oidc/models/test_github.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ def test_github_publisher_computed_properties(self):
9797
assert getattr(publisher, claim_name) is not None
9898

9999
assert str(publisher) == "fakeworkflow.yml"
100+
assert publisher.publisher_base_url == "https://github.com/fakeowner/fakerepo"
100101
assert publisher.publisher_url() == "https://github.com/fakeowner/fakerepo"
101102
assert (
102103
publisher.publisher_url({"sha": "somesha"})

tests/unit/oidc/models/test_gitlab.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def test_gitlab_publisher_computed_properties(self):
9595
assert getattr(publisher, claim_name) is not None
9696

9797
assert str(publisher) == "subfolder/fakeworkflow.yml"
98+
assert publisher.publisher_base_url == "https://gitlab.com/fakeowner/fakerepo"
9899
assert publisher.publisher_url() == "https://gitlab.com/fakeowner/fakerepo"
99100
assert (
100101
publisher.publisher_url({"sha": "somesha"})

tests/unit/oidc/models/test_google.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def test_publisher_name(self):
3333

3434
assert publisher.publisher_name == "Google"
3535

36+
def test_publisher_base_url(self):
37+
publisher = google.GooglePublisher(email="[email protected]")
38+
39+
assert publisher.publisher_base_url is None
40+
3641
def test_publisher_url(self):
3742
publisher = google.GooglePublisher(email="[email protected]")
3843

0 commit comments

Comments
 (0)