diff --git a/tests/common/db/packaging.py b/tests/common/db/packaging.py index dab5fd972dcd..c09d4253dad3 100644 --- a/tests/common/db/packaging.py +++ b/tests/common/db/packaging.py @@ -22,12 +22,14 @@ BlacklistedProject, Dependency, DependencyKind, + Description, File, JournalEntry, Project, Release, Role, ) +from warehouse.utils import readme from .accounts import UserFactory from .base import WarehouseFactory @@ -41,6 +43,16 @@ class Meta: name = factory.fuzzy.FuzzyText(length=12) +class DescriptionFactory(WarehouseFactory): + class Meta: + model = Description + + id = factory.LazyFunction(uuid.uuid4) + raw = factory.fuzzy.FuzzyText(length=100) + html = factory.LazyAttribute(lambda o: readme.render(o.raw)) + rendered_by = factory.LazyAttribute(lambda o: readme.renderer_version()) + + class ReleaseFactory(WarehouseFactory): class Meta: model = Release @@ -54,6 +66,7 @@ class Meta: _pypi_ordering = factory.Sequence(lambda n: n) uploader = factory.SubFactory(UserFactory) + description = factory.SubFactory(DescriptionFactory) class FileFactory(WarehouseFactory): diff --git a/tests/unit/legacy/api/test_json.py b/tests/unit/legacy/api/test_json.py index fbfbb1a67aa7..b7db0d2e836b 100644 --- a/tests/unit/legacy/api/test_json.py +++ b/tests/unit/legacy/api/test_json.py @@ -21,6 +21,7 @@ from ....common.db.accounts import UserFactory from ....common.db.packaging import ( + DescriptionFactory, FileFactory, JournalEntryFactory, ProjectFactory, @@ -187,7 +188,9 @@ def test_detail_renders(self, pyramid_config, db_request, db_session): ReleaseFactory.create( project=project, version="3.0", - description_content_type=description_content_type, + description=DescriptionFactory.create( + content_type=description_content_type + ), ) ] @@ -239,7 +242,7 @@ def test_detail_renders(self, pyramid_config, db_request, db_session): "bugtrack_url": None, "classifiers": [], "description_content_type": description_content_type, - "description": None, + "description": releases[-1].description.raw, "docs_url": "/the/fake/url/", "download_url": None, "downloads": {"last_day": -1, "last_week": -1, "last_month": -1}, @@ -384,8 +387,8 @@ def test_minimal_renders(self, pyramid_config, db_request): "author_email": None, "bugtrack_url": None, "classifiers": [], - "description_content_type": None, - "description": None, + "description_content_type": release.description.content_type, + "description": release.description.raw, "docs_url": None, "download_url": None, "downloads": {"last_day": -1, "last_week": -1, "last_month": -1}, diff --git a/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py b/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py index b26760c847ba..4095353ab633 100644 --- a/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py +++ b/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py @@ -605,7 +605,7 @@ def test_release_data(db_request): "maintainer": release.maintainer, "maintainer_email": release.maintainer_email, "summary": release.summary, - "description": release.description, + "description": release.description.raw, "license": release.license, "keywords": release.keywords, "platform": release.platform, diff --git a/tests/unit/packaging/test_init.py b/tests/unit/packaging/test_init.py index 00466d5675bf..1fceb3dfea6e 100644 --- a/tests/unit/packaging/test_init.py +++ b/tests/unit/packaging/test_init.py @@ -19,7 +19,7 @@ from warehouse.accounts.models import Email, User from warehouse.packaging.interfaces import IDocsStorage, IFileStorage from warehouse.packaging.models import File, Project, Release, Role -from warehouse.packaging.tasks import compute_trending +from warehouse.packaging.tasks import compute_trending, update_description_html @pytest.mark.parametrize("with_trending", [True, False]) @@ -106,5 +106,10 @@ def key_factory(keystring, iterate_on=None): if with_trending: assert config.add_periodic_task.calls == [ - pretend.call(crontab(minute=0, hour=3), compute_trending) + pretend.call(crontab(minute="*/5"), update_description_html), + pretend.call(crontab(minute=0, hour=3), compute_trending), + ] + else: + assert config.add_periodic_task.calls == [ + pretend.call(crontab(minute="*/5"), update_description_html) ] diff --git a/tests/unit/packaging/test_tasks.py b/tests/unit/packaging/test_tasks.py index 333f5ba61b35..df2b09c71715 100644 --- a/tests/unit/packaging/test_tasks.py +++ b/tests/unit/packaging/test_tasks.py @@ -16,10 +16,11 @@ from google.cloud.bigquery import Row from warehouse.cache.origin import IOriginCache -from warehouse.packaging.models import Project -from warehouse.packaging.tasks import compute_trending +from warehouse.packaging.models import Description, Project +from warehouse.packaging.tasks import compute_trending, update_description_html +from warehouse.utils import readme -from ...common.db.packaging import ProjectFactory +from ...common.db.packaging import DescriptionFactory, ProjectFactory class TestComputeTrending: @@ -108,3 +109,28 @@ def find_service(iface=None, name=None): projects[1].name: 2, projects[2].name: -1, } + + +def test_update_description_html(monkeypatch, db_request): + current_version = "24.0" + previous_version = "23.0" + + monkeypatch.setattr(readme, "renderer_version", lambda: current_version) + + descriptions = [ + DescriptionFactory.create(html="rendered", rendered_by=current_version), + DescriptionFactory.create(html="not this one", rendered_by=previous_version), + DescriptionFactory.create(html="", rendered_by=""), # Initial migration state + ] + + update_description_html(db_request) + + assert set( + db_request.db.query( + Description.raw, Description.html, Description.rendered_by + ).all() + ) == { + (descriptions[0].raw, "rendered", current_version), + (descriptions[1].raw, readme.render(descriptions[1].raw), current_version), + (descriptions[2].raw, readme.render(descriptions[2].raw), current_version), + } diff --git a/tests/unit/packaging/test_views.py b/tests/unit/packaging/test_views.py index 091e94043635..bebf8b3ae216 100644 --- a/tests/unit/packaging/test_views.py +++ b/tests/unit/packaging/test_views.py @@ -21,6 +21,7 @@ from ...common.db.accounts import UserFactory from ...common.db.classifiers import ClassifierFactory from ...common.db.packaging import ( + DescriptionFactory, FileFactory, ProjectFactory, ReleaseFactory, @@ -145,6 +146,52 @@ def test_normalizing_version_redirects(self, db_request): pretend.call(name=release.project.name, version=release.version) ] + def test_detail_rendered(self, db_request): + users = [UserFactory.create(), UserFactory.create(), UserFactory.create()] + project = ProjectFactory.create() + releases = [ + ReleaseFactory.create( + project=project, + version=v, + description=DescriptionFactory.create( + raw="unrendered description", + html="rendered description", + content_type="text/plain", + ), + ) + for v in ["1.0", "2.0", "3.0", "4.0.dev0"] + ] + files = [ + FileFactory.create( + release=r, + filename="{}-{}.tar.gz".format(project.name, r.version), + python_version="source", + ) + for r in releases + ] + + # Create a role for each user + for user in users: + RoleFactory.create(user=user, project=project) + + # Add an extra role for one user, to ensure deduplication + RoleFactory.create(user=users[0], project=project, role_name="another role") + + result = views.release_detail(releases[1], db_request) + + assert result == { + "project": project, + "release": releases[1], + "files": [files[1]], + "description": "rendered description", + "latest_version": project.latest_version, + "all_versions": [ + (r.version, r.created, r.is_prerelease) for r in reversed(releases) + ], + "maintainers": sorted(users, key=lambda u: u.username.lower()), + "license": None, + } + def test_detail_renders(self, monkeypatch, db_request): users = [UserFactory.create(), UserFactory.create(), UserFactory.create()] project = ProjectFactory.create() @@ -152,8 +199,9 @@ def test_detail_renders(self, monkeypatch, db_request): ReleaseFactory.create( project=project, version=v, - description="unrendered description", - description_content_type="text/plain", + description=DescriptionFactory.create( + raw="unrendered description", html="", content_type="text/plain" + ), ) for v in ["1.0", "2.0", "3.0", "4.0.dev0"] ] diff --git a/tests/unit/search/test_tasks.py b/tests/unit/search/test_tasks.py index ab95baeecb78..56ae854bf91d 100644 --- a/tests/unit/search/test_tasks.py +++ b/tests/unit/search/test_tasks.py @@ -55,6 +55,9 @@ def test_project_docs(db_session): "normalized_name": p.normalized_name, "version": [r.version for r in prs], "latest_version": first(prs, key=lambda r: not r.is_prerelease).version, + "description": first( + prs, key=lambda r: not r.is_prerelease + ).description.raw, }, } for p, prs in sorted(releases.items(), key=lambda x: x[0].id) @@ -82,6 +85,9 @@ def test_single_project_doc(db_session): "normalized_name": p.normalized_name, "version": [r.version for r in prs], "latest_version": first(prs, key=lambda r: not r.is_prerelease).version, + "description": first( + prs, key=lambda r: not r.is_prerelease + ).description.raw, }, } for p, prs in sorted(releases.items(), key=lambda x: x[0].name.lower()) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index bc6aabd33388..171d8b85a05c 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -45,6 +45,7 @@ BlacklistedProject, Dependency, DependencyKind, + Description, File, Filename, JournalEntry, @@ -52,7 +53,7 @@ Release, Role, ) -from warehouse.utils import http +from warehouse.utils import http, readme MAX_FILESIZE = 60 * 1024 * 1024 # 60M MAX_SIGSIZE = 8 * 1024 # 8K @@ -928,35 +929,38 @@ def file_upload(request): if project.name != form.name.data: project.name = form.name.data - # Uploading should prevent broken rendered descriptions. - # Temporarily disabled, see - # https://github.com/pypa/warehouse/issues/4079 - # if form.description.data: - # description_content_type = form.description_content_type.data - # if not description_content_type: - # description_content_type = "text/x-rst" - # rendered = readme.render( - # form.description.data, description_content_type, use_fallback=False - # ) - - # if rendered is None: - # if form.description_content_type.data: - # message = ( - # "The description failed to render " - # "for '{description_content_type}'." - # ).format(description_content_type=description_content_type) - # else: - # message = ( - # "The description failed to render " - # "in the default format of reStructuredText." - # ) - # raise _exc_with_message( - # HTTPBadRequest, - # "{message} See {projecthelp} for more information.".format( - # message=message, - # projecthelp=request.help_url(_anchor="description-content-type"), - # ), - # ) from None + # Render our description so we can save from having to render this data every time + # we load a project description page. + rendered = None + if form.description.data: + description_content_type = form.description_content_type.data + # if not description_content_type: + # description_content_type = "text/x-rst" + rendered = readme.render( + form.description.data, description_content_type # use_fallback=False + ) + + # Uploading should prevent broken rendered descriptions. + # Temporarily disabled, see + # https://github.com/pypa/warehouse/issues/4079 + # if rendered is None: + # if form.description_content_type.data: + # message = ( + # "The description failed to render " + # "for '{description_content_type}'." + # ).format(description_content_type=description_content_type) + # else: + # message = ( + # "The description failed to render " + # "in the default format of reStructuredText." + # ) + # raise _exc_with_message( + # HTTPBadRequest, + # "{message} See {projecthelp} for more information.".format( + # message=message, + # projecthelp=request.help_url(_anchor="description-content-type"), + # ), + # ) from None try: canonical_version = packaging.utils.canonicalize_version(form.version.data) @@ -1001,6 +1005,12 @@ def file_upload(request): ) ), canonical_version=canonical_version, + description=Description( + content_type=form.description_content_type.data, + raw=form.description.data or "", + html=rendered or "", + rendered_by=readme.renderer_version(), + ), **{ k: getattr(form, k).data for k in { @@ -1008,8 +1018,6 @@ def file_upload(request): # should pull off and insert into our new release. "version", "summary", - "description", - "description_content_type", "license", "author", "author_email", diff --git a/warehouse/legacy/api/json.py b/warehouse/legacy/api/json.py index 601f04804422..6aeebd920261 100644 --- a/warehouse/legacy/api/json.py +++ b/warehouse/legacy/api/json.py @@ -157,8 +157,8 @@ def json_release(release, request): "name": project.name, "version": release.version, "summary": release.summary, - "description_content_type": release.description_content_type, - "description": release.description, + "description_content_type": release.description.content_type, + "description": release.description.raw, "keywords": release.keywords, "license": release.license, "classifiers": list(release.classifiers), diff --git a/warehouse/legacy/api/xmlrpc/views.py b/warehouse/legacy/api/xmlrpc/views.py index a5cfeeb6ea12..e1ef7044e92f 100644 --- a/warehouse/legacy/api/xmlrpc/views.py +++ b/warehouse/legacy/api/xmlrpc/views.py @@ -359,7 +359,7 @@ def release_data(request, package_name: str, version: str): try: release = ( request.db.query(Release) - .options(orm.undefer("description")) + .options(orm.joinedload(Release.description)) .join(Project) .filter( (Project.normalized_name == func.normalize_pep426_name(package_name)) @@ -390,7 +390,7 @@ def release_data(request, package_name: str, version: str): "maintainer": _clean_for_xml(release.maintainer), "maintainer_email": _clean_for_xml(release.maintainer_email), "summary": _clean_for_xml(release.summary), - "description": _clean_for_xml(release.description), + "description": _clean_for_xml(release.description.raw), "license": _clean_for_xml(release.license), "keywords": _clean_for_xml(release.keywords), "platform": release.platform, diff --git a/warehouse/migrations/versions/9ca7d5668af4_refactor_description_to_its_own_table.py b/warehouse/migrations/versions/9ca7d5668af4_refactor_description_to_its_own_table.py new file mode 100644 index 000000000000..3e628ae4632f --- /dev/null +++ b/warehouse/migrations/versions/9ca7d5668af4_refactor_description_to_its_own_table.py @@ -0,0 +1,84 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Refactor description to its own table + +Revision ID: 9ca7d5668af4 +Revises: 42f0409bb702 +Create Date: 2019-05-10 16:19:04.008388 +""" + +import sqlalchemy as sa + +from alembic import op +from sqlalchemy.dialects import postgresql + +revision = "9ca7d5668af4" +down_revision = "42f0409bb702" + + +def upgrade(): + op.create_table( + "release_descriptions", + sa.Column( + "id", + postgresql.UUID(as_uuid=True), + server_default=sa.text("gen_random_uuid()"), + nullable=False, + ), + sa.Column("content_type", sa.Text(), nullable=True), + sa.Column("raw", sa.Text(), nullable=False), + sa.Column("html", sa.Text(), nullable=False), + sa.Column("rendered_by", sa.Text(), nullable=False), + sa.Column("release_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.add_column( + "releases", + sa.Column("description_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_foreign_key( + None, + "releases", + "release_descriptions", + ["description_id"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + + # Backfill our data into the description table. + op.execute( + """ WITH inserted_descriptions AS ( + INSERT INTO release_descriptions + (content_type, raw, html, rendered_by, release_id) + SELECT + description_content_type, COALESCE(description, ''), '', '', id + FROM releases + RETURNING release_id, id AS description_id + ) + UPDATE releases + SET description_id = ids.description_id + FROM inserted_descriptions AS ids + WHERE id = release_id + """ + ) + + op.alter_column("releases", "description_id", nullable=False) + + op.drop_column("releases", "description_content_type") + op.drop_column("releases", "description") + op.drop_column("release_descriptions", "release_id") + + +def downgrade(): + raise RuntimeError(f"Cannot downgrade past revision: {revision!r}") diff --git a/warehouse/packaging/__init__.py b/warehouse/packaging/__init__.py index 360ac637c761..d7f5b9ee01a5 100644 --- a/warehouse/packaging/__init__.py +++ b/warehouse/packaging/__init__.py @@ -18,7 +18,7 @@ from warehouse.cache.origin import key_factory, receive_set from warehouse.packaging.interfaces import IDocsStorage, IFileStorage from warehouse.packaging.models import File, Project, Release, Role -from warehouse.packaging.tasks import compute_trending +from warehouse.packaging.tasks import compute_trending, update_description_html @db.listens_for(User.name, "set") @@ -89,6 +89,8 @@ def includeme(config): ], ) + config.add_periodic_task(crontab(minute="*/5"), update_description_html) + # Add a periodic task to compute trending once a day, assuming we have # been configured to be able to access BigQuery. if config.get_settings().get("warehouse.trending_table"): diff --git a/warehouse/packaging/models.py b/warehouse/packaging/models.py index e428c3a0aef4..c20d9f43737e 100644 --- a/warehouse/packaging/models.py +++ b/warehouse/packaging/models.py @@ -257,6 +257,16 @@ def _dependency_relation(kind): ) +class Description(db.Model): + + __tablename__ = "release_descriptions" + + content_type = Column(Text) + raw = Column(Text, nullable=False) + html = Column(Text, nullable=False) + rendered_by = Column(Text, nullable=False) + + class Release(db.Model): __tablename__ = "releases" @@ -287,7 +297,6 @@ def __table_args__(cls): # noqa home_page = Column(Text) license = Column(Text) summary = Column(Text) - description_content_type = Column(Text) keywords = Column(Text) platform = Column(Text) download_url = Column(Text) @@ -297,14 +306,21 @@ def __table_args__(cls): # noqa DateTime(timezone=False), nullable=False, server_default=sql.func.now() ) - # We defer this column because it is a very large column (it can be MB in - # size) and we very rarely actually want to access it. Typically we only - # need it when rendering the page for a single project, but many of our - # queries only need to access a few of the attributes of a Release. Instead - # of playing whack-a-mole and using load_only() or defer() on each of - # those queries, deferring this here makes the default case more - # performant. - description = orm.deferred(Column(Text)) + description_id = Column( + ForeignKey("release_descriptions.id", onupdate="CASCADE", ondelete="CASCADE"), + nullable=False, + ) + description = orm.relationship( + "Description", + backref=orm.backref( + "release", + cascade="all, delete-orphan", + passive_deletes=True, + passive_updates=True, + single_parent=True, + uselist=False, + ), + ) _classifiers = orm.relationship( Classifier, diff --git a/warehouse/packaging/tasks.py b/warehouse/packaging/tasks.py index b5e3f788d7a3..988030c8e3cb 100644 --- a/warehouse/packaging/tasks.py +++ b/warehouse/packaging/tasks.py @@ -12,7 +12,8 @@ from warehouse import tasks from warehouse.cache.origin import IOriginCache -from warehouse.packaging.models import Project +from warehouse.packaging.models import Description, Project +from warehouse.utils import readme @tasks.task(ignore_result=True, acks_late=True) @@ -94,3 +95,19 @@ def compute_trending(request): pass else: cacher.purge(["trending"]) + + +@tasks.task(ignore_result=True, acks_late=True) +def update_description_html(request): + renderer_version = readme.renderer_version() + + descriptions = ( + request.db.query(Description) + .filter(Description.rendered_by != renderer_version) + .yield_per(100) + .limit(5000) + ) + + for description in descriptions: + description.html = readme.render(description.raw, description.content_type) + description.rendered_by = renderer_version diff --git a/warehouse/packaging/views.py b/warehouse/packaging/views.py index c7471c0c156f..85c057f2d263 100644 --- a/warehouse/packaging/views.py +++ b/warehouse/packaging/views.py @@ -77,8 +77,16 @@ def release_detail(release, request): if project.name != request.matchdict.get("name", project.name): return HTTPMovedPermanently(request.current_route_path(name=project.name)) - # Render the release description. - description = readme.render(release.description, release.description_content_type) + # Grab the rendered description if it exists, and if it doesn't, then we will render + # it inline. + # TODO: Remove the fallback to rendering inline and only support displaying the + # already rendered content. + if release.description.html: + description = release.description.html + else: + description = readme.render( + release.description.raw, release.description.content_type + ) # Get all of the maintainers for this project. maintainers = [ diff --git a/warehouse/search/tasks.py b/warehouse/search/tasks.py index d5bdd7fa6be9..6394bf5c0d7e 100644 --- a/warehouse/search/tasks.py +++ b/warehouse/search/tasks.py @@ -24,7 +24,13 @@ from sqlalchemy.orm import aliased from warehouse import tasks -from warehouse.packaging.models import Classifier, Project, Release, release_classifiers +from warehouse.packaging.models import ( + Classifier, + Description, + Project, + Release, + release_classifiers, +) from warehouse.packaging.search import Project as ProjectDocument from warehouse.search.utils import get_index from warehouse.utils.db import windowed_query @@ -69,7 +75,7 @@ def _project_docs(db, project_name=None): release_data = ( db.query( - Release.description, + Description.raw.label("description"), Release.version.label("latest_version"), all_versions, Release.author, @@ -89,6 +95,7 @@ def _project_docs(db, project_name=None): ) .select_from(releases_list) .join(Release, Release.id == releases_list.c.id) + .join(Description) .outerjoin(Release.project) )