diff --git a/tests/functional/test_templates.py b/tests/functional/test_templates.py index cfc9788cb5b3..0537410a38b7 100644 --- a/tests/functional/test_templates.py +++ b/tests/functional/test_templates.py @@ -40,7 +40,7 @@ def test_templates_for_empty_titles(): "format_classifiers": "warehouse.filters:format_classifiers", "format_tags": "warehouse.filters:format_tags", "json": "warehouse.filters:tojson", - "readme": "warehouse.filters:readme", + "camoify": "warehouse.filters:camoify", "shorten_number": "warehouse.filters:shorten_number", "urlparse": "warehouse.filters:urlparse", "contains_valid_uris": "warehouse.filters:contains_valid_uris", diff --git a/tests/unit/packaging/test_models.py b/tests/unit/packaging/test_models.py index f8fe94e10246..5a1ebbc1bf03 100644 --- a/tests/unit/packaging/test_models.py +++ b/tests/unit/packaging/test_models.py @@ -17,6 +17,7 @@ from pyramid.security import Allow +from warehouse.utils import readme from warehouse.packaging.models import ( ProjectFactory, Dependency, DependencyKind, File, ) @@ -333,6 +334,41 @@ def test_github_repo_info_url(self, db_session, home_page, expected): ) assert release.github_repo_info_url == expected + def test_html_description_none(self, db_session): + release = DBReleaseFactory.create( + description=None) + + assert release.html_description is None + + def test_html_description_not_cached(self, db_session): + release = DBReleaseFactory.create( + description='text') + + assert release.html_description == '
text
\n' + assert release.rendered_description == 'text
\n' + assert release.renderer_version is not None + assert release in db_session.dirty + + def test_html_description_stale_cache(self, db_session): + release = DBReleaseFactory.create( + description='text', + rendered_description='rendered text', + renderer_version='invalid version') + + assert release.html_description == 'text
\n' + assert release.rendered_description == 'text
\n' + assert release.renderer_version != 'invalid version' + assert release in db_session.dirty + + def test_html_description_valid_cache(self, db_session): + release = DBReleaseFactory.create( + description='text', + rendered_description='rendered text', + renderer_version=readme.renderer_version()) + + assert release.html_description == 'rendered text' + assert release not in db_session.dirty + class TestFile: diff --git a/tests/unit/packaging/test_views.py b/tests/unit/packaging/test_views.py index 1f028738785a..a6f1e46fde8a 100644 --- a/tests/unit/packaging/test_views.py +++ b/tests/unit/packaging/test_views.py @@ -180,6 +180,7 @@ def test_detail_renders(self, db_request): "project": project, "release": releases[1], "files": [files[1]], + "description": releases[1].html_description, "latest_version": project.latest_version, "all_versions": [ (r.version, r.created, r.is_prerelease) diff --git a/tests/unit/test_filters.py b/tests/unit/test_filters.py index 865fb3523632..3111daaf7df3 100644 --- a/tests/unit/test_filters.py +++ b/tests/unit/test_filters.py @@ -14,11 +14,9 @@ from functools import partial -import jinja2 import packaging.version import pretend import pytest -import readme_renderer.rst from warehouse import filters @@ -42,134 +40,9 @@ def test_camo_url(): ) -class TestReadmeRender: - - def test_can_render_rst(self, monkeypatch): - renderer = pretend.call_recorder(lambda raw: "rendered") - monkeypatch.setattr(readme_renderer.rst, "render", renderer) - - request = pretend.stub( - registry=pretend.stub( - settings={ - "camo.url": "https://camo.example.net/", - "camo.key": "fake key", - }, - ), - ) - camo_url = partial(filters._camo_url, request) - request.camo_url = camo_url - - ctx = {"request": request} - - result = filters.readme( - ctx, "raw thing", description_content_type="text/x-rst", - ) - - assert result == jinja2.Markup("rendered") - assert renderer.calls == [pretend.call('raw thing')] - - def test_cant_render_rst(self, monkeypatch): - rst_renderer = pretend.call_recorder(lambda raw: None) - txt_renderer = pretend.call_recorder(lambda raw: "renderedraw thing
\n" + + +def test_cant_render_rst(): + result = readme.render("raw `raw thing
\n" + + +def test_renderer_version(): + assert readme.renderer_version() is not None diff --git a/warehouse/config.py b/warehouse/config.py index 91a56e772cdb..42b6571d0495 100644 --- a/warehouse/config.py +++ b/warehouse/config.py @@ -327,7 +327,7 @@ def configure(settings=None): ) filters.setdefault("format_tags", "warehouse.filters:format_tags") filters.setdefault("json", "warehouse.filters:tojson") - filters.setdefault("readme", "warehouse.filters:readme") + filters.setdefault("camoify", "warehouse.filters:camoify") filters.setdefault("shorten_number", "warehouse.filters:shorten_number") filters.setdefault("urlparse", "warehouse.filters:urlparse") filters.setdefault( diff --git a/warehouse/filters.py b/warehouse/filters.py index d8d72ef81c3a..36fa995b8bcd 100644 --- a/warehouse/filters.py +++ b/warehouse/filters.py @@ -11,7 +11,6 @@ # limitations under the License. import binascii -import cgi import collections import enum import hmac @@ -25,21 +24,11 @@ import jinja2 import packaging.version -import readme_renderer.markdown -import readme_renderer.rst -import readme_renderer.txt from pyramid.threadlocal import get_current_request from warehouse.utils.http import is_valid_uri -_renderers = { - '': readme_renderer.rst, # Default if description_content_type is None - 'text/plain': readme_renderer.txt, - 'text/x-rst': readme_renderer.rst, - 'text/markdown': readme_renderer.markdown, -} - class PackageType(enum.Enum): bdist_dmg = "OSX Disk Image" @@ -73,27 +62,14 @@ def _camo_url(request, url): @jinja2.contextfilter -def readme(ctx, value, *, description_content_type): +def camoify(ctx, value): request = ctx.get("request") or get_current_request() - content_type, parameters = cgi.parse_header(description_content_type or '') - - # Get the appropriate renderer - renderer = _renderers[content_type] - - # Actually render the given value, this will not only render the value, but - # also ensure that it's had any disallowed markup removed. - rendered = renderer.render(value, **parameters) - - # If the content was not rendered, we'll render as plaintext instead - if rendered is None: - rendered = readme_renderer.txt.render(value) - # Parse the rendered output and replace any inline images that don't point # to HTTPS with camouflaged images. tree_builder = html5lib.treebuilders.getTreeBuilder("dom") parser = html5lib.html5parser.HTMLParser(tree=tree_builder) - dom = parser.parse(rendered) + dom = parser.parse(value) for element in dom.getElementsByTagName("img"): src = element.getAttribute("src") @@ -102,9 +78,9 @@ def readme(ctx, value, *, description_content_type): tree_walker = html5lib.treewalkers.getTreeWalker("dom") html_serializer = html5lib.serializer.HTMLSerializer() - rendered = "".join(html_serializer.serialize(tree_walker(dom))) + camoed = "".join(html_serializer.serialize(tree_walker(dom))) - return jinja2.Markup(rendered) + return camoed _SI_SYMBOLS = ["k", "M", "G", "T", "P", "E", "Z", "Y"] diff --git a/warehouse/migrations/versions/628ea7069e3e_project_description_cache_fields.py b/warehouse/migrations/versions/628ea7069e3e_project_description_cache_fields.py new file mode 100644 index 000000000000..c721c12df7e8 --- /dev/null +++ b/warehouse/migrations/versions/628ea7069e3e_project_description_cache_fields.py @@ -0,0 +1,52 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +project description cache fields + +Revision ID: 628ea7069e3e +Revises: 6714f3f04f0f +Create Date: 2018-04-18 05:55:01.222046 +""" + +from alembic import op +import sqlalchemy as sa + + +revision = '628ea7069e3e' +down_revision = '6714f3f04f0f' + +# Note: It is VERY important to ensure that a migration does not lock for a +# long period of time and to ensure that each individual migration does +# not break compatibility with the *previous* version of the code base. +# This is because the migrations will be ran automatically as part of the +# deployment process, but while the previous version of the code is still +# up and running. Thus backwards incompatible changes must be broken up +# over multiple migrations inside of multiple pull requests in order to +# phase them in over multiple deploys. + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + 'releases', + sa.Column('rendered_description', sa.Text(), nullable=True)) + op.add_column( + 'releases', + sa.Column('renderer_version', sa.Text(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('releases', 'renderer_version') + op.drop_column('releases', 'rendered_description') + # ### end Alembic commands ### diff --git a/warehouse/packaging/models.py b/warehouse/packaging/models.py index 765d109fed76..d4886133e5b6 100644 --- a/warehouse/packaging/models.py +++ b/warehouse/packaging/models.py @@ -33,6 +33,7 @@ from sqlalchemy.ext.hybrid import hybrid_property from warehouse import db +from warehouse.utils import readme from warehouse.accounts.models import User from warehouse.classifiers.models import Classifier from warehouse.sitemap.models import SitemapMixin @@ -328,14 +329,18 @@ def __table_args__(cls): # noqa server_default=sql.func.now(), ) - # We defer this column because it is a very large column (it can be MB in - # size) and we very rarely actually want to access it. Typically we only - # need it when rendering the page for a single project, but many of our - # queries only need to access a few of the attributes of a Release. Instead - # of playing whack-a-mole and using load_only() or defer() on each of - # those queries, deferring this here makes the default case more + # We defer these column because they are potentially very large values (it + # can be MB in size) and we very rarely actually want to access them. + # Typically we only need it when rendering the page for a single project, + # but many of our queries only need to access a few of the attributes of a + # Release. Instead of playing whack-a-mole and using load_only() or defer() + # on each of those queries, deferring this here makes the default case more # performant. - description = orm.deferred(Column(Text)) + description = orm.deferred(Column(Text), group='description') + rendered_description = orm.deferred(Column(Text), group='description') + # This contains the version of the readme-renderer used to render + # the readme, for example, "0.19". + renderer_version = orm.deferred(Column(Text), group='description') _classifiers = orm.relationship( Classifier, @@ -420,6 +425,33 @@ def __acl__(self): acls.append((Allow, str(role.user.id), ["upload"])) return acls + @property + def html_description(self): + """Returns the fully-rendered HTML description. + + If the value is cached, it will return that. Otherwise it will render + and store the HTML. + """ + session = orm.object_session(self) + current_renderer_version = readme.renderer_version() + + if not self.description: + return None + + if (self.rendered_description is not None and + self.renderer_version == current_renderer_version): + return self.rendered_description + + rendered = readme.render( + self.description, self.description_content_type) + + # Save the rendered html. + self.rendered_description = rendered + self.renderer_version = current_renderer_version + session.add(self) + + return rendered + @property def urls(self): _urls = OrderedDict() diff --git a/warehouse/packaging/views.py b/warehouse/packaging/views.py index 5ff1bd2ebc41..8fab45e70445 100644 --- a/warehouse/packaging/views.py +++ b/warehouse/packaging/views.py @@ -89,6 +89,9 @@ def release_detail(release, request): request.current_route_path(name=project.name), ) + # Get the release description, rendering it if needed. + description = release.html_description + # Get all of the maintainers for this project. maintainers = [ r.user @@ -121,6 +124,7 @@ def release_detail(release, request): return { "project": project, "release": release, + "description": description, "files": release.files.all(), "latest_version": project.latest_version, "all_versions": project.all_versions, diff --git a/warehouse/templates/packaging/detail.html b/warehouse/templates/packaging/detail.html index bbb45808ae6d..be771760416b 100644 --- a/warehouse/templates/packaging/detail.html +++ b/warehouse/templates/packaging/detail.html @@ -261,9 +261,9 @@