From 66f5b2006513dc0eaab86d48486d052e90656aa0 Mon Sep 17 00:00:00 2001 From: Mike Fiedler Date: Mon, 31 Jul 2023 12:22:09 -0400 Subject: [PATCH 1/3] fix: remove copy&pasta encoding from helper Currently broken, as Sphinx looks to these variables to set the encoding. Removing them works fine. Signed-off-by: Mike Fiedler --- bin/dev-docs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/bin/dev-docs b/bin/dev-docs index 039acf003cb0..2926aa513abb 100755 --- a/bin/dev-docs +++ b/bin/dev-docs @@ -1,14 +1,5 @@ #!/bin/bash -set -e - -# Click requires us to ensure we have a well configured environment to run -# our click commands. So we'll set our environment to ensure our locale is -# correct. -export LC_ALL="${ENCODING:-en_US.UTF-8}" -export LANG="${ENCODING:-en_US.UTF-8}" - -# Print all the following commands -set -x +set -ex make -C docs/dev/ doctest SPHINXOPTS="-W" make -C docs/dev/ html SPHINXOPTS="-W" From 22eafa42b66c86db02d5bf64a6c4cb2fa3b9d6ca Mon Sep 17 00:00:00 2001 From: Mike Fiedler Date: Tue, 17 Jan 2023 16:55:18 -0500 Subject: [PATCH 2/3] docs: add system architecture diagrams Not 100% comprehensive, but a good starting point. Signed-off-by: Mike Fiedler --- docs/dev/architecture.md | 167 ++++++++++++++++++++++++++++++++++++++ docs/dev/conf.py | 1 + docs/dev/index.rst | 1 + requirements/docs-dev.in | 2 + requirements/docs-dev.txt | 36 +++++++- 5 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 docs/dev/architecture.md diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md new file mode 100644 index 000000000000..e6fea1be5ad6 --- /dev/null +++ b/docs/dev/architecture.md @@ -0,0 +1,167 @@ +# System Architecture + +Using the [C4 Model](https://c4model.com/) approach in [Mermaid syntax](https://mermaid.js.org/syntax/c4c.html) + +## System Context Diagram + +Represent items that **users** are most likely to interact with. +Some specifics have been omitted from the visual diagrams for clarity. +Items like: Kubernetes, Amazon Web Storage, deployment tooling, etc. + +```{mermaid} +C4Context + title System Context Diagram: Warehouse + UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1") + + Person_Ext(endUser, "End User", "human or machine") + + Enterprise_Boundary(b0, "External Services") { + System_Ext(fastly, "Fastly", "Content Delivery Network") + System_Ext(b2, "Backblaze B2", "Object store (cache)") + System_Ext(s3, "AWS S3", "Object store (archive)") + + %% User's browser interacts with HIBP and GitHub - worth representing? + %% System_Ext(hibp, "Have I Been Pwned", "breached username/password lookup") + %% System_Ext(github_api, "GitHub API", "project stats lookup") + + Rel(fastly, b2, "fetch and cache response") + Rel(fastly, s3, "fallback when B2 is either down or missing file") + UpdateRelStyle(fastly, s3, $offsetX="-50", $offsetY="40") + } + + Enterprise_Boundary(b1, "Warehouse Ecosystem") { + System(warehouse, "Warehouse", "Multiple components") + } + + BiRel(endUser, fastly, "Uses Web or API", "HTTPS") + UpdateRelStyle(endUser, fastly, $offsetY="-30") + Rel(fastly, warehouse, "proxies traffic to origin") + UpdateRelStyle(fastly, warehouse, $offsetX="10", $offsetY="-20") + + BiRel(endUser, warehouse, "Uploads bypass Fastly", "HTTPS") + UpdateRelStyle(endUser, warehouse, $offsetX="-80", $offsetY="-130") +``` + +Generally speaking, end users interact with the Warehouse ecosystem via Fastly, +which proxies traffic to the origin Warehouse instance. +Warehouse stores package files in Backblaze B2 and AWS S3. 
+Files are fetched and cached by Fastly. + +B2 is used as primary storage for cost savings over S3 for egress, +as Backblaze has an agreement with Fastly to waive egress fees. + +When a user uploads to warehouse, Fastly is bypassed +and the upload goes directly to the origin Warehouse instance. + +## Warehouse Container Diagrams + +_**Note**: A [Container diagram](https://c4model.com/#ContainerDiagram) is not a Docker container._ + +Let's dig into what makes up the Warehouse. +We'll split between the "web" and "worker" classes of services for clarity. + +### Web Container Diagrams + +On the web side, we run two types - the main web app, and the web uploads app. +The main difference between them is their `gunicorn` settings, +allowing the uploads app to handle larger file sizes and longer timeouts. + +#### Web Container Diagram - `web` + +On this diagram, we will only display a single web instance. +This serves the majority of end-user requests +and interactions with the web site & APIs. + +We do not show the interactions with storage systems (B2, S3), +as responses will direct clients to the storage system directly +via URLs prefixed with: `https://files.pythonhosted.org/packages/...` +which are served by Fastly and cached. + +```{mermaid} +C4Container + title Container Diagram: Warehouse - Web + UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1") + + Person_Ext(endUser, "End User", "human or machine") + System_Ext(fastly, "Fastly", "Content Delivery Network") + + Container_Boundary(c1, "Warehouse & Supporting Systems") { + Container(camo, "Camo", "image proxy") + Container(web_app, "Web", "Python (Pyramid, SQLAlchemy)", "Delivers HTML and API content") + SystemQueue(sqs, "AWS SQS", "task broker") + SystemDb(elasticsearch, "Elasticsearch", "Index of projects, packages, metadata") + SystemDb(db, "Postgres Database", "Store project, package metadata, user details") + SystemDb(redis, "Redis", "Store short-term cache data") + + Rel(web_app, sqs, "queue tasks") + Rel(web_app, elasticsearch, "search for projects") + Rel(web_app, db, "store/retrieve most data") + Rel(web_app, redis, "cache data") + } + + Rel(endUser, camo, "load images from project descriptions", "HTTPS") + Rel(endUser, fastly, "Uses", "HTTPS") + Rel(fastly, web_app, "proxies traffic to", "HTTPS") +``` + +#### Web Container Diagram - `web_uploads` + +Here we show how a user might upload a file to the Warehouse. + +```{mermaid} +C4Container + title Container Diagram: Warehouse - Web Uploads + UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1") + + Person_Ext(endUser, "Client app", "e.g. twine, flit") + + Container_Boundary(c1, "Warehouse") { + SystemDb(db, "Postgres Database", "Store project, package metadata, user details") + Rel(web_app, db, "store/retrieve most data") + + Container(web_app, "Web", "Python (Pyramid, SQLAlchemy)", "Delivers HTML and API content") + + System(s3, "AWS S3", "Object store (archive)") + Rel(web_app, s3, "stores package files") + + SystemQueue(sqs, "AWS SQS", "task broker") + Rel(web_app, sqs, "queue sync to cache task") + } + + Rel(endUser, web_app, "POST files and metadata", "HTTPS") +``` + +### Worker Container Diagram + +Our workers use Celery to run tasks. +We run a single worker type, feeding off multiple queues. +We also use Celery Beat to schedule tasks. + +We currently use AWS SQS as the queue, +and Redis as the result backend and schedule storage. 
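As a rough illustration of that split (not the actual Warehouse configuration; the broker/backend URLs, queue name, and task below are placeholders), a Celery app wired the same way might look like:

```python
# Hedged sketch only: the URLs, names, and task body are illustrative
# placeholders, not Warehouse code.
from celery import Celery
from celery.schedules import crontab

app = Celery(
    "warehouse_sketch",
    broker="sqs://",                     # SQS transport; AWS credentials come from the environment
    backend="redis://localhost:6379/0",  # task results are stored in Redis
)

# Celery Beat "keeps time": it reads this schedule and enqueues tasks onto SQS,
# where a worker picks them up.
app.conf.beat_schedule = {
    "hypothetical-nightly-job": {
        "task": "sketch.nightly_job",
        "schedule": crontab(minute=0, hour=3),
    },
}


@app.task(name="sketch.nightly_job")
def nightly_job():
    # A worker process consumes this from the queue and runs it.
    return "done"
```

The sketch keeps the schedule in code; persisting it in Redis, as described above, would additionally need a Redis-backed Beat scheduler (for example the third-party RedBeat), which is left out here. The diagram below shows the same relationships: Beat schedules onto SQS, workers consume from SQS, and Redis holds results and schedule state.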
+ +```{mermaid} +C4Container + Container(worker_beat, "Worker - Beat", "Python, Celery", "keeps time, schedules tasks") + Container(worker, "Worker", "Python, Celery", "runs tasks") + + Container_Boundary(c1, "Supporting Systems") { + SystemDb(redis, "Redis", "Store short-term cache data") + SystemQueue(sqs, "AWS SQS", "task broker") + SystemDb(elasticsearch, "Elasticsearch", "Index of projects, packages, metadata") + SystemDb(db, "Postgres Database", "Store project, package metadata, user details") + System(ses, "AWS SES", "Simple Email Service") + } + + System_Ext(fastly, "Fastly", "Content Delivery Network") + + BiRel(worker, sqs, "get next task/ack") + BiRel(worker, redis, "store task results") + BiRel(worker, db, "interact with models") + BiRel(worker, elasticsearch, "update search index") + Rel(worker, fastly, "purge URLs") + Rel(worker, ses, "send emails") + + BiRel(worker_beat, redis, "fetch/store task schedules") + Rel(worker_beat, sqs, "schedule tasks") +``` diff --git a/docs/dev/conf.py b/docs/dev/conf.py index 4efcf237a4db..ab303420112d 100644 --- a/docs/dev/conf.py +++ b/docs/dev/conf.py @@ -27,6 +27,7 @@ "sphinx.ext.todo", "sphinxcontrib.httpdomain", "myst_parser", + "sphinxcontrib.mermaid", ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 33a4528b6fbc..bcbb9e991f67 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -8,6 +8,7 @@ Contents: development/index application + architecture api-reference/index ui-principles security diff --git a/requirements/docs-dev.in b/requirements/docs-dev.in index 11a449425456..3e18b81e2723 100644 --- a/requirements/docs-dev.in +++ b/requirements/docs-dev.in @@ -1,4 +1,6 @@ furo Sphinx +sphinx-autobuild sphinxcontrib-httpdomain +sphinxcontrib-mermaid myst-parser diff --git a/requirements/docs-dev.txt b/requirements/docs-dev.txt index c66a5fad5383..5d2a8ead4ef1 100644 --- a/requirements/docs-dev.txt +++ b/requirements/docs-dev.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes --output-file=requirements/docs-dev.txt requirements/docs-dev.in +# pip-compile --allow-unsafe --config=pyproject.toml --generate-hashes --output-file=requirements/docs-dev.txt requirements/docs-dev.in # alabaster==0.7.13 \ --hash=sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3 \ @@ -97,6 +97,10 @@ charset-normalizer==3.2.0 \ --hash=sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac \ --hash=sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa # via requests +colorama==0.4.6 \ + --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 + # via sphinx-autobuild docutils==0.20.1 \ --hash=sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 \ --hash=sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b @@ -121,6 +125,10 @@ jinja2==3.1.2 \ # via # myst-parser # sphinx +livereload==2.6.3 \ + --hash=sha256:776f2f865e59fde56490a56bcc6773b6917366bce0c267c60ee8aaf1a0959869 \ + --hash=sha256:ad4ac6f53b2d62bb6ce1a5e6e96f1f00976a32348afedcb4b6d68df2a1d346e4 + # via sphinx-autobuild markdown-it-py==3.0.0 \ --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb @@ -250,7 +258,9 @@ 
requests==2.31.0 \ six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - # via sphinxcontrib-httpdomain + # via + # livereload + # sphinxcontrib-httpdomain snowballstemmer==2.2.0 \ --hash=sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1 \ --hash=sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a @@ -266,8 +276,13 @@ sphinx==7.0.1 \ # -r requirements/docs-dev.in # furo # myst-parser + # sphinx-autobuild # sphinx-basic-ng # sphinxcontrib-httpdomain +sphinx-autobuild==2021.3.14 \ + --hash=sha256:8fe8cbfdb75db04475232f05187c776f46f6e9e04cacf1e49ce81bdac649ccac \ + --hash=sha256:de1ca3b66e271d2b5b5140c35034c89e47f263f2cd5db302c9217065f7443f05 + # via -r requirements/docs-dev.in sphinx-basic-ng==1.0.0b2 \ --hash=sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9 \ --hash=sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b @@ -292,6 +307,10 @@ sphinxcontrib-jsmath==1.0.1 \ --hash=sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 \ --hash=sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8 # via sphinx +sphinxcontrib-mermaid==0.9.2 \ + --hash=sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af \ + --hash=sha256:6795a72037ca55e65663d2a2c1a043d636dc3d30d418e56dd6087d1459d98a5d + # via -r requirements/docs-dev.in sphinxcontrib-qthelp==1.0.3 \ --hash=sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72 \ --hash=sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 @@ -300,6 +319,19 @@ sphinxcontrib-serializinghtml==1.1.5 \ --hash=sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd \ --hash=sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952 # via sphinx +tornado==6.3.2 \ + --hash=sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4 \ + --hash=sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf \ + --hash=sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d \ + --hash=sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba \ + --hash=sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe \ + --hash=sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411 \ + --hash=sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2 \ + --hash=sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0 \ + --hash=sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c \ + --hash=sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f \ + --hash=sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829 + # via livereload urllib3==1.26.16 \ --hash=sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f \ --hash=sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14 From c2d271d6c78b0e0368d94b2e6ef3845c45d5ef92 Mon Sep 17 00:00:00 2001 From: Mike Fiedler Date: Tue, 1 Aug 2023 12:19:04 -0400 Subject: [PATCH 3/3] add redis to uploads Signed-off-by: Mike Fiedler --- docs/dev/architecture.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md index e6fea1be5ad6..a029ff029d91 100644 --- a/docs/dev/architecture.md +++ b/docs/dev/architecture.md @@ -126,6 +126,9 @@ C4Container SystemQueue(sqs, "AWS SQS", "task broker") 
Rel(web_app, sqs, "queue sync to cache task") + + SystemDb(redis, "Redis", "Store short-term cache data") + Rel(web_app, redis, "get/set rate limits and cache data") } Rel(endUser, web_app, "POST files and metadata", "HTTPS")
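To illustrate the new `get/set rate limits and cache data` relation, a fixed-window rate limit against Redis, in the spirit of what the uploads app needs, could be sketched as below. The key format, limits, and connection URL are placeholders, not Warehouse's actual implementation.

```python
# Hedged sketch only: key names, limits, and the Redis URL are illustrative
# placeholders, not Warehouse code.
import redis

r = redis.Redis.from_url("redis://localhost:6379/0")


def allow_upload(user_id: str, limit: int = 20, window_seconds: int = 60) -> bool:
    """Return True if this user is still under `limit` uploads in the current window."""
    key = f"ratelimit:upload:{user_id}"
    count = r.incr(key)                # the "set" half: bump the per-user counter
    if count == 1:
        r.expire(key, window_seconds)  # first hit opens a new window
    return count <= limit              # the "get" half: compare against the allowed limit
```

Treat this purely as a reading aid for the diagram edge, not as project code.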