Skip to content

Varnish 6 VCL optimization #36796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 67 additions & 112 deletions app/code/Magento/PageCache/etc/varnish6.vcl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# VCL version 5.0 is not supported so it should be 4.0 even though actually used Varnish version is 6
vcl 4.0;
# The VCL version is not related to the version of Varnish. Use VCL version 4.1 to enforce the use of Varnish 6 or later
vcl 4.1;

import std;
# The minimal Varnish version is 6.0
Expand All @@ -10,7 +10,7 @@ backend default {
.port = "/* {{ port }} */";
.first_byte_timeout = 600s;
.probe = {
.url = "/pub/health_check.php";
.url = "/health_check.php";
.timeout = 2s;
.interval = 5s;
.window = 10;
Expand All @@ -23,10 +23,33 @@ acl purge {
}

sub vcl_recv {
if (req.restarts > 0) {
set req.hash_always_miss = true;
# Remove empty query string parameters
# e.g.: www.example.com/index.html?
if (req.url ~ "\?$") {
set req.url = regsub(req.url, "\?$", "");
}

# Remove port number from host header
set req.http.Host = regsub(req.http.Host, ":[0-9]+", "");

# Sorts query string parameters alphabetically for cache normalization purposes
set req.url = std.querysort(req.url);

# Remove the proxy header to mitigate the httpoxy vulnerability
# See https://httpoxy.org/
unset req.http.proxy;

# Add X-Forwarded-Proto header when using https
if (!req.http.X-Forwarded-Proto && (std.port(server.ip) == 443 || std.port(server.ip) == 8443)) {
set req.http.X-Forwarded-Proto = "https";
}

# Reduce grace to the configured setting if the backend is healthy
# In case of an unhealthy backend, the original grace is used
if (std.healthy(req.backend_hint)) {
set req.http.grace = /* {{ grace_period }} */s;
}

if (req.method == "PURGE") {
if (client.ip !~ purge) {
return (synth(405, "Method not allowed"));
Expand All @@ -35,7 +58,7 @@ sub vcl_recv {
# has been added to the response in your backend server config. This is used, for example, by the
# capistrano-magento2 gem for purging old content from varnish during it's deploy routine.
if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
return (purge);
}
if (req.http.X-Magento-Tags-Pattern) {
ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
Expand All @@ -50,10 +73,10 @@ sub vcl_recv {
req.method != "HEAD" &&
req.method != "PUT" &&
req.method != "POST" &&
req.method != "PATCH" &&
req.method != "TRACE" &&
req.method != "OPTIONS" &&
req.method != "DELETE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}

Expand All @@ -62,43 +85,17 @@ sub vcl_recv {
return (pass);
}

# Bypass customer, shopping cart, checkout
if (req.url ~ "/customer" || req.url ~ "/checkout") {
return (pass);
}

Comment on lines -65 to -69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please explain this change? Why you decided to remove this part?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@peterjaap What was the deal again with this one? Did you say that Magento already returns Cache-Control: private, no-cache, no-store for these paths? Please clarify, I don't remember the exact reason.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes exactly

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ihor-sviziev is that an acceptable answer?

Long story short: no need to write code to bypass the cache if Magento prevents these pages from ending up in the cache.

Copy link
Contributor

@ihor-sviziev ihor-sviziev Feb 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@peterjaap @ThijsFeryn Make sense, thx.
Please point me to where this code is located. Does it works based on the path, or it's inside the controllers?
If that's the 2nd option than people might add custom controllers in their modules that reuse these front names and rely on this behavior.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ThijsFeryn @peterjaap any updates?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@peterjaap this is your domain, can you provide an answer to @ihor-sviziev please?

Copy link
Contributor Author

@peterjaap peterjaap Mar 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ihor-sviziev here:

# Bypass health check requests
if (req.url ~ "^/(pub/)?(health_check.php)$") {
return (pass);
}

# Set initial grace period usage status
set req.http.grace = "none";

# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://", "");

# collect all cookies
# Collapse multiple cookie headers into one
std.collect(req.http.Cookie);

# Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
unset req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unknown algorithm
unset req.http.Accept-Encoding;
}
}

# Remove all marketing get parameters to minimize the cache objects
if (req.url ~ "(\?|&)(gclid|cx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=") {
set req.url = regsuball(req.url, "(gclid|cx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=[-_A-z0-9+()%.]+&?", "");
if (req.url ~ "(\?|&)(gclid|cx|_kx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=") {
set req.url = regsuball(req.url, "(gclid|cx|_kx|ie|cof|siteurl|zanpid|origin|fbclid|mc_[a-z]+|utm_[a-z]+|_bta_[a-z]+)=[-_A-z0-9+()%.]+&?", "");
set req.url = regsub(req.url, "[?|&]+$", "");
}

Expand All @@ -113,101 +110,80 @@ sub vcl_recv {
#unset req.http.Cookie;
}

# Bypass authenticated GraphQL requests without a X-Magento-Cache-Id
if (req.url ~ "/graphql" && !req.http.X-Magento-Cache-Id && req.http.Authorization ~ "^Bearer") {
# Don't cache the authenticated GraphQL requests
if (req.url ~ "/graphql" && req.http.Authorization ~ "^Bearer") {
return (pass);
}
Comment on lines -117 to 116

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regardless of the value of X-Magento-Cache-Id: authorized GraphQL requests should not be served from the cache.

Copy link
Contributor

@danmooney2 danmooney2 Feb 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is why X-Magento-Cache-Id exists in the first place. We need to have authorized GraphQL requests cacheable.

Keep in mind what the session factors currently are that create X-Magento-Cache-Id (and are able to be extended):

  • Currency
  • Store
  • Customer Group (this won't matter anymore if we only cache the requests of guests)
  • Customer tax rate
  • Is logged in (this won't matter anymore if we only cache the requests of guests)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@danmooney2 my concern is that this wil create too many cache variations which consume space in the cache and impact the hit rate.

Can you give me your opinion on my assumption about having too many cache variations?

For a typical store, how many different X-Magento-Cache-Id values do you think there are?

If the average number of variations would be manageable, we can choose to revert this change.

@peterjaap can you chime in as well please?

Copy link
Contributor

@danmooney2 danmooney2 Mar 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The maximum number of variations for X-Magento-Cache-Id (without being extended) can be determined by the following equation:

Number of Currencies * Number of Stores * Number of Customer Groups * Number of Customer Tax Rates * 2 (is logged in or not).

The point is, it's obvious from current implementation that X-Magento-Cache-Id is utilized to provide both unauthorized and authorized consumers cached responses.

Correct me if I'm wrong, but the issue seems to boil down to a cost benefit analysis of space consumed versus UX. I had initially assumed this change was done to ensure an authorized user doesn't see something they weren't supposed to. Thanks for clarifying your concern.

I'm paging GraphQL performance Product Manager @cpartica for visibility and for his take on this change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious to hear from @cpartica :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@danmooney2 I'd be happy to wait for Cris since this isn't my area of expertise since we don't work with GraphQL so much.

I'm also waiting on feedback here #36796 (comment)

Copy link
Contributor

@paales paales Apr 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I know; In concept this serves the same purpose as the X-Magento-Vary header. It creates separate cache buckets for the the session factors.

As for the cache buckets created:

Number of Currencies * Number of Stores * Number of Customer Groups * Number of Customer Tax Rates * 2 (is logged in or not).

We already have currencies and stores so that doesn't increase it. 'Not logged in' is a customer group, so it doesn't double the buckets for logged in or not logged in. You can not be in a customer group without being logged in?

However, The customer's tax rate, expressed as a percentage, such as 0.0875 is a problem. This means when a user selects a different country in the checkout and they will get a different percentage all caches are now invalidated.. This could explode the cache buckets, that would be an argument against this? It is required because catalog display prices could vary depending on the tax rate.

When a non-default tax rate is used, it should not cache the result. (can it be that this already happens)?

Reading deeper into this, an unauthorized request (example) will return a x-magento-cache-id header.

There are now two systems to handle caching:

  1. Store + Content-Currency
  2. X-Magento-Cache-Id

Does this mean that in practice we should make sure an implementer only uses one system, and not the other system?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's quite simple in my opinion, you should never cache authenticated requests based on different parameters...
So, if a Bearer token is set, the backend should always check the request.

It is authenticated for a reason, that's why the X-Magento-Cache-Id doesn't matter anymore.

Varnish is not for authentication, the backend is, it can check en re-evaluate the request.

You could even run into cache poisoning because of the current implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeroenBoersma That makes some sense, but the regular frontend requests do the same thing. It will create a different cache entry for different customer groups if one is logged in. Is your idea that this shouldn't happen at all?


return (hash);
}

sub vcl_hash {
if ((req.url !~ "/graphql" || !req.http.X-Magento-Cache-Id) && req.http.cookie ~ "X-Magento-Vary=") {
if (req.url !~ "/graphql" && req.http.cookie ~ "X-Magento-Vary=") {
hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
}

# To make sure http users don't see ssl warning
if (req.http./* {{ ssl_offloaded_header }} */) {
hash_data(req.http./* {{ ssl_offloaded_header }} */);
}
hash_data(req.http./* {{ ssl_offloaded_header }} */);

/* {{ design_exceptions_code }} */

if (req.url ~ "/graphql") {
call process_graphql_headers;
}
}

sub process_graphql_headers {
if (req.http.X-Magento-Cache-Id) {
hash_data(req.http.X-Magento-Cache-Id);

# When the frontend stops sending the auth token, make sure users stop getting results cached for logged-in users
if (req.http.Authorization ~ "^Bearer") {
hash_data("Authorized");
if (req.http.X-Magento-Cache-Id) {
hash_data(req.http.X-Magento-Cache-Id);
} else {
# if no X-Magento-Cache-Id (which already contains Store & Currency) is not set, use the HTTP headers
hash_data(req.http.Store);
hash_data(req.http.Content-Currency);
}
}

if (req.http.Store) {
hash_data(req.http.Store);
}

if (req.http.Content-Currency) {
hash_data(req.http.Content-Currency);
}
}

sub vcl_backend_response {

# Serve stale content for three days after object expiration
# Perform asynchronous revalidation while stale content is served
set beresp.grace = 3d;

# All text-based content can be parsed as ESI
if (beresp.http.content-type ~ "text") {
set beresp.do_esi = true;
}

# Allow GZIP compression on all JavaScript files and all text-based content
if (bereq.url ~ "\.js$" || beresp.http.content-type ~ "text") {
set beresp.do_gzip = true;
}


# Add debug headers
if (beresp.http.X-Magento-Debug) {
set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
}

# cache only successfully responses and 404s that are not marked as private
if (beresp.status != 200 &&
beresp.status != 404 &&
beresp.http.Cache-Control ~ "private") {
# Only cache HTTP 200 and HTTP 404 responses
if (beresp.status != 200 && beresp.status != 404) {
set beresp.ttl = 120s;
set beresp.uncacheable = true;
set beresp.ttl = 86400s;
return (deliver);
}

# Don't cache if the request cache ID doesn't match the response cache ID for graphql requests
if (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id) {
set beresp.ttl = 120s;
set beresp.uncacheable = true;
return (deliver);
}

# validate if we need to cache it and prevent from setting cookie
# Remove the Set-Cookie header for cacheable content
# Only for HTTP GET & HTTP HEAD requests
if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
unset beresp.http.set-cookie;
unset beresp.http.Set-Cookie;
}

# If page is not cacheable then bypass varnish for 2 minutes as Hit-For-Pass
if (beresp.ttl <= 0s ||
beresp.http.Surrogate-control ~ "no-store" ||
(!beresp.http.Surrogate-Control &&
beresp.http.Cache-Control ~ "no-cache|no-store") ||
beresp.http.Vary == "*") {
# Mark as Hit-For-Pass for the next 2 minutes
set beresp.ttl = 120s;
set beresp.uncacheable = true;
}

# If the cache key in the Magento response doesn't match the one that was sent in the request, don't cache under the request's key
if (bereq.url ~ "/graphql" && bereq.http.X-Magento-Cache-Id && bereq.http.X-Magento-Cache-Id != beresp.http.X-Magento-Cache-Id) {
set beresp.ttl = 0s;
set beresp.uncacheable = true;
}

return (deliver);
}

sub vcl_deliver {
if (resp.http.x-varnish ~ " ") {
if (obj.uncacheable) {
set resp.http.X-Magento-Cache-Debug = "UNCACHEABLE";
} else if (obj.hits) {
set resp.http.X-Magento-Cache-Debug = "HIT";
set resp.http.Grace = req.http.grace;
} else {
Expand All @@ -232,24 +208,3 @@ sub vcl_deliver {
unset resp.http.Via;
unset resp.http.Link;
}

sub vcl_hit {
if (obj.ttl >= 0s) {
# Hit within TTL period
return (deliver);
}
if (std.healthy(req.backend_hint)) {
if (obj.ttl + /* {{ grace_period }} */s > 0s) {
# Hit after TTL expiration, but within grace period
set req.http.grace = "normal (healthy server)";
return (deliver);
} else {
# Hit after TTL and grace expiration
return (restart);
}
} else {
# server is not healthy, retrieve from cache
set req.http.grace = "unlimited (unhealthy server)";
return (deliver);
}
}