proweb
11/4/2015 - 3:40 PM

The perfect Varnish configuration for Joomla (& other CMS based) websites

The perfect Varnish configuration for Joomla (& other CMS based) websites

#################################################################################
### The perfect Varnish configuration for Joomla (& other CMS based) websites ###
#################################################################################

# USE: Place the contents of this configuration inside the main 
# Varnish configuration file, located in: /etc/varnish/default.vcl (root server access required - obviously)


# IMPORTANT: The following setup assumes a 2 minute cache time. You can safely increase
# this to 5 mins for less busy sites or drop it to 1 min or even 30s for high traffic sites.


# USE BEYOND JOOMLA: This configuration requires an HTTP header and a user cookie (see the Joomla section)
# to identify whether a user is logged in to a site, in order to bypass caching entirely for that user. If your CMS provides a way to add
# these 2 requirements, then you can use this configuration to speed up your site or entire server. You can even
# exclude the domains you don't want to cache if you're looking to use it in a multi-site setup.


# === FOR JOOMLA ONLY [start] ===
#
# This Varnish configuration makes use of a custom HTTP header plus a user cookie to determine whether
# some user is logged in or not inside Joomla. To insert the HTTP header, simply append the following code block,
# while setting the $cookieDomain value:
#
#// Make Joomla Varnish-friendly [START]
#$cookieDomain = 'domain.tld'; // Replace "domain.tld" with your "naked" domain
#
#$getUserState = JFactory::getUser();
#
#if ($getUserState->guest) {
#    JResponse::allowCache(true);
#    JResponse::setHeader('X-Logged-In', 'False', true);
#    if(!empty($_COOKIE["userID"])){
#        setcookie("userID", "", time() - 3600, '/', $cookieDomain, 0);
#    }
#} else {
#    JResponse::allowCache(true);
#    JResponse::setHeader('X-Logged-In', 'True', true);
#    if(!isset($_COOKIE["userID"])){
#        setcookie("userID", $getUserState->id, 0, '/', $cookieDomain, 0);
#    }
#}
#// Make Joomla Varnish-friendly [FINISH]
#
# in your template's "index.php" file, right after the line:
# defined('_JEXEC') or die;
#
# IMPORTANT: If you use K2 (getk2.org) in your Joomla site, simply set the "Cookie Domain" option in the K2 parameters
# ("Advanced" tab) and all the above will be automatically enabled for your entire Joomla site.
#
#
# === HOW TO HANDLE FRONTEND LOGINS (e.g. for use with member areas, forums etc.) ===
# It is important for you to understand that since Joomla (in a very amateur way) uses session cookies for any user
# (even guests) supposedly for additional security (debatable), Varnish *cannot* work with Joomla out-of-the-box. If
# you installed Varnish without any modification to its configuration besides the cache time, it could not properly
# cache Joomla content because of the session cookies Joomla uses for both guest and logged in visitors. To bypass
# Joomla's behaviour, we must additionally set Varnish to strip any cookies set by Joomla, except for a specific one (userID).
# For even better control, we also set a custom HTTP header (X-Logged-In), which we have Varnish check on all requests. The
# code sample above shows how to integrate all of this into Joomla via your template.
# However, if we want Varnish to allow frontend logins in Joomla, without breaking Joomla (because we strip its session cookies),
# we must explicitly tell Varnish which entry pages (=login pages) not to cache. Such a page could be for example the default
# Joomla login form (e.g. with an alias "login"). In the 2 Varnish exclusion lists defined in the configuration below, we would add
# "^/login" to make sure Varnish completely switches off when a user visits this page. In that case, Joomla's session cookie gets
# set and the form can be submitted normally, passing all Joomla security checks. Same goes for any page in Joomla that requires
# user input: a contact form, a newsletter signup form, a forum, comments and so on. So the solution to keep in mind is simple:
# - If the action requires the user to login first (e.g. a forum), we must create a specific/unique page for users to login first.
#   Once they log in, Varnish switches off completely and then a user can post in the forum or write comments or use a contact form
#   as if Varnish did not exist. If the user continues to browse the site while logged in, Varnish will be completely off ONLY for
#   this user. If the user logs out, Varnish will kick back in.
# - If the action does not require a user to be logged in first, e.g. a contact form, we simply exclude the contact form's URL from
#   Varnish, in which case -again- Varnish will switch off completely and the user will be able to submit the form passing the
#   Joomla security checks. If the user browses anywhere else in the site, Varnish will kick back in.
#
# === FOR JOOMLA ONLY [finish] ===


# === CHANGELOG ===
# Nov 3rd, 2015:
# - Updated configuration with new more robust rules
# - Added Varnish 4.x compatibility notes
# - Added JRememberMe cookie to allow Varnish to work properly with Joomla's "remember me" login option. Please
#   refer to this pull request on GitHub https://github.com/joomla/joomla-cms/pull/7677 on how to implement
#   on your Joomla site.
# - Configuration moved to GitHub from snipt.net (https://snipt.net/fevangelou/the-perfect-varnish-configuration-for-joomla-websites/)
#


### /etc/varnish/default.vcl - START ###

backend default {
    # TCP port your web server (Apache, Nginx etc.) listens on.
    # Replace XXXX with the actual port number.
    .port = "XXXX";

    # Address of the web server. Keep the loopback address as long as
    # the web server runs on the same machine as Varnish.
    .host = "127.0.0.1";
}

sub vcl_recv {

    # Entry point for every client request (Varnish 3.x syntax). Decides whether
    # the request bypasses the cache (pass/pipe) or is looked up in the cache.

    # If we host multiple domains on a server, here you can list the domains you DO NOT want to cache.
    # The first check matches both naked & "www" subdomains. Use the second for non generic subdomains.
    # The patterns are anchored (^...$), case-insensitive and have their dots escaped, so that excluding
    # "domain1.com" does not also exclude e.g. "mydomain1.com". An optional ":port" suffix is tolerated.
    if (
        req.http.host ~ "(?i)^(www\.)?(domain1\.com|domain2\.org|domain3\.net)(:[0-9]+)?$" ||
        req.http.host ~ "(?i)^(subdomain\.domain4\.tld|othersubdomain\.domain5\.tld)(:[0-9]+)?$"
    ) {
        return (pass);
    }

    # Forward client's IP to the backend (only on the first pass, not on restarts,
    # so the IP is not appended more than once).
    # Varnish 4.x note: 4.x already appends the client IP to X-Forwarded-For before
    # vcl_recv runs, so this whole block should be removed when porting to 4.x.
    if (req.restarts == 0) {
        if (req.http.X-Forwarded-For) {
            set req.http.X-Forwarded-For = req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    # Non-RFC2616 or CONNECT which is weird: hand the connection straight to the backend.
    if (
        req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE"
    ) {
        return (pipe);
    }

    # We only deal with GET and HEAD by default; everything else goes to the backend uncached.
    if (req.request != "GET" && req.request != "HEAD") {
        return (pass);
    }

    # Don't cache HTTP authorization/authentication pages and pages with certain headers or cookies
    # ("userID" and "JRememberMe" identify logged in visitors).
    if (
        req.http.Authorization ||
        req.http.Authenticate ||
        req.http.X-Logged-In == "True" ||
        req.http.Cookie ~ "userID" ||
        req.http.Cookie ~ "JRememberMe"
    ) {
        return (pass);
    }

    # Exclude the following paths (e.g. backend admins, user pages or ad URLs that require tracking)
    # In Joomla specifically, you are advised to create specific entry points (URLs) for users to
    # interact with the site (either common user logins or even commenting), e.g. make a menu item
    # to point to a user login page (e.g. /login), including all related functionality such as
    # password reset, email reminder and so on.
    # Keep this list identical to the one in vcl_fetch().
    if (
        req.url ~ "^/administrator" ||
        req.url ~ "^/component/banners" ||
        req.url ~ "^/component/users" ||
        req.url ~ "^/wp-admin" ||
        req.url ~ "^/wp-login\.php" ||
        req.url ~ "^/any-other-url-path"
    ) {
        return (pass);
    }

    # Don't cache ajax requests or any URL containing "nocache"
    if (req.http.X-Requested-With == "XMLHttpRequest" || req.url ~ "nocache") {
        return (pass);
    }

    # Check for the custom "X-Logged-In" header (used by K2 and other apps) to identify
    # if the visitor is a guest, then unset any cookie (including session cookies) provided
    # it's not a POST request. (POST requests have already been passed above; the method
    # check is kept as a safeguard in case the rules above are reordered.)
    if (req.http.X-Logged-In == "False" && req.request != "POST") {
        unset req.http.Cookie;
    }

    # Properly handle different encoding types, so the cache does not store one
    # variant per distinct Accept-Encoding header sent by browsers.
    if (req.http.Accept-Encoding) {
        if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf)(\?.*)?$") {
            # No point in compressing these (already compressed formats)
            unset req.http.Accept-Encoding;
        } elseif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elseif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            # unknown algorithm (aka crappy browser)
            unset req.http.Accept-Encoding;
        }
    }

    # Set how long Varnish may serve stale content depending on whether your backend is healthy or not.
    # This is done BEFORE any "return (lookup)" so that it applies to static files as well.
    if (req.backend.healthy) {
        # Remember to adjust beresp.ttl & beresp.http.Cache-Control inside vcl_fetch()
        # to the same time
        set req.grace = 2m;
    } else {
        # Keep in sync with "set beresp.grace" inside vcl_fetch()
        set req.grace = 1h;
    }

    # Cache files with these extensions (with or without a query string, e.g. "style.css?v=2")
    if (req.url ~ "\.(js|css|jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf)(\?.*)?$") {
        return (lookup);
    }

    # Everything that reached this point is cacheable
    return (lookup);

}

sub vcl_fetch {

    # Runs for every response fetched from the backend (Varnish 3.x syntax). Decides
    # whether the response may be cached and normalizes its caching headers.
    #
    # Varnish 4.x note: this subroutine is called vcl_backend_response there and
    # "return (hit_for_pass)" no longer exists. Replace every occurrence below with:
    #     set beresp.uncacheable = true;
    #     return (deliver);
    # (and "bereq.request" with "bereq.method").

    /*
    # Some (commented out by default) rules for debugging cache status
    # ###
    # Varnish determined the object was not cacheable
    if (beresp.ttl <= 0s) {
        set beresp.http.X-Cacheable = "NO:Not Cacheable";

    # You don't wish to cache content for logged in users
    } elseif (req.http.Cookie ~ "userID") {
        set beresp.http.X-Cacheable = "NO:Got userID cookie";
        return (hit_for_pass);

    # You are respecting the Cache-Control=private header from the backend
    } elseif (beresp.http.Cache-Control ~ "private") {
        set beresp.http.X-Cacheable = "NO:Cache-Control=private";
        return (hit_for_pass);

    # Ajax
    } elseif (req.http.X-Requested-With == "XMLHttpRequest") {
        set beresp.http.X-Cacheable = "NO:Ajax";
        return (hit_for_pass);

    # Varnish determined the object was cacheable
    } else {
        set beresp.http.X-Cacheable = "YES";
    }
    */

    # If we host multiple domains on a server, here you can list the domains you DO NOT want to cache.
    # The first check matches both naked & "www" subdomains. Use the second for non generic subdomains.
    # The patterns are anchored (^...$), case-insensitive and have their dots escaped, so that excluding
    # "domain1.com" does not also exclude e.g. "mydomain1.com". An optional ":port" suffix is tolerated.
    if (
        bereq.http.host ~ "(?i)^(www\.)?(domain1\.com|domain2\.org|domain3\.net)(:[0-9]+)?$" ||
        bereq.http.host ~ "(?i)^(subdomain\.domain4\.tld|othersubdomain\.domain5\.tld)(:[0-9]+)?$"
    ) {
        return (hit_for_pass);
    }

    # Exclude the following paths (e.g. backend admins, user pages or ad URLs that require tracking)
    # In Joomla specifically, you are advised to create specific entry points (URLs) for users to
    # interact with the site (either common user logins or even commenting), e.g. make a menu item
    # to point to a user login page (e.g. /login), including all related functionality such as
    # password reset, email reminder and so on.
    # Keep this list identical to the one in vcl_recv().
    if (
        bereq.url ~ "^/administrator" ||
        bereq.url ~ "^/component/banners" ||
        bereq.url ~ "^/component/users" ||
        bereq.url ~ "^/wp-admin" ||
        bereq.url ~ "^/wp-login\.php" ||
        bereq.url ~ "^/any-other-url-path"
    ) {
        return (hit_for_pass);
    }

    # Don't cache HTTP authorization/authentication pages and pages with certain headers or cookies.
    # "X-Logged-In" is set by the backend on the RESPONSE, so it must be checked on beresp as well:
    # the first page rendered right after a login is requested without the "userID" cookie yet, and
    # without this check that logged in page (and its Set-Cookie header) would be cached for guests.
    if (
        bereq.http.Authorization ||
        bereq.http.Authenticate ||
        bereq.http.X-Logged-In == "True" ||
        beresp.http.X-Logged-In == "True" ||
        bereq.http.Cookie ~ "userID" ||
        bereq.http.Cookie ~ "JRememberMe"
    ) {
        return (hit_for_pass);
    }

    # Don't cache ajax requests ("X-Requested-With" is a request header, so it is read
    # from the backend request, not from the backend response)
    if (bereq.http.X-Requested-With == "XMLHttpRequest" || bereq.url ~ "nocache") {
        return (hit_for_pass);
    }

    # Don't cache backend response to posted requests
    if (bereq.request == "POST") {
        return (hit_for_pass);
    }

    # Ok, we're cool & ready to cache things
    # so let's clean up some headers and cookies
    # to maximize caching.

    # Check for the custom "X-Logged-In" header to identify if the visitor is a guest,
    # then unset any cookie (including session cookies) provided it's not a POST request.
    # (POST responses have already been returned above; the method check is kept as a
    # safeguard in case the rules above are reordered.)
    if (bereq.request != "POST" && beresp.http.X-Logged-In == "False") {
        unset beresp.http.Set-Cookie;
    }

    # Allow items to be stale if needed (this value should be the same as with "set req.grace"
    # inside the sub vcl_recv {…} block - the 2nd part of the if/else statement)
    set beresp.grace = 1h;

    # On a backend error, drop this response and restart the request.
    # NOTE(review): the original intent was "serve pages from the cache should we get a sudden
    # error and re-check in 3 minutes". As written, the grace value is set on a response that is
    # discarded by the restart, and no saint mode is enabled, so the restarted request will most
    # likely just hit the backend again - confirm against the Varnish 3 saint mode documentation.
    if (beresp.status == 503 || beresp.status == 502 || beresp.status == 501 || beresp.status == 500) {
        set beresp.grace = 3m;
        return (restart);
    }

    # This is how long Varnish will keep cached content.
    # If you change it, remember to adjust "beresp.http.Cache-Control" lower and
    # "req.grace" in the vcl_recv() section
    set beresp.ttl = 2m;

    # OPTIONAL:
    # Nice trick from https://www.varnish-cache.org/trac/wiki/VCLExampleIgnoreCacheHeadersFromBackend
    # Essentially caches anything with a cache time lower than the specified time.
    # May not work on all cases so use with caution.
    #if (beresp.ttl < 2m) {
    #    set beresp.ttl = 2m;
    #}

    # Unset the "etag" header (suggested)
    unset beresp.http.etag;

    # Unset the "pragma" header
    unset beresp.http.Pragma;

    # Modify "expires" header - https://www.varnish-cache.org/trac/wiki/VCLExampleSetExpires
    # (the empty string forces the time value to be converted to a string)
    set beresp.http.Expires = "" + (now + beresp.ttl);

    # If your backend server does not set the right caching headers for static assets,
    # you can set them below (uncomment first and change 604800 - which is 1 week - to whatever you
    # want, in seconds). The pattern matches with or without a query string.
    #if (req.url ~ "\.(ico|jpg|jpeg|gif|png|bmp|webp|tiff|svg|svgz|pdf|mp3|flac|ogg|mid|midi|wav|mp4|webm|mkv|ogv|wmv|eot|otf|woff|ttf|rss|atom|zip|7z|tgz|gz|rar|bz2|tar|exe|doc|docx|xls|xlsx|ppt|pptx|rtf|odt|ods|odp)(\?.*)?$") {
    #    set beresp.http.Cache-Control = "public, max-age=604800";
    #}

    # We have content to cache, but it's got no-cache or other Cache-Control values sent
    # So let's reset it to our main caching time (2m as used in this example configuration)
    # The additional parameters specified (stale-while-revalidate & stale-if-error) are used
    # by modern browsers to better control caching. Set these to twice & five times your main
    # cache time respectively.
    # This final setting will normalize CMSs like Joomla which set max-age=0 even when
    # Joomla's cache is enabled.
    if (beresp.http.Cache-Control !~ "max-age" || beresp.http.Cache-Control ~ "max-age=0") {
        set beresp.http.Cache-Control = "public, max-age=120, stale-while-revalidate=240, stale-if-error=600";
    }

    return (deliver);

}

sub vcl_deliver {

    # Runs right before a response is sent to the client; only adds diagnostic headers.

    # Send a special header for excluded domains only.
    # The if statement should list the same domains as the ones in the vcl_recv() and vcl_fetch()
    # functions above. The patterns are anchored (^...$), case-insensitive and have their dots
    # escaped, so that "domain1.com" does not also match e.g. "mydomain1.com".
    if (
        req.http.host ~ "(?i)^(www\.)?(domain1\.com|domain2\.org|domain3\.net)(:[0-9]+)?$" ||
        req.http.host ~ "(?i)^(subdomain\.domain4\.tld|othersubdomain\.domain5\.tld)(:[0-9]+)?$"
    ) {
        set resp.http.X-Domain-Status = "EXCLUDED";
    }

    # Send special headers that indicate the cache status of each web page
    # (obj.hits is greater than zero when the object was served from the cache)
    if (obj.hits > 0) {
        set resp.http.X-Cache = "HIT";
    } else {
        set resp.http.X-Cache = "MISS";
    }

    # If you hook up your server to a CDN that offers "website acceleration" features
    # you might need to uncomment the following line for the CDN to fetch content properly.
    #unset resp.http.Age;

    return (deliver);

}

### /etc/varnish/default.vcl - END ###