summary refs log tree commit diff
path: root/services/frontend
diff options
context:
space:
mode:
Diffstat (limited to 'services/frontend')
-rw-r--r--services/frontend/common.nix9
-rw-r--r--services/frontend/haproxy.nix302
-rw-r--r--services/frontend/web-server.nix41
3 files changed, 352 insertions, 0 deletions
diff --git a/services/frontend/common.nix b/services/frontend/common.nix
new file mode 100644
index 0000000..78056de
--- /dev/null
+++ b/services/frontend/common.nix
@@ -0,0 +1,9 @@
+{ ... }:
+
+{
+  networking.firewall.allowedTCPPorts = [ 80 443 ];
+
+  users.groups = {
+    "frontend" = { };
+  };
+}
diff --git a/services/frontend/haproxy.nix b/services/frontend/haproxy.nix
new file mode 100644
index 0000000..c39fc04
--- /dev/null
+++ b/services/frontend/haproxy.nix
@@ -0,0 +1,302 @@
+{ config, lib, ... }:
+
+let acmeEnabledVhosts = [ config.smalltech.domain ];
+
+in
+{
+  config = {
+    # Warning! There is a high chance of having several hours of downtime any
+    # time anything under security.acme is changed. Make sure you know what
+    # you're doing before messing with it.
+    security.acme = {
+      acceptTerms = true;
+      preliminarySelfsigned = true;
+
+      defaults = {
+        email = config.smalltech.adminEmail;
+        renewInterval = "Mon,Wed,Fri";
+
+        group = "frontend";
+        webroot = "/var/lib/acme/acme-challenge";
+
+        reloadServices = [
+          "haproxy.service"
+        ];
+
+        # When trying changes that may break things, all ACME traffic should
+        # go to the LetsEncrypt staging endpoint rather than the production
+        # endpoint, which is the default. This line is left here so that it
+        # can be easily uncommented when doing that testing.
+        #
+        # Bear in mind that the staging endpoint doesn't issue real
+        # certificates, so users seeing the site during this time will get
+        # warnings about certificate validity. Therefore it must be changed
+        # back ASAP as soon as you're sure things work.
+        #
+        # This is still preferable to us getting rate-limited by LetsEncrypt
+        # for hitting their production endpoint too many times, as that would
+        # lead to many hours of downtime while we wait for the rate-limiting
+        # to expire. This is a very easy thing to end up doing, since systemd
+        # responds to the failure by trying again immediately, and the rate
+        # limit is something like no more than five failures in an hour.
+        # Often, by the time a sysadmin notices there's a problem, it's too
+        # late to avoid the long wait.
+        #server = "https://acme-staging-v02.api.letsencrypt.org/directory";
+      };
+
+      certs = {
+        ${config.smalltech.domain} = { };
+      };
+    };
+
+    # Warning! There is a high chance of having several hours of downtime any
+    # time anything related to ACME is changed. Make sure you know what you're
+    # doing before messing with it.
+    systemd.services.haproxy = {
+      wants = (lib.concatLists
+                  (map (vhost: [
+                          "acme-${vhost}.service"
+                          "acme-selfsigned-${vhost}.service"])
+                       acmeEnabledVhosts))
+              ++ [ "nginx.service" ];
+      after =
+        (map (vhost: "acme-selfsigned-${vhost}.service") acmeEnabledVhosts)
+        ++ [ "nginx.service" ];
+    };
+
+    services.haproxy = {
+      enable = true;
+      group = "frontend";
+    };
+
+    smalltech.haproxy = {
+      global = [
+        # Specify where to put logs. These settings will make sure they wind
+        # up in syslog, where journald can handle them.
+        "log /dev/log local0 info"
+
+        # This is the principal control we have over DoS attempts. It tells
+        # HAProxy to deny requests that would take the system past this number
+        # of simultaneous connections.
+        "maxconn 2048"
+
+        # Set the directory which certificate pathnames will be relative to.
+        # The actual paths are configured in bind directives, below.
+        "crt-base /var/lib/acme"
+
+        # This is a pretty restricted set of ciphers, on the theory that more
+        # ciphers mean more attack surface. I picked these ones with an eye to
+        # compatibility. See the Mozilla recommendations at [1] for some
+        # background.
+        #
+        # For now, we need TLS 1.2 for compatibility with Windows 7. At some
+        # point hopefully this need will go away and we can move to requiring
+        # 1.3, which is substantially more secure. Some of these ciphers are
+        # here only for 1.2 compat, so we should revisit them when we do that,
+        # too.
+        #
+        # [1] https://wiki.mozilla.org/Security/Server_Side_TLS
+        "ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets"
+        ("ssl-default-bind-ciphers " + builtins.concatStringsSep ":" [
+          "ECDHE-ECDSA-AES128-GCM-SHA256"
+          "ECDHE-RSA-AES128-GCM-SHA256"
+          "ECDHE-ECDSA-AES256-GCM-SHA384"
+          "ECDHE-RSA-AES256-GCM-SHA384"
+          "ECDHE-ECDSA-CHACHA20-POLY1305"
+          "ECDHE-RSA-CHACHA20-POLY1305"
+          "DHE-RSA-AES128-GCM-SHA256"
+          "DHE-RSA-AES256-GCM-SHA384"
+          "DHE-RSA-CHACHA20-POLY1305"
+        ])
+        ("ssl-default-bind-ciphersuites " + builtins.concatStringsSep ":" [
+          "TLS_AES_128_GCM_SHA256"
+          "TLS_AES_256_GCM_SHA384"
+          "TLS_CHACHA20_POLY1305_SHA256"
+        ])
+
+        # TODO we need to figure out secret management before doing this
+        # Custom Diffie-Hellman parameters avoid precomputation attacks.
+        # They're a best practice, so we use them. This is unlikely to ever
+        # need to be regenerated.
+        #
+        # Note that the file isn't really a secret, in fact it's public on
+        # every request, but it makes more sense to manage it with the
+        # credentials because it is finicky to work with in the same ways.
+        #"ssl-dh-param-file /etc/nixos/secrets/frontend/dhparam.pem"
+
+        # TODO Lua stuff needs doing
+        # Specify where to find the Lua modules.
+        #"lua-prepend-path ${luaPath}/?.lua"
+        #"lua-prepend-path ${luaCPath}/?.so cpath"
+        #"lua-load ${luaPath}/auth-request.lua"
+      ];
+
+      defaults = [
+        # HAProxy has two primary modes of operation, TCP and HTTP. We want
+        # TCP as the default; the difference is how deeply it looks at the
+        # structure of incoming headers. We also turn on logging here.
+        "mode tcp"
+        "log global"
+        "option tcplog"
+
+        # These timeouts are relevant to DoS protection.
+        "timeout connect 5s"
+        "timeout client 50s"
+        "timeout server 50s"
+      ];
+
+      frontends = {
+        # Most interesting stuff will happen under this frontend, it's the main
+        # one on port 443.
+        "fe_multi_https" = [
+          # We bind to all IP addresses, because the instance's private IP is
+          # subject to change so statically configuring it here would be
+          # fragile.
+          #
+          # We turn on ALPN ("application-layer protocol negotiation" [1])
+          # because this is a required step to make HTTP 2.0 work, and helps
+          # with performance by avoiding a round-trip.
+          #
+          # It would be nice to have strict-sni turned on here, but we can't
+          # because the self-signed bootstrap cert has CN=example.com.
+          #
+          # [1] https://datatracker.ietf.org/doc/html/rfc7301
+          ("bind :443 ssl alpn h2,http/1.1 "
+           + builtins.concatStringsSep " "
+                 (map (name: "crt " + name + "/full.pem") [
+                   config.smalltech.domain
+                 ]))
+
+          # Since this traffic is on the HTTPS port, we override the default
+          # TCP mode.
+          "mode http"
+          "option httplog"
+
+          # Some of the criteria we want to test are visible to HAProxy from
+          # the early traffic that the client sends, even without having sent
+          # any response beyond accepting the TCP connection. However, there's
+          # no guarantee that they will actually have been sent in timely
+          # fashion. By default, HAProxy would only look at what's been
+          # received so far, which means there's race conditions. By setting
+          # inspect-delay, we tell it to wait a little while before concluding
+          # it won't get what it's looking for.
+          "tcp-request inspect-delay 5s"
+
+          # If the client appears to be sending valid TLS traffic, tell it we're
+          # here so we can move on to the tests that require back-and-forth,
+          # which is everything below this part.
+          "tcp-request content accept if { ssl_fc }"
+
+          # Do not accept proxy headers from outside, they would allow an
+          # attacker to impersonate HAProxy.
+          #
+          # Someday perhaps we might wish to allow-list only the headers we
+          # specifically get value from, instead of having this deny-list, but
+          # that would require doing some research to figure out what those are
+          # exactly. This would also be a good place to implement a cookie
+          # firewall, if we ever want one.
+          #
+          # Please keep this list alphabetized.
+          "http-request del-header X-Forwarded-For"
+          "http-request del-header X-Forwarded-Host"
+          "http-request del-header X-Forwarded-Method"
+          "http-request del-header X-Forwarded-Proto"
+          "http-request del-header X-Forwarded-Uri"
+          "http-request del-header X-Real-IP"
+
+          # HAProxy has built-in support for X-Forwarded-For so we use that,
+          # rather than set-header. This is used by Authelia, and is pretty
+          # widely used in general, so we set it here for all backends to have
+          # access to.
+          "option forwardfor"
+
+          # TODO Authelia rewrite variables go here
+
+          # TODO Authelia forwarding headers go here
+
+          # We define ACLs here; they are used below. Multiple acl directives
+          # with the same name are or'd together.
+          #
+          # It is important to notice that these ACLs all rely on request-time
+          # information. Attempting to use them from a response-time directive
+          # will silently fail. This is the most fiddly issue in HAProxy
+          # configuration. This comment stands in witness of an hour wasted
+          # attempting to copy information from ACLs to a later phase; if you
+          # need to, good luck!
+          "acl is-acme path_beg /.well-known/acme-challenge/"
+
+          # TODO define ACLs here
+
+          # TODO execute the Authelia subrequest here
+
+          # TODO perform the login redirect here
+
+          # TODO refresh the Authelia cookie here
+
+          # Path-based routing, as seen here, is for things that need to appear
+          # on every hostname we serve traffic on.
+          #
+          # Currently, that's just the directory where ACME challenge responses
+          # are served, which is necessary to make our LetsEncrypt certificates
+          # work.
+          #
+          # This needs to come before the login check (not the subrequest
+          # immediately below, but the auth-failed redirect further down),
+          # because we don't get to tell LetsEncrypt that it needs to log in.
+          #
+          # Note that this relies on the is-acme ACL, which is defined above.
+          "use_backend be_local_nginx if is-acme"
+
+          # TODO path-and-domain routing goes here
+
+          # Domain-based routing, as seen here, is for content that should
+          # appear only on a specific hostname, and should be the default
+          # thing on that hostname. That's most things.
+          #
+          # We do two types of domain-based routing: use_backend, and
+          # redirects. For ease of maintenance that's separated into two
+          # lists, so that it's easier to see at a glance that all the line
+          # items follow the same pattern without any weird inconsistencies
+          # that could lead to misbehavior.
+          #
+          # These ones are the backend specifications. Keep this list
+          # alphabetical by backend, and within that by hostname, first by
+          # domain then subdomain.
+          ("use_backend be_local_nginx "
+           + "if { req.hdr(host) -i ${config.smalltech.domain} }")
+
+          # TODO redirects go here
+
+          # In order to reduce the information an attacker could gather about
+          # our network topology, we redact the Server header from responses.
+          "http-response del-header Server"
+        ];
+
+        # We also need to listen on port 80, so we can redirect to port 443.
+        "fe_multi_http" = [
+          # As with the port 443 frontend, we bind to all IP addresses.
+          "bind :80"
+
+          # Also as with port 443, we use HTTP mode.
+          "mode http"
+          "option httplog"
+
+          # Unconditionally redirect to HTTPS. This will apply to all domains
+          # we serve traffic for. It's a fiddly thing to do, which does
+          # interact with the ACME verification process, so be careful about
+          # changing it.
+          #
+          # We use 301 (moved permanently) as the response code.
+          "http-request redirect code 301 scheme https"
+        ];
+      };
+
+      backends = {
+        "be_local_nginx" = [
+          "mode http"
+          "server nginx 127.0.0.1:3080 maxconn 256"
+        ];
+      };
+    };
+  };
+}
diff --git a/services/frontend/web-server.nix b/services/frontend/web-server.nix
new file mode 100644
index 0000000..b9fa868
--- /dev/null
+++ b/services/frontend/web-server.nix
@@ -0,0 +1,41 @@
+{ config, ... }:
+
+{
+  # At present, the only thing we serve via nginx is the responses to ACME
+  # challenges, so that's the only thing configured. This interacts closely
+  # with the config in services/frontend/haproxy.nix, in that nginx is behind
+  # HAProxy and relies on HAProxy to route traffic to it, while HAProxy relies
+  # on nginx to handle the ACME stuff.
+  #
+  # This separated-out behavior is fiddly to set up the first time, but I have
+  # found it to be highly reliable once created.
+  services.nginx = {
+    enable = true;
+
+    group = "frontend";
+
+    recommendedGzipSettings = true;
+    recommendedOptimisation = true;
+    recommendedProxySettings = true;
+
+    virtualHosts = {
+      ${config.smalltech.domain} = {
+        serverName = config.smalltech.domain;
+
+        listen = [
+          {
+            addr = "127.0.0.1";
+            port = 3080;
+          }
+        ];
+
+        locations."/.well-known/acme-challenge" = {
+          root = "/var/lib/acme/acme-challenge";
+          extraConfig = ''
+            auth_basic off;
+          '';
+        };
+      };
+    };
+  };
+}