From 58dd4440eaf6be9d260809b9dcb361d1f46f2abb Mon Sep 17 00:00:00 2001
From: Irene Knapp
Date: Thu, 31 Jul 2025 15:25:21 -0700
Subject: try to spin up haproxy, nginx, and an ACME client

no login/ACL stuff yet

Change-Id: If6eeaed671b2711dc809e94ea00bc6387dcae2f4
---
 services/frontend/common.nix     |   9 ++
 services/frontend/haproxy.nix    | 302 +++++++++++++++++++++++++++++++++++++++
 services/frontend/web-server.nix |  41 ++++++
 3 files changed, 352 insertions(+)
 create mode 100644 services/frontend/common.nix
 create mode 100644 services/frontend/haproxy.nix
 create mode 100644 services/frontend/web-server.nix

(limited to 'services/frontend')

diff --git a/services/frontend/common.nix b/services/frontend/common.nix
new file mode 100644
index 0000000..78056de
--- /dev/null
+++ b/services/frontend/common.nix
@@ -0,0 +1,9 @@
+{ ... }:
+
+{
+  networking.firewall.allowedTCPPorts = [ 80 443 ];
+
+  users.groups = {
+    "frontend" = { };
+  };
+}
diff --git a/services/frontend/haproxy.nix b/services/frontend/haproxy.nix
new file mode 100644
index 0000000..c39fc04
--- /dev/null
+++ b/services/frontend/haproxy.nix
@@ -0,0 +1,302 @@
+{ config, lib, ... }:
+
+let acmeEnabledVhosts = [ config.smalltech.domain ];
+
+in
+{
+  config = {
+    # Warning! There is a high chance of having several hours of downtime any
+    # time anything under security.acme is changed. Make sure you know what
+    # you're doing before messing with it.
+    security.acme = {
+      acceptTerms = true;
+      preliminarySelfsigned = true;
+
+      defaults = {
+        email = config.smalltech.adminEmail;
+        renewInterval = "Mon,Wed,Fri";
+
+        group = "frontend";
+        webroot = "/var/lib/acme/acme-challenge";
+
+        reloadServices = [
+          "haproxy.service"
+        ];
+
+        # When trying changes that may break things, all ACME traffic should
+        # go to the LetsEncrypt staging endpoint rather than the production
+        # endpoint, which is the default. This line is left here so that it
+        # can be easily uncommented when doing that testing.
+        #
+        # Bear in mind that the staging endpoint doesn't issue real
+        # certificates, so users seeing the site during this time will get
+        # warnings about certificate validity. Therefore it must be changed
+        # back ASAP as soon as you're sure things work.
+        #
+        # This is still preferable to us getting rate-limited by LetsEncrypt
+        # for hitting their production endpoint too many times, as that would
+        # lead to many hours of downtime while we wait for the rate-limiting
+        # to expire. This is a very easy thing to end up doing, since systemd
+        # responds to the failure by trying again immediately, and the rate
+        # limit is something like no more than five failures in an hour.
+        # Often, by the time a sysadmin notices there's a problem, it's too
+        # late to avoid the long wait.
+        #server = "https://acme-staging-v02.api.letsencrypt.org/directory";
+      };
+
+      certs = {
+        ${config.smalltech.domain} = { };
+      };
+    };
+
+    # Warning! There is a high chance of having several hours of downtime any
+    # time anything related to ACME is changed. Make sure you know what you're
+    # doing before messing with it.
+    systemd.services.haproxy = {
+      wants = (lib.concatLists
+        (map (vhost: [
+          "acme-${vhost}.service"
+          "acme-selfsigned-${vhost}.service"])
+          acmeEnabledVhosts))
+        ++ [ "nginx.service" ];
+      after =
+        (map (vhost: "acme-selfsigned-${vhost}.service") acmeEnabledVhosts)
+        ++ [ "nginx.service" ];
+    };
+
+    services.haproxy = {
+      enable = true;
+      group = "frontend";
+    };
+
+    smalltech.haproxy = {
+      global = [
+        # Specify where to put logs. These settings will make sure they wind
+        # up in syslog, where journald can handle them.
+        "log /dev/log local0 info"
+
+        # This is the principal control we have over DoS attempts. It tells
+        # HAProxy to deny requests that would take the system past this number
+        # of simultaneous connections.
+        "maxconn 2048"
+
+        # Set the directory which certificate pathnames will be relative to.
+        # The actual paths are configured in bind directives, below.
+        "crt-base /var/lib/acme"
+
+        # This is a pretty restricted set of ciphers, on the theory that more
+        # ciphers mean more attack surface. I picked these ones with an eye to
+        # compatibility. See the Mozilla recommendations at [1] for some
+        # background.
+        #
+        # For now, we need TLS 1.2 for compatibility with Windows 7. At some
+        # point hopefully this need will go away and we can move to requiring
+        # 1.3, which is substantially more secure. Some of these ciphers are
+        # here only for 1.2 compat, so we should revisit them when we do that,
+        # too.
+        #
+        # [1] https://wiki.mozilla.org/Security/Server_Side_TLS
+        "ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets"
+        ("ssl-default-bind-ciphers " + builtins.concatStringsSep ":" [
+          "ECDHE-ECDSA-AES128-GCM-SHA256"
+          "ECDHE-RSA-AES128-GCM-SHA256"
+          "ECDHE-ECDSA-AES256-GCM-SHA384"
+          "ECDHE-RSA-AES256-GCM-SHA384"
+          "ECDHE-ECDSA-CHACHA20-POLY1305"
+          "ECDHE-RSA-CHACHA20-POLY1305"
+          "DHE-RSA-AES128-GCM-SHA256"
+          "DHE-RSA-AES256-GCM-SHA384"
+          "DHE-RSA-CHACHA20-POLY1305"
+        ])
+        ("ssl-default-bind-ciphersuites " + builtins.concatStringsSep ":" [
+          "TLS_AES_128_GCM_SHA256"
+          "TLS_AES_256_GCM_SHA384"
+          "TLS_CHACHA20_POLY1305_SHA256"
+        ])
+
+        # TODO we need to figure out secret management before doing this
+        # Custom Diffie-Hellman parameters avoid precomputation attacks.
+        # They're a best practice, so we use them. This is unlikely to ever
+        # need to be regenerated.
+        #
+        # Note that the file isn't really a secret, in fact it's public on
+        # every request, but it makes more sense to manage it with the
+        # credentials because it is finicky to work with in the same ways.
+        #"ssl-dh-param-file /etc/nixos/secrets/frontend/dhparam.pem"
+
+        # TODO Lua stuff needs doing
+        # Specify where to find the Lua modules.
+        #"lua-prepend-path ${luaPath}/?.lua"
+        #"lua-prepend-path ${luaCPath}/?.so cpath"
+        #"lua-load ${luaPath}/auth-request.lua"
+      ];
+
+      defaults = [
+        # HAProxy has two primary modes of operation, TCP and HTTP. We want
+        # TCP as the default; the difference is how deeply it looks at the
+        # structure of incoming headers. We also turn on logging here.
+        "mode tcp"
+        "log global"
+        "option tcplog"
+
+        # These timeouts are relevant to DoS protection.
+        "timeout connect 5s"
+        "timeout client 50s"
+        "timeout server 50s"
+      ];
+
+      frontends = {
+        # Most interesting stuff will happen under this frontend, it's the main
+        # one on port 443.
+        "fe_multi_https" = [
+          # We bind to all IP addresses, because the instance's private IP is
+          # subject to change so statically configuring it here would be
+          # fragile.
+          #
+          # We turn on ALPN ("application-layer protocol negotiation" [1])
+          # because this is a required step to make HTTP 2.0 work, and helps
+          # with performance by avoiding a round-trip.
+          #
+          # It would be nice to have strict-sni turned on here, but we can't
+          # because the self-signed bootstrap cert has CN=example.com.
+          #
+          # [1] https://datatracker.ietf.org/doc/html/rfc7301
+          ("bind :443 ssl alpn h2,http/1.1 "
+            + builtins.concatStringsSep " "
+            (map (name: "crt " + name + "/full.pem") [
+              config.smalltech.domain
+            ]))
+
+          # Since this traffic is on the HTTPS port, we override the default
+          # TCP mode.
+          "mode http"
+          "option httplog"
+
+          # Some of the criteria we want to test are visible to HAProxy from
+          # the early traffic that the client sends, even without having sent
+          # any response beyond accepting the TCP connection. However, there's
+          # no guarantee that they will actually have been sent in timely
+          # fashion. By default, HAProxy would only look at what's been
+          # received so far, which means there's race conditions. By setting
+          # inspect-delay, we tell it to wait a little while before concluding
+          # it won't get what it's looking for.
+          "tcp-request inspect-delay 5s"
+
+          # If the client appears to be sending valid TLS traffic, tell it we're
+          # here so we can move on to the tests that require back-and-forth,
+          # which is everything below this part.
+          "tcp-request content accept if { ssl_fc }"
+
+          # Do not accept proxy headers from outside, they would allow an
+          # attacker to impersonate HAProxy.
+          #
+          # Someday perhaps we might wish to allow-list only the headers we
+          # specifically get value from, instead of having this deny-list, but
+          # that would require doing some research to figure out what those are
+          # exactly. This would also be a good place to implement a cookie
+          # firewall, if we ever want one.
+          #
+          # Please keep this list alphabetized.
+          "http-request del-header X-Forwarded-For"
+          "http-request del-header X-Forwarded-Host"
+          "http-request del-header X-Forwarded-Method"
+          "http-request del-header X-Forwarded-Proto"
+          "http-request del-header X-Forwarded-Uri"
+          "http-request del-header X-Real-IP"
+
+          # HAProxy has built-in support for X-Forwarded-For so we use that,
+          # rather than set-header. This is used by Authelia, and is pretty
+          # widely used in general, so we set it here for all backends to have
+          # access to.
+          "option forwardfor"
+
+          # TODO Authelia rewrite variables go here
+
+          # TODO Authelia forwarding headers go here
+
+          # We define ACLs here; they are used below. Multiple acl directives
+          # with the same name are or'd together.
+          #
+          # It is important to notice that these ACLs all rely on request-time
+          # information. Attempting to use them from a response-time directive
+          # will silently fail. This is the most fiddly issue in HAProxy
+          # configuration. This comment stands in witness of an hour wasted
+          # attempting to copy information from ACLs to a later phase; if you
+          # need to, good luck!
+          "acl is-acme path_beg /.well-known/acme-challenge/"
+
+          # TODO define ACLs here
+
+          # TODO execute the Authelia subrequest here
+
+          # TODO perform the login redirect here
+
+          # TODO refresh the Authelia cookie here
+
+          # Path-based routing, as seen here, is for things that need to appear
+          # on every hostname we serve traffic on.
+          #
+          # Currently, that's just the directory where ACME challenge responses
+          # are served, which is necessary to make our LetsEncrypt certificates
+          # work.
+          #
+          # This needs to come before the login check (not the subrequest
+          # immediately below, but the auth-failed redirect further down),
+          # because we don't get to tell LetsEncrypt that it needs to log in.
+          #
+          # Note that this relies on the is-acme ACL, which is defined above.
+          "use_backend be_local_nginx if is-acme"
+
+          # TODO path-and-domain routing goes here
+
+          # Domain-based routing, as seen here, is for content that should
+          # appear only on a specific hostname, and should be the default
+          # thing on that hostname. That's most things.
+          #
+          # We do two types of domain-based routing: use_backend, and
+          # redirects. For ease of maintenance that's separated into two
+          # lists, so that it's easier to see at a glance that all the line
+          # items follow the same pattern without any weird inconsistencies
+          # that could lead to misbehavior.
+          #
+          # These ones are the backend specifications. Keep this list
+          # alphabetical by backend, and within that by hostname, first by
+          # domain then subdomain.
+          ("use_backend be_local_nginx "
+            + "if { req.hdr(host) -i ${config.smalltech.domain} }")
+
+          # TODO redirects go here
+
+          # In order to reduce the information an attacker could gather about
+          # our network topology, we redact the Server header from responses.
+          "http-response del-header Server"
+        ];
+
+        # We also need to listen on port 80, so we can redirect to port 443.
+        "fe_multi_http" = [
+          # As with the port 443 frontend, we bind to all IP addresses.
+          "bind :80"
+
+          # Also as with port 443, we use HTTP mode.
+          "mode http"
+          "option httplog"
+
+          # Unconditionally redirect to HTTPS. This will apply to all domains
+          # we serve traffic for. It's a fiddly thing to do, which does
+          # interact with the ACME verification process, so be careful about
+          # changing it.
+          #
+          # We use 301 (moved permanently) as the response code.
+          "http-request redirect code 301 scheme https"
+        ];
+      };
+
+      backends = {
+        "be_local_nginx" = [
+          "mode http"
+          "server nginx 127.0.0.1:3080 maxconn 256"
+        ];
+      };
+    };
+  };
+}
diff --git a/services/frontend/web-server.nix b/services/frontend/web-server.nix
new file mode 100644
index 0000000..b9fa868
--- /dev/null
+++ b/services/frontend/web-server.nix
@@ -0,0 +1,41 @@
+{ config, ... }:
+
+{
+  # At present, the only thing we serve via nginx is the responses to ACME
+  # challenges, so that's the only thing configured. This interacts closely
+  # with the config in services/frontend/haproxy.nix, in that nginx is behind
+  # HAProxy and relies on HAProxy to route traffic to it, while HAProxy relies
+  # on nginx to handle the ACME stuff.
+  #
+  # This separated-out behavior is fiddly to set up the first time, but I have
+  # found it to be highly reliable once created.
+  services.nginx = {
+    enable = true;
+
+    group = "frontend";
+
+    recommendedGzipSettings = true;
+    recommendedOptimisation = true;
+    recommendedProxySettings = true;
+
+    virtualHosts = {
+      ${config.smalltech.domain} = {
+        serverName = config.smalltech.domain;
+
+        listen = [
+          {
+            addr = "127.0.0.1";
+            port = 3080;
+          }
+        ];
+
+        locations."/.well-known/acme-challenge" = {
+          root = "/var/lib/acme/acme-challenge";
+          extraConfig = ''
+            auth_basic off;
+          '';
+        };
+      };
+    };
+  };
+}
-- 
cgit 1.4.1