pages-server/server/certificates/certificates.go

411 lines
14 KiB
Go
Raw Permalink Normal View History

2021-12-05 14:21:05 +00:00
package certificates
2021-03-16 23:34:31 +00:00
import (
2021-12-05 17:26:54 +00:00
"context"
2021-03-16 23:34:31 +00:00
"crypto/tls"
"crypto/x509"
"errors"
2021-12-05 22:20:34 +00:00
"fmt"
"strconv"
"strings"
"time"
"github.com/go-acme/lego/v4/certcrypto"
2021-12-03 02:44:21 +00:00
"github.com/go-acme/lego/v4/certificate"
"github.com/go-acme/lego/v4/challenge/tlsalpn01"
"github.com/go-acme/lego/v4/lego"
"github.com/hashicorp/golang-lru/v2/expirable"
2021-12-05 15:33:56 +00:00
"github.com/reugn/equalizer"
"github.com/rs/zerolog/log"
2021-12-03 02:44:21 +00:00
2021-12-03 03:15:48 +00:00
"codeberg.org/codeberg/pages/server/cache"
2021-12-03 02:44:21 +00:00
"codeberg.org/codeberg/pages/server/database"
2021-12-05 14:21:05 +00:00
dnsutils "codeberg.org/codeberg/pages/server/dns"
"codeberg.org/codeberg/pages/server/gitea"
2021-12-05 14:21:05 +00:00
"codeberg.org/codeberg/pages/server/upstream"
2021-03-16 23:34:31 +00:00
)
var ErrUserRateLimitExceeded = errors.New("rate limit exceeded: 10 certificates per user per 24 hours")
2021-12-03 03:15:48 +00:00
// TLSConfig returns the configuration for generating, serving and cleaning up Let's Encrypt certificates.
func TLSConfig(mainDomainSuffix string,
giteaClient *gitea.Client,
acmeClient *AcmeClient,
firstDefaultBranch string,
challengeCache, canonicalDomainCache cache.ICache,
certDB database.CertDB,
noDNS01 bool,
rawDomain string,
) *tls.Config {
// every cert is at most 24h in the cache and 7 days before expiry the cert is renewed
keyCache := expirable.NewLRU[string, *tls.Certificate](32, nil, 24*time.Hour)
2021-12-05 13:45:17 +00:00
return &tls.Config{
// check DNS name & get certificate from Let's Encrypt
GetCertificate: func(info *tls.ClientHelloInfo) (*tls.Certificate, error) {
domain := strings.ToLower(strings.TrimSpace(info.ServerName))
if len(domain) < 1 {
return nil, errors.New("missing domain info via SNI (RFC 4366, Section 3.1)")
2021-12-05 13:45:17 +00:00
}
// https request init is actually a acme challenge
2021-12-05 13:45:17 +00:00
if info.SupportedProtos != nil {
for _, proto := range info.SupportedProtos {
if proto != tlsalpn01.ACMETLS1Protocol {
continue
}
log.Info().Msgf("Detect ACME-TLS1 challenge for '%s'", domain)
challenge, ok := challengeCache.Get(domain)
if !ok {
return nil, errors.New("no challenge for this domain")
}
cert, err := tlsalpn01.ChallengeCert(domain, challenge.(string))
if err != nil {
return nil, err
}
return cert, nil
}
}
2021-12-05 13:45:17 +00:00
targetOwner := ""
Allow to use certificate even if domain validation fails (#160) - Currently if the canonical domain validations fails(either for legitimate reasons or for bug reasons like the request to Gitea/Forgejo failing) it will use main domain certificate, which in the case for custom domains will warrant a security error as the certificate isn't issued to the custom domain. - This patch handles this situation more gracefully and instead only disallow obtaining a certificate if the domain validation fails, so in the case that a certificate still exists it can still be used even if the canonical domain validation fails. There's a small side effect, legitimate users that remove domains from `.domain` will still be able to use the removed domain(as long as the DNS records exists) as long as the certificate currently hold by pages-server isn't expired. - Given the increased usage in custom domains that are resulting in errors, I think it ways more than the side effect. - In order to future-proof against future slowdowns of instances, add a retry mechanism to the domain validation function, such that it's more likely to succeed even if the instance is not responding. - Refactor the code a bit and add some comments. Co-authored-by: Gusted <postmaster@gusted.xyz> Co-authored-by: 6543 <6543@obermui.de> Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/160 Reviewed-by: 6543 <6543@obermui.de> Co-authored-by: Gusted <gusted@noreply.codeberg.org> Co-committed-by: Gusted <gusted@noreply.codeberg.org>
2023-02-10 01:38:15 +00:00
mayObtainCert := true
if strings.HasSuffix(domain, mainDomainSuffix) || strings.EqualFold(domain, mainDomainSuffix[1:]) {
if noDNS01 {
// Limit the domains allowed to request a certificate to pages-server domains
// and domains for an existing user of org
if !strings.EqualFold(domain, mainDomainSuffix[1:]) && !strings.EqualFold(domain, rawDomain) {
targetOwner := strings.TrimSuffix(domain, mainDomainSuffix)
owner_exist, err := giteaClient.GiteaCheckIfOwnerExists(targetOwner)
mayObtainCert = owner_exist
if err != nil {
log.Error().Err(err).Msgf("Failed to check '%s' existence on the forge: %s", targetOwner, err)
mayObtainCert = false
}
}
} else {
// deliver default certificate for the main domain (*.codeberg.page)
domain = mainDomainSuffix
}
} else {
2021-12-05 13:45:17 +00:00
var targetRepo, targetBranch string
targetOwner, targetRepo, targetBranch = dnsutils.GetTargetFromDNS(domain, mainDomainSuffix, firstDefaultBranch)
2021-12-05 13:45:17 +00:00
if targetOwner == "" {
// DNS not set up, return main certificate to redirect to the docs
domain = mainDomainSuffix
2021-12-05 13:45:17 +00:00
} else {
targetOpt := &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: targetRepo,
TargetBranch: targetBranch,
}
_, valid := targetOpt.CheckCanonicalDomain(giteaClient, domain, mainDomainSuffix, canonicalDomainCache)
2021-12-05 13:45:17 +00:00
if !valid {
Allow to use certificate even if domain validation fails (#160) - Currently if the canonical domain validations fails(either for legitimate reasons or for bug reasons like the request to Gitea/Forgejo failing) it will use main domain certificate, which in the case for custom domains will warrant a security error as the certificate isn't issued to the custom domain. - This patch handles this situation more gracefully and instead only disallow obtaining a certificate if the domain validation fails, so in the case that a certificate still exists it can still be used even if the canonical domain validation fails. There's a small side effect, legitimate users that remove domains from `.domain` will still be able to use the removed domain(as long as the DNS records exists) as long as the certificate currently hold by pages-server isn't expired. - Given the increased usage in custom domains that are resulting in errors, I think it ways more than the side effect. - In order to future-proof against future slowdowns of instances, add a retry mechanism to the domain validation function, such that it's more likely to succeed even if the instance is not responding. - Refactor the code a bit and add some comments. Co-authored-by: Gusted <postmaster@gusted.xyz> Co-authored-by: 6543 <6543@obermui.de> Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/160 Reviewed-by: 6543 <6543@obermui.de> Co-authored-by: Gusted <gusted@noreply.codeberg.org> Co-committed-by: Gusted <gusted@noreply.codeberg.org>
2023-02-10 01:38:15 +00:00
// We shouldn't obtain a certificate when we cannot check if the
// repository has specified this domain in the `.domains` file.
mayObtainCert = false
2021-12-05 13:45:17 +00:00
}
}
}
if tlsCertificate, ok := keyCache.Get(domain); ok {
2021-12-05 13:45:17 +00:00
// we can use an existing certificate object
return tlsCertificate, nil
2021-12-05 13:45:17 +00:00
}
var tlsCertificate *tls.Certificate
2021-12-05 13:45:17 +00:00
var err error
if tlsCertificate, err = acmeClient.retrieveCertFromDB(domain, mainDomainSuffix, false, certDB); err != nil {
if !errors.Is(err, database.ErrNotFound) {
return nil, err
2021-12-05 13:45:17 +00:00
}
// we could not find a cert in db, request a new certificate
// first check if we are allowed to obtain a cert for this domain
if strings.EqualFold(domain, mainDomainSuffix) {
return nil, errors.New("won't request certificate for main domain, something really bad has happened")
}
Allow to use certificate even if domain validation fails (#160) - Currently if the canonical domain validations fails(either for legitimate reasons or for bug reasons like the request to Gitea/Forgejo failing) it will use main domain certificate, which in the case for custom domains will warrant a security error as the certificate isn't issued to the custom domain. - This patch handles this situation more gracefully and instead only disallow obtaining a certificate if the domain validation fails, so in the case that a certificate still exists it can still be used even if the canonical domain validation fails. There's a small side effect, legitimate users that remove domains from `.domain` will still be able to use the removed domain(as long as the DNS records exists) as long as the certificate currently hold by pages-server isn't expired. - Given the increased usage in custom domains that are resulting in errors, I think it ways more than the side effect. - In order to future-proof against future slowdowns of instances, add a retry mechanism to the domain validation function, such that it's more likely to succeed even if the instance is not responding. - Refactor the code a bit and add some comments. Co-authored-by: Gusted <postmaster@gusted.xyz> Co-authored-by: 6543 <6543@obermui.de> Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/160 Reviewed-by: 6543 <6543@obermui.de> Co-authored-by: Gusted <gusted@noreply.codeberg.org> Co-committed-by: Gusted <gusted@noreply.codeberg.org>
2023-02-10 01:38:15 +00:00
if !mayObtainCert {
return nil, fmt.Errorf("won't request certificate for %q", domain)
Allow to use certificate even if domain validation fails (#160) - Currently if the canonical domain validations fails(either for legitimate reasons or for bug reasons like the request to Gitea/Forgejo failing) it will use main domain certificate, which in the case for custom domains will warrant a security error as the certificate isn't issued to the custom domain. - This patch handles this situation more gracefully and instead only disallow obtaining a certificate if the domain validation fails, so in the case that a certificate still exists it can still be used even if the canonical domain validation fails. There's a small side effect, legitimate users that remove domains from `.domain` will still be able to use the removed domain(as long as the DNS records exists) as long as the certificate currently hold by pages-server isn't expired. - Given the increased usage in custom domains that are resulting in errors, I think it ways more than the side effect. - In order to future-proof against future slowdowns of instances, add a retry mechanism to the domain validation function, such that it's more likely to succeed even if the instance is not responding. - Refactor the code a bit and add some comments. Co-authored-by: Gusted <postmaster@gusted.xyz> Co-authored-by: 6543 <6543@obermui.de> Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/160 Reviewed-by: 6543 <6543@obermui.de> Co-authored-by: Gusted <gusted@noreply.codeberg.org> Co-committed-by: Gusted <gusted@noreply.codeberg.org>
2023-02-10 01:38:15 +00:00
}
tlsCertificate, err = acmeClient.obtainCert(acmeClient.legoClient, []string{domain}, nil, targetOwner, false, mainDomainSuffix, certDB)
2021-12-05 13:45:17 +00:00
if err != nil {
return nil, err
}
}
keyCache.Add(domain, tlsCertificate)
return tlsCertificate, nil
2021-12-05 13:45:17 +00:00
},
NextProtos: []string{
"h2",
2021-12-05 13:45:17 +00:00
"http/1.1",
tlsalpn01.ACMETLS1Protocol,
},
2021-12-05 13:45:17 +00:00
// generated 2021-07-13, Mozilla Guideline v5.6, Go 1.14.4, intermediate configuration
// https://ssl-config.mozilla.org/#server=go&version=1.14.4&config=intermediate&guideline=5.6
MinVersion: tls.VersionTLS12,
CipherSuites: []uint16{
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,
},
}
}
func (c *AcmeClient) checkUserLimit(user string) error {
userLimit, ok := c.acmeClientCertificateLimitPerUser[user]
if !ok {
// Each user can only add 10 new domains per day.
2021-11-25 15:12:28 +00:00
userLimit = equalizer.NewTokenBucket(10, time.Hour*24)
c.acmeClientCertificateLimitPerUser[user] = userLimit
}
if !userLimit.Ask() {
return fmt.Errorf("user '%s' error: %w", user, ErrUserRateLimitExceeded)
}
return nil
}
func (c *AcmeClient) retrieveCertFromDB(sni, mainDomainSuffix string, useDnsProvider bool, certDB database.CertDB) (*tls.Certificate, error) {
// parse certificate from database
res, err := certDB.Get(sni)
2021-12-05 18:00:57 +00:00
if err != nil {
return nil, err
} else if res == nil {
return nil, database.ErrNotFound
}
tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey)
if err != nil {
return nil, err
}
2021-12-02 18:12:45 +00:00
// TODO: document & put into own function
if !strings.EqualFold(sni, mainDomainSuffix) {
tlsCertificate.Leaf, err = leaf(&tlsCertificate)
if err != nil {
return nil, err
}
// renew certificates 7 days before they expire
if tlsCertificate.Leaf.NotAfter.Before(time.Now().Add(7 * 24 * time.Hour)) {
// TODO: use ValidTill of custom cert struct
Implement static serving of compressed files (#387) This provides an option for #223 without fully resolving it. (I think.) Essentially, it acts very similar to the `gzip_static` and similar options for nginx, where it will check for the existence of pre-compressed files and serve those instead if the client allows it. I couldn't find a pre-existing way to actually parse the Accept-Encoding header properly (admittedly didn't look very hard) and just implemented one on my own that should be fine. This should hopefully not have the same DOS vulnerabilities as #302, since it relies on the existing caching system. Compressed versions of files will be cached just like any other files, and that includes cache for missing files as well. The compressed files will also be accessible directly, and this won't automatically decompress them. So, if you have a `tar.gz` file that you access directly, it will still be downloaded as the gzipped version, although you will now gain the option to download the `.tar` directly and decompress it in transit. (Which doesn't affect the server at all, just the client's way of interpreting it.) ---- One key thing this change also adds is a short-circuit when accessing directories: these always return 404 via the API, although they'd try the cache anyway and go through that route, which was kind of slow. Adding in the additional encodings, it's going to try for .gz, .br, and .zst files in the worst case as well, which feels wrong. So, instead, it just always falls back to the index-check behaviour if the path ends in a slash or is empty. (Which is implicitly just a slash.) ---- For testing, I set up this repo: https://codeberg.org/clarfonthey/testrepo I ended up realising that LFS wasn't supported by default with `just dev`, so, it ended up working until I made sure the files on the repo *didn't* use LFS. Assuming you've run `just dev`, you can go directly to this page in the browser here: https://clarfonthey.localhost.mock.directory:4430/testrepo/ And also you can try a few cURL commands: ```shell curl https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure curl -H 'Accept-Encoding: gz' https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure | gunzip - curl -H 'Accept-Encoding: br' https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure | brotli --decompress - curl -H 'Accept-Encoding: zst' https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure | zstd --decompress - ``` Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/387 Reviewed-by: Gusted <gusted@noreply.codeberg.org> Co-authored-by: ltdk <usr@ltdk.xyz> Co-committed-by: ltdk <usr@ltdk.xyz>
2024-09-29 21:00:54 +00:00
if len(res.CSR) > 0 {
// CSR stores the time when the renewal shall be tried again
nextTryUnix, err := strconv.ParseInt(string(res.CSR), 10, 64)
if err == nil && time.Now().Before(time.Unix(nextTryUnix, 0)) {
return &tlsCertificate, nil
}
}
// TODO: make a queue ?
go (func() {
res.CSR = nil // acme client doesn't like CSR to be set
if _, err := c.obtainCert(c.legoClient, []string{sni}, res, "", useDnsProvider, mainDomainSuffix, certDB); err != nil {
log.Error().Msgf("Couldn't renew certificate for %s: %v", sni, err)
}
})()
}
}
return &tlsCertificate, nil
}
func (c *AcmeClient) obtainCert(acmeClient *lego.Client, domains []string, renew *certificate.Resource, user string, useDnsProvider bool, mainDomainSuffix string, keyDatabase database.CertDB) (*tls.Certificate, error) {
name := strings.TrimPrefix(domains[0], "*")
// lock to avoid simultaneous requests
_, working := c.obtainLocks.LoadOrStore(name, struct{}{})
if working {
for working {
time.Sleep(100 * time.Millisecond)
_, working = c.obtainLocks.Load(name)
}
cert, err := c.retrieveCertFromDB(name, mainDomainSuffix, useDnsProvider, keyDatabase)
if err != nil {
return nil, fmt.Errorf("certificate failed in synchronous request: %w", err)
}
return cert, nil
}
defer c.obtainLocks.Delete(name)
if acmeClient == nil {
if useDnsProvider {
return mockCert(domains[0], "DNS ACME client is not defined", mainDomainSuffix, keyDatabase)
} else {
return mockCert(domains[0], "ACME client uninitialized. This is a server error, please report!", mainDomainSuffix, keyDatabase)
}
}
// request actual cert
var res *certificate.Resource
var err error
if renew != nil && renew.CertURL != "" {
if c.acmeUseRateLimits {
c.acmeClientRequestLimit.Take()
}
log.Debug().Msgf("Renewing certificate for: %v", domains)
res, err = acmeClient.Certificate.Renew(*renew, true, false, "")
if err != nil {
log.Error().Err(err).Msgf("Couldn't renew certificate for %v, trying to request a new one", domains)
if c.acmeUseRateLimits {
c.acmeClientFailLimit.Take()
}
res = nil
}
}
if res == nil {
if user != "" {
if err := c.checkUserLimit(user); err != nil {
return nil, err
}
}
if c.acmeUseRateLimits {
c.acmeClientOrderLimit.Take()
c.acmeClientRequestLimit.Take()
}
log.Debug().Msgf("Re-requesting new certificate for %v", domains)
res, err = acmeClient.Certificate.Obtain(certificate.ObtainRequest{
Domains: domains,
Bundle: true,
MustStaple: false,
})
if c.acmeUseRateLimits && err != nil {
c.acmeClientFailLimit.Take()
}
}
if err != nil {
log.Error().Err(err).Msgf("Couldn't obtain again a certificate or %v", domains)
if renew != nil && renew.CertURL != "" {
tlsCertificate, err := tls.X509KeyPair(renew.Certificate, renew.PrivateKey)
if err != nil {
mockC, err2 := mockCert(domains[0], err.Error(), mainDomainSuffix, keyDatabase)
if err2 != nil {
return nil, errors.Join(err, err2)
}
return mockC, err
}
leaf, err := leaf(&tlsCertificate)
if err == nil && leaf.NotAfter.After(time.Now()) {
tlsCertificate.Leaf = leaf
// avoid sending a mock cert instead of a still valid cert, instead abuse CSR field to store time to try again at
2021-12-01 21:59:52 +00:00
renew.CSR = []byte(strconv.FormatInt(time.Now().Add(6*time.Hour).Unix(), 10))
2021-12-05 18:00:57 +00:00
if err := keyDatabase.Put(name, renew); err != nil {
mockC, err2 := mockCert(domains[0], err.Error(), mainDomainSuffix, keyDatabase)
if err2 != nil {
return nil, errors.Join(err, err2)
}
return mockC, err
2021-12-05 18:00:57 +00:00
}
return &tlsCertificate, nil
}
}
return mockCert(domains[0], err.Error(), mainDomainSuffix, keyDatabase)
}
log.Debug().Msgf("Obtained certificate for %v", domains)
2021-12-05 18:00:57 +00:00
if err := keyDatabase.Put(name, res); err != nil {
return nil, err
2021-12-05 18:00:57 +00:00
}
tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey)
if err != nil {
return nil, err
}
return &tlsCertificate, nil
}
func SetupMainDomainCertificates(mainDomainSuffix string, acmeClient *AcmeClient, certDB database.CertDB) error {
2021-12-05 22:20:34 +00:00
// getting main cert before ACME account so that we can fail here without hitting rate limits
mainCertBytes, err := certDB.Get(mainDomainSuffix)
if err != nil && !errors.Is(err, database.ErrNotFound) {
return fmt.Errorf("cert database is not working: %w", err)
}
if mainCertBytes == nil {
_, err = acmeClient.obtainCert(acmeClient.dnsChallengerLegoClient, []string{"*" + mainDomainSuffix, mainDomainSuffix[1:]}, nil, "", true, mainDomainSuffix, certDB)
if err != nil {
log.Error().Err(err).Msg("Couldn't renew main domain certificate, continuing with mock certs only")
}
}
2021-12-05 22:20:34 +00:00
return nil
}
func MaintainCertDB(ctx context.Context, interval time.Duration, acmeClient *AcmeClient, mainDomainSuffix string, certDB database.CertDB) {
for {
// delete expired certs that will be invalid until next clean up
threshold := time.Now().Add(interval)
expiredCertCount := 0
certs, err := certDB.Items(0, 0)
if err != nil {
log.Error().Err(err).Msg("could not get certs from list")
} else {
for _, cert := range certs {
if !strings.EqualFold(cert.Domain, strings.TrimPrefix(mainDomainSuffix, ".")) {
if time.Unix(cert.ValidTill, 0).Before(threshold) {
err := certDB.Delete(cert.Domain)
if err != nil {
log.Error().Err(err).Msgf("Deleting expired certificate for %q failed", cert.Domain)
} else {
expiredCertCount++
}
}
}
}
log.Debug().Msgf("Removed %d expired certificates from the database", expiredCertCount)
}
// update main cert
res, err := certDB.Get(mainDomainSuffix)
2021-12-05 18:00:57 +00:00
if err != nil {
log.Error().Msgf("Couldn't get cert for domain %q", mainDomainSuffix)
2021-12-05 18:00:57 +00:00
} else if res == nil {
log.Error().Msgf("Couldn't renew certificate for main domain %q expected main domain cert to exist, but it's missing - seems like the database is corrupted", mainDomainSuffix)
} else {
tlsCertificates, err := certcrypto.ParsePEMBundle(res.Certificate)
if err != nil {
log.Error().Err(fmt.Errorf("could not parse cert for mainDomainSuffix: %w", err))
} else if tlsCertificates[0].NotAfter.Before(time.Now().Add(30 * 24 * time.Hour)) {
// renew main certificate 30 days before it expires
go (func() {
_, err = acmeClient.obtainCert(acmeClient.dnsChallengerLegoClient, []string{"*" + mainDomainSuffix, mainDomainSuffix[1:]}, res, "", true, mainDomainSuffix, certDB)
if err != nil {
log.Error().Err(err).Msg("Couldn't renew certificate for main domain")
}
})()
}
}
2021-12-05 17:26:54 +00:00
select {
case <-ctx.Done():
return
case <-time.After(interval):
}
}
}
// leaf returns the parsed leaf certificate, either from c.Leaf or by parsing
// the corresponding c.Certificate[0].
// After successfully parsing the cert c.Leaf gets set to the parsed cert.
func leaf(c *tls.Certificate) (*x509.Certificate, error) {
if c.Leaf != nil {
return c.Leaf, nil
}
leaf, err := x509.ParseCertificate(c.Certificate[0])
if err != nil {
return nil, fmt.Errorf("tlsCert - failed to parse leaf: %w", err)
}
c.Leaf = leaf
return leaf, err
}