From a8afb372dd28ae5e431f7f858d43f11e8c9dbe13 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Wed, 27 Jul 2022 15:39:46 +0200 Subject: [PATCH] more caching in-client --- cmd/main.go | 11 +++----- server/gitea/cache.go | 17 +++++++++++++ server/gitea/client.go | 5 ++++ server/gitea/client_fasthttp.go | 26 +++++++++---------- server/gitea/client_std.go | 45 +++++++++++++++++++++++++-------- server/handler.go | 18 ++++++------- server/handler_test.go | 2 -- server/try.go | 4 +-- server/upstream/const.go | 8 ------ server/upstream/helper.go | 27 +++----------------- server/upstream/upstream.go | 12 ++++----- 11 files changed, 95 insertions(+), 80 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index b11e446..b147306 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -76,13 +76,10 @@ func Serve(ctx *cli.Context) error { canonicalDomainCache := cache.NewKeyValueCache() // dnsLookupCache stores DNS lookups for custom domains dnsLookupCache := cache.NewKeyValueCache() - // branchTimestampCache stores branch timestamps for faster cache checking - branchTimestampCache := cache.NewKeyValueCache() - // fileResponseCache stores responses from the Gitea server - // TODO: make this an MRU cache with a size limit - fileResponseCache := cache.NewKeyValueCache() + // clientResponseCache stores responses from the Gitea server + clientResponseCache := cache.NewKeyValueCache() - giteaClient, err := gitea.NewClient(giteaRoot, giteaAPIToken, fileResponseCache, ctx.Bool("enable-symlink-support"), ctx.Bool("enable-lfs-support")) + giteaClient, err := gitea.NewClient(giteaRoot, giteaAPIToken, clientResponseCache, ctx.Bool("enable-symlink-support"), ctx.Bool("enable-lfs-support")) if err != nil { return fmt.Errorf("could not create new gitea client: %v", err) } @@ -92,7 +89,7 @@ func Serve(ctx *cli.Context) error { giteaClient, giteaRoot, rawInfoPage, BlacklistedPaths, allowedCorsDomains, - dnsLookupCache, canonicalDomainCache, branchTimestampCache, fileResponseCache) + dnsLookupCache, canonicalDomainCache) fastServer := server.SetupServer(handler) httpServer := server.SetupHTTPACMEChallengeServer(challengeCache) diff --git a/server/gitea/cache.go b/server/gitea/cache.go index 932ff3c..5b3fb7c 100644 --- a/server/gitea/cache.go +++ b/server/gitea/cache.go @@ -1,5 +1,7 @@ package gitea +import "time" + type FileResponse struct { Exists bool ETag []byte @@ -10,3 +12,18 @@ type FileResponse struct { func (f FileResponse) IsEmpty() bool { return len(f.Body) != 0 } + +type BranchTimestamp struct { + Branch string + Timestamp time.Time +} + +var ( + // defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long. + defaultBranchCacheTimeout = 15 * time.Minute + + // branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter + // than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be + // picked up faster, while still allowing the content to be cached longer if nothing changes. + branchExistenceCacheTimeout = 5 * time.Minute +) diff --git a/server/gitea/client.go b/server/gitea/client.go index 68782a8..6e9a9ec 100644 --- a/server/gitea/client.go +++ b/server/gitea/client.go @@ -5,3 +5,8 @@ import ( ) var ErrorNotFound = errors.New("not found") + +const ( + branchTimestampCacheKeyPrefix = "branchTime" + defaultBranchCacheKeyPrefix = "defaultBranch" +) diff --git a/server/gitea/client_fasthttp.go b/server/gitea/client_fasthttp.go index d2698e9..1ec97f3 100644 --- a/server/gitea/client_fasthttp.go +++ b/server/gitea/client_fasthttp.go @@ -21,28 +21,28 @@ const ( ) type Client struct { - giteaRoot string - giteaAPIToken string - infoTimeout time.Duration - contentTimeout time.Duration - fastClient *fasthttp.Client - fileResponseCache cache.SetGetKey + giteaRoot string + giteaAPIToken string + infoTimeout time.Duration + contentTimeout time.Duration + fastClient *fasthttp.Client + responseCache cache.SetGetKey followSymlinks bool supportLFS bool } -func NewClient(giteaRoot, giteaAPIToken string, fileResponseCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) { +func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) { rootURL, err := url.Parse(giteaRoot) giteaRoot = strings.Trim(rootURL.String(), "/") return &Client{ - giteaRoot: giteaRoot, - giteaAPIToken: giteaAPIToken, - infoTimeout: 5 * time.Second, - contentTimeout: 10 * time.Second, - fastClient: getFastHTTPClient(), - fileResponseCache: fileResponseCache, + giteaRoot: giteaRoot, + giteaAPIToken: giteaAPIToken, + infoTimeout: 5 * time.Second, + contentTimeout: 10 * time.Second, + fastClient: getFastHTTPClient(), + responseCache: respCache, followSymlinks: followSymlinks, supportLFS: supportLFS, diff --git a/server/gitea/client_std.go b/server/gitea/client_std.go index aa34390..9e66763 100644 --- a/server/gitea/client_std.go +++ b/server/gitea/client_std.go @@ -16,14 +16,14 @@ import ( ) type Client struct { - sdkClient *gitea.Client - fileResponseCache cache.SetGetKey + sdkClient *gitea.Client + responseCache cache.SetGetKey followSymlinks bool supportLFS bool } -func NewClient(giteaRoot, giteaAPIToken string, fileResponseCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) { +func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) { rootURL, err := url.Parse(giteaRoot) giteaRoot = strings.Trim(rootURL.String(), "/") @@ -31,8 +31,8 @@ func NewClient(giteaRoot, giteaAPIToken string, fileResponseCache cache.SetGetKe sdk, err := gitea.NewClient(giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(giteaAPIToken)) return &Client{ - sdkClient: sdk, - fileResponseCache: fileResponseCache, + sdkClient: sdk, + responseCache: respCache, }, err } @@ -68,18 +68,40 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str } } -func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (time.Time, error) { +func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) { + cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName) + + if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil { + return stamp.(*BranchTimestamp), nil + } + branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName) if err != nil { - return time.Time{}, err + if resp != nil && resp.StatusCode == http.StatusNotFound { + return &BranchTimestamp{}, ErrorNotFound + } + return &BranchTimestamp{}, err } if resp.StatusCode != http.StatusOK { - return time.Time{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) + return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } - return branch.Commit.Timestamp, nil + + stamp := &BranchTimestamp{ + Branch: branch.Name, + Timestamp: branch.Commit.Timestamp, + } + + client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout) + return stamp, nil } func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) { + cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName) + + if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil { + return branch.(string), nil + } + repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName) if err != nil { return "", err @@ -87,5 +109,8 @@ func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (str if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } - return repo.DefaultBranch, nil + + branch := repo.DefaultBranch + client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout) + return branch, nil } diff --git a/server/handler.go b/server/handler.go index 44c379a..2207035 100644 --- a/server/handler.go +++ b/server/handler.go @@ -24,7 +24,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaClient *gitea.Client, giteaRoot, rawInfoPage string, blacklistedPaths, allowedCorsDomains [][]byte, - dnsLookupCache, canonicalDomainCache, branchTimestampCache, fileResponseCache cache.SetGetKey, + dnsLookupCache, canonicalDomainCache cache.SetGetKey, ) func(ctx *fasthttp.RequestCtx) { return func(ctx *fasthttp.RequestCtx) { log := log.With().Str("Handler", string(ctx.Request.Header.RequestURI())).Logger() @@ -96,7 +96,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, branch = strings.ReplaceAll(branch, "~", "/") // Check if the branch exists, otherwise treat it as a file path - branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch, branchTimestampCache) + branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch) if branchTimestampResult == nil { log.Debug().Msg("tryBranch: branch doesn't exist") return false @@ -153,7 +153,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 1") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) return } log.Debug().Msg("missing branch") @@ -169,7 +169,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 2") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) return } else if bytes.HasSuffix(trimmedHost, mainDomainSuffix) { @@ -204,7 +204,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 3") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) } else { html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) } @@ -220,7 +220,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 4") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) } else { html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) } @@ -236,7 +236,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 5") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) return } @@ -248,7 +248,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 6") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) return } @@ -296,7 +296,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, log.Debug().Msg("tryBranch, now trying upstream 7") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache, branchTimestampCache, fileResponseCache) + canonicalDomainCache) return } diff --git a/server/handler_test.go b/server/handler_test.go index a2bc6d7..8452dc8 100644 --- a/server/handler_test.go +++ b/server/handler_test.go @@ -24,8 +24,6 @@ func TestHandlerPerformance(t *testing.T) { [][]byte{[]byte("raw.codeberg.org"), []byte("fonts.codeberg.org"), []byte("design.codeberg.org")}, cache.NewKeyValueCache(), cache.NewKeyValueCache(), - cache.NewKeyValueCache(), - cache.NewKeyValueCache(), ) testCase := func(uri string, status int) { diff --git a/server/try.go b/server/try.go index 4f3ea1f..3f2d7dc 100644 --- a/server/try.go +++ b/server/try.go @@ -21,7 +21,7 @@ func tryUpstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, targetOptions *upstream.Options, targetOwner, targetRepo, targetBranch, targetPath string, - canonicalDomainCache, branchTimestampCache, fileResponseCache cache.SetGetKey, + canonicalDomainCache cache.SetGetKey, ) { // check if a canonical domain exists on a request on MainDomain if bytes.HasSuffix(trimmedHost, mainDomainSuffix) { @@ -45,7 +45,7 @@ func tryUpstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, targetOptions.TargetPath = targetPath // Try to request the file from the Gitea API - if !targetOptions.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) { + if !targetOptions.Upstream(ctx, giteaClient, branchTimestampCache) { html.ReturnErrorPage(ctx, ctx.Response.StatusCode()) } } diff --git a/server/upstream/const.go b/server/upstream/const.go index 247e1d1..bdb123b 100644 --- a/server/upstream/const.go +++ b/server/upstream/const.go @@ -2,14 +2,6 @@ package upstream import "time" -// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long. -var defaultBranchCacheTimeout = 15 * time.Minute - -// branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter -// than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be -// picked up faster, while still allowing the content to be cached longer if nothing changes. -var branchExistenceCacheTimeout = 5 * time.Minute - // fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending // on your available memory. // TODO: move as option into cache interface diff --git a/server/upstream/helper.go b/server/upstream/helper.go index 0714dcd..12eda80 100644 --- a/server/upstream/helper.go +++ b/server/upstream/helper.go @@ -5,46 +5,27 @@ import ( "path" "strconv" "strings" - "time" - "codeberg.org/codeberg/pages/server/cache" "codeberg.org/codeberg/pages/server/gitea" ) -type branchTimestamp struct { - Branch string - Timestamp time.Time -} - // GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch // (or nil if the branch doesn't exist) -func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string, branchTimestampCache cache.SetGetKey) *branchTimestamp { - if result, ok := branchTimestampCache.Get(owner + "/" + repo + "/" + branch); ok { - if result == nil { - return nil - } - return result.(*branchTimestamp) - } - result := &branchTimestamp{ - Branch: branch, - } +func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string) *gitea.BranchTimestamp { if len(branch) == 0 { // Get default branch defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(owner, repo) if err != nil { - _ = branchTimestampCache.Set(owner+"/"+repo+"/", nil, defaultBranchCacheTimeout) return nil } - result.Branch = defaultBranch + branch = defaultBranch } - timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, result.Branch) + timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, branch) if err != nil { return nil } - result.Timestamp = timestamp - _ = branchTimestampCache.Set(owner+"/"+repo+"/"+branch, result, branchExistenceCacheTimeout) - return result + return timestamp } func (o *Options) getMimeTypeByExtension() string { diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go index 6ca7b4c..e647671 100644 --- a/server/upstream/upstream.go +++ b/server/upstream/upstream.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "net/http" "strings" "time" @@ -12,7 +13,6 @@ import ( "github.com/valyala/fasthttp" "codeberg.org/codeberg/pages/html" - "codeberg.org/codeberg/pages/server/cache" "codeberg.org/codeberg/pages/server/gitea" ) @@ -43,12 +43,12 @@ type Options struct { } // Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context. -func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, branchTimestampCache, fileResponseCache cache.SetGetKey) (final bool) { +func (o *Options) Upstream(ctx *http.Response, giteaClient *gitea.Client) (final bool) { log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger() // Check if the branch exists and when it was modified if o.BranchTimestamp.IsZero() { - branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch, branchTimestampCache) + branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch) if branch == nil { html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) @@ -93,7 +93,7 @@ func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, optionsForIndexPages.appendTrailingSlash = true for _, indexPage := range upstreamIndexPages { optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage - if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) { + if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache) { _ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{ Exists: false, }, fileCacheTimeout) @@ -104,7 +104,7 @@ func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, optionsForIndexPages.appendTrailingSlash = false optionsForIndexPages.redirectIfExists = strings.TrimSuffix(string(ctx.Request.URI().Path()), "/") + ".html" optionsForIndexPages.TargetPath = o.TargetPath + ".html" - if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) { + if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache) { _ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{ Exists: false, }, fileCacheTimeout) @@ -119,7 +119,7 @@ func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, optionsForNotFoundPages.appendTrailingSlash = false for _, notFoundPage := range upstreamNotFoundPages { optionsForNotFoundPages.TargetPath = "/" + notFoundPage - if optionsForNotFoundPages.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) { + if optionsForNotFoundPages.Upstream(ctx, giteaClient, branchTimestampCache) { _ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{ Exists: false, }, fileCacheTimeout)