more caching in-client

This commit is contained in:
6543 2022-07-27 15:39:46 +02:00
parent b2b22872a2
commit a8afb372dd
No known key found for this signature in database
GPG key ID: C99B82E40B027BAE
11 changed files with 95 additions and 80 deletions

View file

@ -76,13 +76,10 @@ func Serve(ctx *cli.Context) error {
canonicalDomainCache := cache.NewKeyValueCache()
// dnsLookupCache stores DNS lookups for custom domains
dnsLookupCache := cache.NewKeyValueCache()
// branchTimestampCache stores branch timestamps for faster cache checking
branchTimestampCache := cache.NewKeyValueCache()
// fileResponseCache stores responses from the Gitea server
// TODO: make this an MRU cache with a size limit
fileResponseCache := cache.NewKeyValueCache()
// clientResponseCache stores responses from the Gitea server
clientResponseCache := cache.NewKeyValueCache()
giteaClient, err := gitea.NewClient(giteaRoot, giteaAPIToken, fileResponseCache, ctx.Bool("enable-symlink-support"), ctx.Bool("enable-lfs-support"))
giteaClient, err := gitea.NewClient(giteaRoot, giteaAPIToken, clientResponseCache, ctx.Bool("enable-symlink-support"), ctx.Bool("enable-lfs-support"))
if err != nil {
return fmt.Errorf("could not create new gitea client: %v", err)
}
@ -92,7 +89,7 @@ func Serve(ctx *cli.Context) error {
giteaClient,
giteaRoot, rawInfoPage,
BlacklistedPaths, allowedCorsDomains,
dnsLookupCache, canonicalDomainCache, branchTimestampCache, fileResponseCache)
dnsLookupCache, canonicalDomainCache)
fastServer := server.SetupServer(handler)
httpServer := server.SetupHTTPACMEChallengeServer(challengeCache)

View file

@ -1,5 +1,7 @@
package gitea
import "time"
type FileResponse struct {
Exists bool
ETag []byte
@ -10,3 +12,18 @@ type FileResponse struct {
func (f FileResponse) IsEmpty() bool {
return len(f.Body) != 0
}
type BranchTimestamp struct {
Branch string
Timestamp time.Time
}
var (
// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
defaultBranchCacheTimeout = 15 * time.Minute
// branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter
// than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be
// picked up faster, while still allowing the content to be cached longer if nothing changes.
branchExistenceCacheTimeout = 5 * time.Minute
)

View file

@ -5,3 +5,8 @@ import (
)
var ErrorNotFound = errors.New("not found")
const (
branchTimestampCacheKeyPrefix = "branchTime"
defaultBranchCacheKeyPrefix = "defaultBranch"
)

View file

@ -21,28 +21,28 @@ const (
)
type Client struct {
giteaRoot string
giteaAPIToken string
infoTimeout time.Duration
contentTimeout time.Duration
fastClient *fasthttp.Client
fileResponseCache cache.SetGetKey
giteaRoot string
giteaAPIToken string
infoTimeout time.Duration
contentTimeout time.Duration
fastClient *fasthttp.Client
responseCache cache.SetGetKey
followSymlinks bool
supportLFS bool
}
func NewClient(giteaRoot, giteaAPIToken string, fileResponseCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) {
func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) {
rootURL, err := url.Parse(giteaRoot)
giteaRoot = strings.Trim(rootURL.String(), "/")
return &Client{
giteaRoot: giteaRoot,
giteaAPIToken: giteaAPIToken,
infoTimeout: 5 * time.Second,
contentTimeout: 10 * time.Second,
fastClient: getFastHTTPClient(),
fileResponseCache: fileResponseCache,
giteaRoot: giteaRoot,
giteaAPIToken: giteaAPIToken,
infoTimeout: 5 * time.Second,
contentTimeout: 10 * time.Second,
fastClient: getFastHTTPClient(),
responseCache: respCache,
followSymlinks: followSymlinks,
supportLFS: supportLFS,

View file

@ -16,14 +16,14 @@ import (
)
type Client struct {
sdkClient *gitea.Client
fileResponseCache cache.SetGetKey
sdkClient *gitea.Client
responseCache cache.SetGetKey
followSymlinks bool
supportLFS bool
}
func NewClient(giteaRoot, giteaAPIToken string, fileResponseCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) {
func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) {
rootURL, err := url.Parse(giteaRoot)
giteaRoot = strings.Trim(rootURL.String(), "/")
@ -31,8 +31,8 @@ func NewClient(giteaRoot, giteaAPIToken string, fileResponseCache cache.SetGetKe
sdk, err := gitea.NewClient(giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(giteaAPIToken))
return &Client{
sdkClient: sdk,
fileResponseCache: fileResponseCache,
sdkClient: sdk,
responseCache: respCache,
}, err
}
@ -68,18 +68,40 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str
}
}
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (time.Time, error) {
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) {
cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName)
if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil {
return stamp.(*BranchTimestamp), nil
}
branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName)
if err != nil {
return time.Time{}, err
if resp != nil && resp.StatusCode == http.StatusNotFound {
return &BranchTimestamp{}, ErrorNotFound
}
return &BranchTimestamp{}, err
}
if resp.StatusCode != http.StatusOK {
return time.Time{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
return branch.Commit.Timestamp, nil
stamp := &BranchTimestamp{
Branch: branch.Name,
Timestamp: branch.Commit.Timestamp,
}
client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout)
return stamp, nil
}
func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) {
cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName)
if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil {
return branch.(string), nil
}
repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName)
if err != nil {
return "", err
@ -87,5 +109,8 @@ func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (str
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
return repo.DefaultBranch, nil
branch := repo.DefaultBranch
client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout)
return branch, nil
}

View file

@ -24,7 +24,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
giteaClient *gitea.Client,
giteaRoot, rawInfoPage string,
blacklistedPaths, allowedCorsDomains [][]byte,
dnsLookupCache, canonicalDomainCache, branchTimestampCache, fileResponseCache cache.SetGetKey,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
) func(ctx *fasthttp.RequestCtx) {
return func(ctx *fasthttp.RequestCtx) {
log := log.With().Str("Handler", string(ctx.Request.Header.RequestURI())).Logger()
@ -96,7 +96,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
branch = strings.ReplaceAll(branch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch, branchTimestampCache)
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch)
if branchTimestampResult == nil {
log.Debug().Msg("tryBranch: branch doesn't exist")
return false
@ -153,7 +153,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 1")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
return
}
log.Debug().Msg("missing branch")
@ -169,7 +169,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 2")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
return
} else if bytes.HasSuffix(trimmedHost, mainDomainSuffix) {
@ -204,7 +204,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 3")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
}
@ -220,7 +220,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 4")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
}
@ -236,7 +236,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
return
}
@ -248,7 +248,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
return
}
@ -296,7 +296,7 @@ func Handler(mainDomainSuffix, rawDomain []byte,
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache, branchTimestampCache, fileResponseCache)
canonicalDomainCache)
return
}

View file

@ -24,8 +24,6 @@ func TestHandlerPerformance(t *testing.T) {
[][]byte{[]byte("raw.codeberg.org"), []byte("fonts.codeberg.org"), []byte("design.codeberg.org")},
cache.NewKeyValueCache(),
cache.NewKeyValueCache(),
cache.NewKeyValueCache(),
cache.NewKeyValueCache(),
)
testCase := func(uri string, status int) {

View file

@ -21,7 +21,7 @@ func tryUpstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client,
targetOptions *upstream.Options,
targetOwner, targetRepo, targetBranch, targetPath string,
canonicalDomainCache, branchTimestampCache, fileResponseCache cache.SetGetKey,
canonicalDomainCache cache.SetGetKey,
) {
// check if a canonical domain exists on a request on MainDomain
if bytes.HasSuffix(trimmedHost, mainDomainSuffix) {
@ -45,7 +45,7 @@ func tryUpstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client,
targetOptions.TargetPath = targetPath
// Try to request the file from the Gitea API
if !targetOptions.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) {
if !targetOptions.Upstream(ctx, giteaClient, branchTimestampCache) {
html.ReturnErrorPage(ctx, ctx.Response.StatusCode())
}
}

View file

@ -2,14 +2,6 @@ package upstream
import "time"
// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
var defaultBranchCacheTimeout = 15 * time.Minute
// branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter
// than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be
// picked up faster, while still allowing the content to be cached longer if nothing changes.
var branchExistenceCacheTimeout = 5 * time.Minute
// fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending
// on your available memory.
// TODO: move as option into cache interface

View file

@ -5,46 +5,27 @@ import (
"path"
"strconv"
"strings"
"time"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/gitea"
)
type branchTimestamp struct {
Branch string
Timestamp time.Time
}
// GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
// (or nil if the branch doesn't exist)
func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string, branchTimestampCache cache.SetGetKey) *branchTimestamp {
if result, ok := branchTimestampCache.Get(owner + "/" + repo + "/" + branch); ok {
if result == nil {
return nil
}
return result.(*branchTimestamp)
}
result := &branchTimestamp{
Branch: branch,
}
func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string) *gitea.BranchTimestamp {
if len(branch) == 0 {
// Get default branch
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(owner, repo)
if err != nil {
_ = branchTimestampCache.Set(owner+"/"+repo+"/", nil, defaultBranchCacheTimeout)
return nil
}
result.Branch = defaultBranch
branch = defaultBranch
}
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, result.Branch)
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, branch)
if err != nil {
return nil
}
result.Timestamp = timestamp
_ = branchTimestampCache.Set(owner+"/"+repo+"/"+branch, result, branchExistenceCacheTimeout)
return result
return timestamp
}
func (o *Options) getMimeTypeByExtension() string {

View file

@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
@ -12,7 +13,6 @@ import (
"github.com/valyala/fasthttp"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/gitea"
)
@ -43,12 +43,12 @@ type Options struct {
}
// Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client, branchTimestampCache, fileResponseCache cache.SetGetKey) (final bool) {
func (o *Options) Upstream(ctx *http.Response, giteaClient *gitea.Client) (final bool) {
log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
// Check if the branch exists and when it was modified
if o.BranchTimestamp.IsZero() {
branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch, branchTimestampCache)
branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch)
if branch == nil {
html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
@ -93,7 +93,7 @@ func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client,
optionsForIndexPages.appendTrailingSlash = true
for _, indexPage := range upstreamIndexPages {
optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage
if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) {
if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache) {
_ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{
Exists: false,
}, fileCacheTimeout)
@ -104,7 +104,7 @@ func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client,
optionsForIndexPages.appendTrailingSlash = false
optionsForIndexPages.redirectIfExists = strings.TrimSuffix(string(ctx.Request.URI().Path()), "/") + ".html"
optionsForIndexPages.TargetPath = o.TargetPath + ".html"
if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) {
if optionsForIndexPages.Upstream(ctx, giteaClient, branchTimestampCache) {
_ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{
Exists: false,
}, fileCacheTimeout)
@ -119,7 +119,7 @@ func (o *Options) Upstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client,
optionsForNotFoundPages.appendTrailingSlash = false
for _, notFoundPage := range upstreamNotFoundPages {
optionsForNotFoundPages.TargetPath = "/" + notFoundPage
if optionsForNotFoundPages.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) {
if optionsForNotFoundPages.Upstream(ctx, giteaClient, branchTimestampCache) {
_ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{
Exists: false,
}, fileCacheTimeout)