Implement caching & limit concurrency

This commit is contained in:
Moritz Marquardt 2021-03-19 20:58:53 +01:00
parent 8ead10c82e
commit 203e230905
No known key found for this signature in database
GPG key ID: D5788327BEE388B6
4 changed files with 62 additions and 49 deletions

1
go.mod
View file

@ -3,6 +3,7 @@ module codeberg.org/codeberg/pages
go 1.16 go 1.16
require ( require (
github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a
github.com/valyala/fasthttp v1.22.0 github.com/valyala/fasthttp v1.22.0
github.com/valyala/fastjson v1.6.3 github.com/valyala/fastjson v1.6.3
) )

2
go.sum
View file

@ -1,3 +1,5 @@
github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a h1:Cf4CrDeyrIcuIiJZEZJAH5dapqQ6J3OmP/vHPbDjaFA=
github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a/go.mod h1:ig6eVXkYn/9dz0Vm8UdLf+E0u1bE6kBSn3n2hqk6jas=
github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc= github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/klauspost/compress v1.11.8 h1:difgzQsp5mdAz9v8lm3P/I+EpDKMU/6uTMw1y1FObuo= github.com/klauspost/compress v1.11.8 h1:difgzQsp5mdAz9v8lm3P/I+EpDKMU/6uTMw1y1FObuo=

View file

@ -3,10 +3,10 @@ package main
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"github.com/OrlovEvgeny/go-mcache"
"github.com/valyala/fasthttp" "github.com/valyala/fasthttp"
"github.com/valyala/fastjson" "github.com/valyala/fastjson"
"mime" "mime"
"net/url"
"path" "path"
"strconv" "strconv"
"strings" "strings"
@ -69,34 +69,30 @@ func handler(ctx *fasthttp.RequestCtx) {
if repo == "" { if repo == "" {
return false return false
} }
fmt.Printf("Trying branch: %s/%s/%s with path %v\n", targetOwner, repo, branch, path)
escapedBranch, _ := url.PathUnescape(branch)
if escapedBranch == "" {
escapedBranch = branch
}
// Check if the branch exists, otherwise treat it as a file path // Check if the branch exists, otherwise treat it as a file path
targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, repo, branch) branchTimestampResult := getBranchTimestamp(targetOwner, repo, branch)
fmt.Printf("Branch %s has timestamp %v\n", targetBranch, targetOptions.BranchTimestamp) if branchTimestampResult == nil {
if targetOptions.BranchTimestamp != (time.Time{}) {
// Branch exists, use it
targetRepo = repo
targetPath = strings.Trim(strings.Join(path, "/"), "/")
if canonicalLink != "" {
// Hide from search machines & add canonical link
ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex")
ctx.Response.Header.Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
return true
} else {
// branch doesn't exist // branch doesn't exist
return false return false
} }
// Branch exists, use it
targetRepo = repo
targetPath = strings.Trim(strings.Join(path, "/"), "/")
targetBranch = branchTimestampResult.branch
targetOptions.BranchTimestamp = branchTimestampResult.timestamp
if canonicalLink != "" {
// Hide from search machines & add canonical link
ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex")
ctx.Response.Header.Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
return true
} }
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure. // tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
@ -209,36 +205,49 @@ func returnErrorPage(ctx *fasthttp.RequestCtx, code int) {
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+fasthttp.StatusMessage(code)))) ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+fasthttp.StatusMessage(code))))
} }
// branchTimestamp is the result of a branch lookup against the Gitea API: the
// branch name that was resolved and the timestamp reported for that branch.
type branchTimestamp struct {
// branch is the requested branch name, or the repository's "default_branch"
// as reported by Gitea when the request did not name a branch.
branch string
// timestamp is the branch's "commit.timestamp" value parsed as RFC 3339; it
// stays the zero time.Time if parsing fails, because the parse error is discarded.
timestamp time.Time
}
// branchTimestampCache holds the *branchTimestamp results of successful lookups
// under an "owner/repo/branch" key; getBranchTimestamp consults it before
// querying Gitea and stores each new entry with a 15-second TTL.
// NOTE(review): the lookup key uses the branch as requested, while the store key
// uses the resolved branch, so a request with an empty branch is looked up under
// "owner/repo/" but stored under the default branch's name — confirm whether
// default-branch requests are meant to hit the cache.
var branchTimestampCache = mcache.New()
// getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch // getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
// (or an empty time.Time if the branch doesn't exist) // (or an empty time.Time if the branch doesn't exist)
// TODO: cache responses for ~15 minutes if a branch exists func getBranchTimestamp(owner, repo, branch string) *branchTimestamp {
func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string, t time.Time) { if result, ok := branchTimestampCache.Get(owner + "/" + repo + "/" + branch); ok {
branchWithFallback = branch return result.(*branchTimestamp)
}
result := &branchTimestamp{}
result.branch = branch
if branch == "" { if branch == "" {
var body = make([]byte, 0) var body = make([]byte, 0)
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo), 10*time.Second) status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo, 10*time.Second)
if err != nil || status != 200 { if err != nil || status != 200 {
fmt.Printf("Default branch request to Gitea API failed with status code %d and error %s\n", status, err) return nil
branchWithFallback = ""
return
} }
branch = fastjson.GetString(body, "default_branch") branch = fastjson.GetString(body, "default_branch")
branchWithFallback = branch result.branch = branch
} }
var body = make([]byte, 0) var body = make([]byte, 0)
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo)+"/branches/"+url.PathEscape(branch), 10*time.Second) status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch, 10*time.Second)
if err != nil || status != 200 { if err != nil || status != 200 {
fmt.Printf("Branch info request to Gitea API failed with status code %d and error %s\n", status, err) return nil
branchWithFallback = ""
return
} }
t, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp")) result.timestamp, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp"))
return _ = branchTimestampCache.Set(owner + "/" + repo + "/" + branch, result, 15 * time.Second)
return result
} }
// upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context. var upstreamClient = fasthttp.Client{
ReadTimeout: 10 * time.Second,
MaxConnDuration: 60 * time.Second,
MaxConnWaitTimeout: 1000 * time.Millisecond,
MaxConnsPerHost: 1024 * 16, // TODO: adjust bottlenecks for best performance with Gitea!
}
// upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, targetBranch string, targetPath string, options *upstreamOptions) (success bool) { func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, targetBranch string, targetPath string, options *upstreamOptions) (success bool) {
if options.ForbiddenMimeTypes == nil { if options.ForbiddenMimeTypes == nil {
options.ForbiddenMimeTypes = map[string]struct{}{} options.ForbiddenMimeTypes = map[string]struct{}{}
@ -246,15 +255,14 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
// Check if the branch exists and when it was modified // Check if the branch exists and when it was modified
if options.BranchTimestamp == (time.Time{}) { if options.BranchTimestamp == (time.Time{}) {
targetBranch, options.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, targetBranch) branch := getBranchTimestamp(targetOwner, targetRepo, targetBranch)
}
// Handle repositories with no/broken pages setup if branch == nil {
if options.BranchTimestamp == (time.Time{}) || targetBranch == "" { returnErrorPage(ctx, fasthttp.StatusFailedDependency)
ctx.Response.SetStatusCode(fasthttp.StatusFailedDependency) return true
ctx.Response.Header.SetContentType("text/html; charset=utf-8") }
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte("pages not set up for this repo"))) targetBranch = branch.branch
return true options.BranchTimestamp = branch.timestamp
} }
if targetOwner == "" || targetRepo == "" || targetBranch == "" { if targetOwner == "" || targetRepo == "" || targetBranch == "" {
@ -272,9 +280,9 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
// Make a GET request to the upstream URL // Make a GET request to the upstream URL
req := fasthttp.AcquireRequest() req := fasthttp.AcquireRequest()
req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + url.PathEscape(targetOwner) + "/" + url.PathEscape(targetRepo) + "/raw/" + url.PathEscape(targetBranch) + "/" + url.PathEscape(targetPath)) req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath)
res := fasthttp.AcquireResponse() res := fasthttp.AcquireResponse()
err := fasthttp.DoTimeout(req, res, 10*time.Second) err := upstreamClient.Do(req, res)
// Handle errors // Handle errors
if res.StatusCode() == fasthttp.StatusNotFound { if res.StatusCode() == fasthttp.StatusNotFound {

View file

@ -99,6 +99,8 @@ func main() {
NoDefaultServerHeader: true, NoDefaultServerHeader: true,
NoDefaultDate: true, NoDefaultDate: true,
ReadTimeout: 10 * time.Second, ReadTimeout: 10 * time.Second,
Concurrency: 1024 * 32, // TODO: adjust bottlenecks for best performance with Gitea!
MaxConnsPerIP: 100,
}).Serve(listener) }).Serve(listener)
if err != nil { if err != nil {
fmt.Printf("Couldn't start server: %s\n", err) fmt.Printf("Couldn't start server: %s\n", err)