unwind tryBranch into own func

This commit is contained in:
6543 2022-11-12 02:54:56 +01:00
parent 3d9ffcf8d7
commit f13feec8bf
No known key found for this signature in database
GPG key ID: B8BE6D610E61C862
5 changed files with 78 additions and 119 deletions

View file

@ -90,7 +90,7 @@ func Serve(ctx *cli.Context) error {
// Create handler based on settings
httpsHandler := server.Handler(mainDomainSuffix, rawDomain,
giteaClient,
giteaRoot, rawInfoPage,
rawInfoPage,
BlacklistedPaths, allowedCorsDomains,
dnsLookupCache, canonicalDomainCache)

View file

@ -45,6 +45,8 @@ type Client struct {
sdkClient *gitea.Client
responseCache cache.SetGetKey
giteaRoot string
followSymlinks bool
supportLFS bool
@ -79,6 +81,8 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo
sdkClient: sdk,
responseCache: respCache,
giteaRoot: giteaRoot,
followSymlinks: followSymlinks,
supportLFS: supportLFS,
@ -87,6 +91,10 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo
}, err
}
// ContentWebLink returns the web link under which resource can be viewed on
// the given branch of targetOwner/targetRepo, rooted at the client's giteaRoot.
//
// The root is prepended by plain concatenation instead of being passed through
// path.Join: path.Join cleans its result and would collapse the "//" that
// follows a URL scheme ("https://host" becomes "https:/host"), yielding a
// broken link. Only the path segments after the root are joined and cleaned.
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
	relative := path.Join(targetOwner, targetRepo, "src/branch", branch, resource)

	root := client.giteaRoot
	// Drop trailing slashes so exactly one separator follows the root.
	for len(root) > 0 && root[len(root)-1] == '/' {
		root = root[:len(root)-1]
	}
	if root == "" {
		// No root configured: keep the previous relative-path result.
		return relative
	}
	return root + "/" + relative
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
if err != nil {

View file

@ -6,7 +6,6 @@ import (
"path"
"strings"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/html"
@ -27,7 +26,7 @@ const (
// Handler handles a single HTTP request to the web server.
func Handler(mainDomainSuffix, rawDomain string,
giteaClient *gitea.Client,
giteaRoot, rawInfoPage string,
rawInfoPage string,
blacklistedPaths, allowedCorsDomains []string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
) http.HandlerFunc {
@ -85,51 +84,10 @@ func Handler(mainDomainSuffix, rawDomain string,
}
// Prepare request information to Gitea
var targetOwner, targetRepo, targetBranch, targetPath string
targetOptions := &upstream.Options{
TryIndexPages: true,
}
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
// also disallow search indexing and add a Link header to the canonical URL.
// TODO: move into external func to not alert vars indirectly
tryBranch1 := func(log zerolog.Logger, repo, branch string, _path []string, canonicalLink string) bool {
if repo == "" {
log.Debug().Msg("tryBranch: repo is empty")
return false
}
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
branch = strings.ReplaceAll(branch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch)
if branchTimestampResult == nil {
log.Debug().Msg("tryBranch: branch doesn't exist")
return false
}
// Branch exists, use it
targetRepo = repo
targetPath = path.Join(_path...)
targetBranch = branchTimestampResult.Branch
targetOptions.BranchTimestamp = branchTimestampResult.Timestamp
if canonicalLink != "" {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
log.Debug().Msg("tryBranch: true")
return true
}
log.Debug().Msg("preparations")
if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) {
// Serve raw content from RawDomain
@ -144,21 +102,19 @@ func Handler(mainDomainSuffix, rawDomain string,
ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect)
return
}
targetOwner = pathElements[0]
targetRepo = pathElements[1]
targetOwner := pathElements[0]
targetRepo := pathElements[1]
// raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch")
if newRepo, newPath, newBranch, newTimestamp, works := tryBranch2(log, ctx, giteaClient,
targetOwner, targetRepo, pathElements[2][1:], pathElements[3:],
giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
); works {
targetOptions.BranchTimestamp = *newTimestamp
newPath := path.Join(pathElements[3:]...)
branch := pathElements[2][1:]
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 1")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, newRepo, newBranch, newPath,
canonicalDomainCache)
targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
return
}
log.Debug().Msg("missing branch info")
@ -167,14 +123,15 @@ func Handler(mainDomainSuffix, rawDomain string,
}
log.Debug().Msg("raw domain preparations, now trying with default branch")
tryBranch(log,
targetRepo, "", pathElements[2:],
giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
)
newPath := path.Join(pathElements[2:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, "", newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 2")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
} else {
log.Error().Msg("TODO: is this a bug?")
}
return
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
@ -182,9 +139,8 @@ func Handler(mainDomainSuffix, rawDomain string,
log.Debug().Msg("main domain suffix")
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
targetOwner = strings.TrimSuffix(trimmedHost, mainDomainSuffix)
targetRepo = pathElements[0]
targetPath = strings.Trim(strings.Join(pathElements[1:], "/"), "/")
targetOwner := strings.TrimSuffix(trimmedHost, mainDomainSuffix)
targetRepo := pathElements[0]
if targetOwner == "www" {
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
@ -203,14 +159,13 @@ func Handler(mainDomainSuffix, rawDomain string,
log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
branch := pathElements[1][1:]
if tryBranch(log,
pathElements[0], branch, pathElements[2:],
"/"+pathElements[0]+"/%p",
) {
newPath := path.Join(pathElements[2:]...)
repo := pathElements[0]
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 3")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo),
@ -224,12 +179,13 @@ func Handler(mainDomainSuffix, rawDomain string,
if strings.HasPrefix(pathElements[0], "@") {
log.Debug().Msg("main domain preparations, now trying with specified branch")
branch := pathElements[0][1:]
if tryBranch(log,
"pages", branch, pathElements[1:], "/%p") {
repo := "pages"
newPath := path.Join(pathElements[1:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 4")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, "pages", targetBranch, targetPath,
canonicalDomainCache)
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"),
@ -242,37 +198,42 @@ func Handler(mainDomainSuffix, rawDomain string,
// example.codeberg.page/myrepo/index.html
// example.codeberg.page/pages/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != "pages" && tryBranch(log,
pathElements[0], "pages", pathElements[1:], "") {
if pathElements[0] != "pages" {
repo := pathElements[0]
branch := "pages"
newPath := path.Join(pathElements[1:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, false); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
return
}
}
// Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch")
if tryBranch(log,
"pages", "", pathElements, "") {
newPath := path.Join(pathElements...)
repo := "pages"
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, "", newPath, false); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
return
}
// Couldn't find a valid repo/branch
html.ReturnErrorPage(ctx,
fmt.Sprintf("couldn't find a valid repo[%s]/branch[%s]", targetRepo, targetBranch),
fmt.Sprintf("couldn't find a valid repo[%s]", targetRepo),
http.StatusFailedDependency)
return
} else {
trimmedHostStr := string(trimmedHost)
// Serve pages from custom domains
targetOwner, targetRepo, targetBranch = dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache)
targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache)
if targetOwner == "" {
html.ReturnErrorPage(ctx,
"could not obtain repo owner from custom domain",
@ -281,18 +242,19 @@ func Handler(mainDomainSuffix, rawDomain string,
}
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
canonicalLink := ""
canonicalLink := false
if strings.HasPrefix(pathElements[0], "@") {
targetBranch = pathElements[0][1:]
pathElements = pathElements[1:]
canonicalLink = "/%p"
canonicalLink = true
}
// Try to use the given repo on the given branch or the default branch
newPath := path.Join(pathElements...)
log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if tryBranch(log,
targetRepo, targetBranch, pathElements, canonicalLink) {
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, targetBranch, newPath, canonicalLink); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, timestampBranch.Branch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return
@ -300,7 +262,7 @@ func Handler(mainDomainSuffix, rawDomain string,
// only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
if targetOwner != "" {
ctx.Redirect("https://"+canonicalDomain+string(ctx.Path()), http.StatusTemporaryRedirect)
ctx.Redirect("https://"+canonicalDomain+string(newPath), http.StatusTemporaryRedirect)
return
}
@ -310,8 +272,7 @@ func Handler(mainDomainSuffix, rawDomain string,
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
canonicalDomainCache)
targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
return
}

View file

@ -11,12 +11,11 @@ import (
)
func TestHandlerPerformance(t *testing.T) {
giteaRoot := "https://codeberg.org"
giteaClient, _ := gitea.NewClient(giteaRoot, "", cache.NewKeyValueCache(), false, false)
giteaClient, _ := gitea.NewClient("https://codeberg.org", "", cache.NewKeyValueCache(), false, false)
testHandler := Handler(
"codeberg.page", "raw.codeberg.org",
giteaClient,
giteaRoot, "https://docs.codeberg.org/pages/raw-content/",
"https://docs.codeberg.org/pages/raw-content/",
[]string{"/.well-known/acme-challenge/"},
[]string{"raw.codeberg.org", "fonts.codeberg.org", "design.codeberg.org"},
cache.NewKeyValueCache(),

View file

@ -2,9 +2,7 @@ package server
import (
"net/http"
"path"
"strings"
"time"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
@ -51,13 +49,14 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
}
}
func tryBranch2(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
repoOwner, repoName, branch string, _path []string, canonicalLink string) (
targetRepo, targetPath, targetBranch string, branchTimestamp *time.Time,
works bool) {
// tryBranch checks if a branch exists and, if so, returns its branch/timestamp information.
// If canonicalLink is true, it will also disallow search indexing and add a Link header to the canonical URL.
func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
repoOwner, repoName, branch, path string, canonicalLink bool,
) (*gitea.BranchTimestamp, bool) {
if repoName == "" {
log.Debug().Msg("tryBranch: repo == ''")
return "", "", "", nil, false
log.Debug().Msg("tryBranch: repo is empty")
return nil, false
}
// Replace "~" to "/" so we can access branch that contains slash character
@ -68,25 +67,17 @@ func tryBranch2(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Cli
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch)
if branchTimestampResult == nil {
log.Debug().Msg("tryBranch: branch doesn't exist")
return "", "", "", nil, false
return nil, false
}
// Branch exists, use it
targetRepo = repoName
targetPath = path.Join(_path...)
targetBranch = branchTimestampResult.Branch
branchTimestamp = &branchTimestampResult.Timestamp
if canonicalLink != "" {
if canonicalLink {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
giteaClient.ContentWebLink(repoOwner, repoName, branchTimestampResult.Branch, path)+
"; rel=\"canonical\"")
}
log.Debug().Msg("tryBranch: true")
return
return branchTimestampResult, true
}