unwind tryBranch into own func

This commit is contained in:
6543 2022-11-12 02:54:56 +01:00
parent 3d9ffcf8d7
commit f13feec8bf
No known key found for this signature in database
GPG key ID: B8BE6D610E61C862
5 changed files with 78 additions and 119 deletions

View file

@ -90,7 +90,7 @@ func Serve(ctx *cli.Context) error {
// Create handler based on settings // Create handler based on settings
httpsHandler := server.Handler(mainDomainSuffix, rawDomain, httpsHandler := server.Handler(mainDomainSuffix, rawDomain,
giteaClient, giteaClient,
giteaRoot, rawInfoPage, rawInfoPage,
BlacklistedPaths, allowedCorsDomains, BlacklistedPaths, allowedCorsDomains,
dnsLookupCache, canonicalDomainCache) dnsLookupCache, canonicalDomainCache)

View file

@ -45,6 +45,8 @@ type Client struct {
sdkClient *gitea.Client sdkClient *gitea.Client
responseCache cache.SetGetKey responseCache cache.SetGetKey
giteaRoot string
followSymlinks bool followSymlinks bool
supportLFS bool supportLFS bool
@ -79,6 +81,8 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo
sdkClient: sdk, sdkClient: sdk,
responseCache: respCache, responseCache: respCache,
giteaRoot: giteaRoot,
followSymlinks: followSymlinks, followSymlinks: followSymlinks,
supportLFS: supportLFS, supportLFS: supportLFS,
@ -87,6 +91,10 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo
}, err }, err
} }
// ContentWebLink returns the URL of the Gitea web page that shows resource on
// the given branch of targetOwner/targetRepo, in the form
// <giteaRoot>/<targetOwner>/<targetRepo>/src/branch/<branch>/<resource>.
//
// Only the path portion is normalised with path.Join. The root is prepended
// verbatim, because running path.Join over it would collapse the "//" that
// follows the URL scheme (turning "https://host" into "https:/host").
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
	// Drop trailing slashes from the root so the result never contains "//" right after the host.
	root := client.giteaRoot
	for len(root) > 0 && root[len(root)-1] == '/' {
		root = root[:len(root)-1]
	}
	// Rooting the join at "/" yields a cleaned absolute path; empty elements are ignored by path.Join.
	return root + path.Join("/", targetOwner, targetRepo, "src/branch", branch, resource)
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource) reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
if err != nil { if err != nil {

View file

@ -6,7 +6,6 @@ import (
"path" "path"
"strings" "strings"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/html" "codeberg.org/codeberg/pages/html"
@ -27,7 +26,7 @@ const (
// Handler handles a single HTTP request to the web server. // Handler handles a single HTTP request to the web server.
func Handler(mainDomainSuffix, rawDomain string, func Handler(mainDomainSuffix, rawDomain string,
giteaClient *gitea.Client, giteaClient *gitea.Client,
giteaRoot, rawInfoPage string, rawInfoPage string,
blacklistedPaths, allowedCorsDomains []string, blacklistedPaths, allowedCorsDomains []string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey, dnsLookupCache, canonicalDomainCache cache.SetGetKey,
) http.HandlerFunc { ) http.HandlerFunc {
@ -85,51 +84,10 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
// Prepare request information to Gitea // Prepare request information to Gitea
var targetOwner, targetRepo, targetBranch, targetPath string
targetOptions := &upstream.Options{ targetOptions := &upstream.Options{
TryIndexPages: true, TryIndexPages: true,
} }
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
// also disallow search indexing and add a Link header to the canonical URL.
// TODO: move into external func to not alter vars indirectly
tryBranch1 := func(log zerolog.Logger, repo, branch string, _path []string, canonicalLink string) bool {
if repo == "" {
log.Debug().Msg("tryBranch: repo is empty")
return false
}
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
branch = strings.ReplaceAll(branch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch)
if branchTimestampResult == nil {
log.Debug().Msg("tryBranch: branch doesn't exist")
return false
}
// Branch exists, use it
targetRepo = repo
targetPath = path.Join(_path...)
targetBranch = branchTimestampResult.Branch
targetOptions.BranchTimestamp = branchTimestampResult.Timestamp
if canonicalLink != "" {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
"; rel=\"canonical\"",
)
}
log.Debug().Msg("tryBranch: true")
return true
}
log.Debug().Msg("preparations") log.Debug().Msg("preparations")
if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) { if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) {
// Serve raw content from RawDomain // Serve raw content from RawDomain
@ -144,21 +102,19 @@ func Handler(mainDomainSuffix, rawDomain string,
ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect) ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect)
return return
} }
targetOwner = pathElements[0] targetOwner := pathElements[0]
targetRepo = pathElements[1] targetRepo := pathElements[1]
// raw.codeberg.org/example/myrepo/@main/index.html // raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") { if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch") log.Debug().Msg("raw domain preparations, now trying with specified branch")
if newRepo, newPath, newBranch, newTimestamp, works := tryBranch2(log, ctx, giteaClient, newPath := path.Join(pathElements[3:]...)
targetOwner, targetRepo, pathElements[2][1:], pathElements[3:], branch := pathElements[2][1:]
giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p", if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works {
); works { targetOptions.BranchTimestamp = timestampBranch.Timestamp
targetOptions.BranchTimestamp = *newTimestamp
log.Debug().Msg("tryBranch, now trying upstream 1") log.Debug().Msg("tryBranch, now trying upstream 1")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, newRepo, newBranch, newPath, targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
canonicalDomainCache)
return return
} }
log.Debug().Msg("missing branch info") log.Debug().Msg("missing branch info")
@ -167,14 +123,15 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
log.Debug().Msg("raw domain preparations, now trying with default branch") log.Debug().Msg("raw domain preparations, now trying with default branch")
tryBranch(log, newPath := path.Join(pathElements[2:]...)
targetRepo, "", pathElements[2:], if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, "", newPath, true); works {
giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p", targetOptions.BranchTimestamp = timestampBranch.Timestamp
) log.Debug().Msg("tryBranch, now trying upstream 2")
log.Debug().Msg("tryBranch, now trying upstream 2") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
targetOptions, targetOwner, targetRepo, targetBranch, targetPath, } else {
canonicalDomainCache) log.Error().Msg("TODO: is this a bug?")
}
return return
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) { } else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
@ -182,9 +139,8 @@ func Handler(mainDomainSuffix, rawDomain string,
log.Debug().Msg("main domain suffix") log.Debug().Msg("main domain suffix")
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/") pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
targetOwner = strings.TrimSuffix(trimmedHost, mainDomainSuffix) targetOwner := strings.TrimSuffix(trimmedHost, mainDomainSuffix)
targetRepo = pathElements[0] targetRepo := pathElements[0]
targetPath = strings.Trim(strings.Join(pathElements[1:], "/"), "/")
if targetOwner == "www" { if targetOwner == "www" {
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname? // www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
@ -203,14 +159,13 @@ func Handler(mainDomainSuffix, rawDomain string,
log.Debug().Msg("main domain preparations, now trying with specified repo & branch") log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
branch := pathElements[1][1:] branch := pathElements[1][1:]
if tryBranch(log, newPath := path.Join(pathElements[2:]...)
pathElements[0], branch, pathElements[2:], repo := pathElements[0]
"/"+pathElements[0]+"/%p", if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works {
) { targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 3") log.Debug().Msg("tryBranch, now trying upstream 3")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath, targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
canonicalDomainCache)
} else { } else {
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo), fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo),
@ -224,12 +179,13 @@ func Handler(mainDomainSuffix, rawDomain string,
if strings.HasPrefix(pathElements[0], "@") { if strings.HasPrefix(pathElements[0], "@") {
log.Debug().Msg("main domain preparations, now trying with specified branch") log.Debug().Msg("main domain preparations, now trying with specified branch")
branch := pathElements[0][1:] branch := pathElements[0][1:]
if tryBranch(log, repo := "pages"
"pages", branch, pathElements[1:], "/%p") { newPath := path.Join(pathElements[1:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 4") log.Debug().Msg("tryBranch, now trying upstream 4")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, "pages", targetBranch, targetPath, targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
canonicalDomainCache)
} else { } else {
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"), fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"),
@ -242,37 +198,42 @@ func Handler(mainDomainSuffix, rawDomain string,
// example.codeberg.page/myrepo/index.html // example.codeberg.page/myrepo/index.html
// example.codeberg.page/pages/... is not allowed here. // example.codeberg.page/pages/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo") log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != "pages" && tryBranch(log, if pathElements[0] != "pages" {
pathElements[0], "pages", pathElements[1:], "") { repo := pathElements[0]
log.Debug().Msg("tryBranch, now trying upstream 5") branch := "pages"
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, newPath := path.Join(pathElements[1:]...)
targetOptions, targetOwner, targetRepo, targetBranch, targetPath, if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, false); works {
canonicalDomainCache) targetOptions.BranchTimestamp = timestampBranch.Timestamp
return log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
return
}
} }
// Try to use the "pages" repo on its default branch // Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html // example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch") log.Debug().Msg("main domain preparations, now trying with default repo/branch")
if tryBranch(log, newPath := path.Join(pathElements...)
"pages", "", pathElements, "") { repo := "pages"
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, "", newPath, false); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 6") log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath, targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
canonicalDomainCache)
return return
} }
// Couldn't find a valid repo/branch // Couldn't find a valid repo/branch
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
fmt.Sprintf("couldn't find a valid repo[%s]/branch[%s]", targetRepo, targetBranch), fmt.Sprintf("couldn't find a valid repo[%s]", targetRepo),
http.StatusFailedDependency) http.StatusFailedDependency)
return return
} else { } else {
trimmedHostStr := string(trimmedHost) trimmedHostStr := string(trimmedHost)
// Serve pages from custom domains // Serve pages from custom domains
targetOwner, targetRepo, targetBranch = dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache) targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache)
if targetOwner == "" { if targetOwner == "" {
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
"could not obtain repo owner from custom domain", "could not obtain repo owner from custom domain",
@ -281,18 +242,19 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/") pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
canonicalLink := "" canonicalLink := false
if strings.HasPrefix(pathElements[0], "@") { if strings.HasPrefix(pathElements[0], "@") {
targetBranch = pathElements[0][1:] targetBranch = pathElements[0][1:]
pathElements = pathElements[1:] pathElements = pathElements[1:]
canonicalLink = "/%p" canonicalLink = true
} }
// Try to use the given repo on the given branch or the default branch // Try to use the given repo on the given branch or the default branch
newPath := path.Join(pathElements...)
log.Debug().Msg("custom domain preparations, now trying with details from DNS") log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if tryBranch(log, if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, targetBranch, newPath, canonicalLink); works {
targetRepo, targetBranch, pathElements, canonicalLink) { targetOptions.BranchTimestamp = timestampBranch.Timestamp
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache) canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, timestampBranch.Branch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if !valid { if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest) html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return return
@ -300,7 +262,7 @@ func Handler(mainDomainSuffix, rawDomain string,
// only redirect if the target is also a codeberg page! // only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache) targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
if targetOwner != "" { if targetOwner != "" {
ctx.Redirect("https://"+canonicalDomain+string(ctx.Path()), http.StatusTemporaryRedirect) ctx.Redirect("https://"+canonicalDomain+string(newPath), http.StatusTemporaryRedirect)
return return
} }
@ -310,8 +272,7 @@ func Handler(mainDomainSuffix, rawDomain string,
log.Debug().Msg("tryBranch, now trying upstream 7") log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, targetBranch, targetPath, targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
canonicalDomainCache)
return return
} }

View file

@ -11,12 +11,11 @@ import (
) )
func TestHandlerPerformance(t *testing.T) { func TestHandlerPerformance(t *testing.T) {
giteaRoot := "https://codeberg.org" giteaClient, _ := gitea.NewClient("https://codeberg.org", "", cache.NewKeyValueCache(), false, false)
giteaClient, _ := gitea.NewClient(giteaRoot, "", cache.NewKeyValueCache(), false, false)
testHandler := Handler( testHandler := Handler(
"codeberg.page", "raw.codeberg.org", "codeberg.page", "raw.codeberg.org",
giteaClient, giteaClient,
giteaRoot, "https://docs.codeberg.org/pages/raw-content/", "https://docs.codeberg.org/pages/raw-content/",
[]string{"/.well-known/acme-challenge/"}, []string{"/.well-known/acme-challenge/"},
[]string{"raw.codeberg.org", "fonts.codeberg.org", "design.codeberg.org"}, []string{"raw.codeberg.org", "fonts.codeberg.org", "design.codeberg.org"},
cache.NewKeyValueCache(), cache.NewKeyValueCache(),

View file

@ -2,9 +2,7 @@ package server
import ( import (
"net/http" "net/http"
"path"
"strings" "strings"
"time"
"codeberg.org/codeberg/pages/html" "codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache" "codeberg.org/codeberg/pages/server/cache"
@ -51,13 +49,14 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
} }
} }
func tryBranch2(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client, // tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty,
repoOwner, repoName, branch string, _path []string, canonicalLink string) ( // it will also disallow search indexing and add a Link header to the canonical URL.
targetRepo, targetPath, targetBranch string, branchTimestamp *time.Time, func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
works bool) { repoOwner, repoName, branch, path string, canonicalLink bool,
) (*gitea.BranchTimestamp, bool) {
if repoName == "" { if repoName == "" {
log.Debug().Msg("tryBranch: repo == ''") log.Debug().Msg("tryBranch: repo is empty")
return "", "", "", nil, false return nil, false
} }
// Replace "~" to "/" so we can access branch that contains slash character // Replace "~" to "/" so we can access branch that contains slash character
@ -68,25 +67,17 @@ func tryBranch2(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Cli
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch) branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch)
if branchTimestampResult == nil { if branchTimestampResult == nil {
log.Debug().Msg("tryBranch: branch doesn't exist") log.Debug().Msg("tryBranch: branch doesn't exist")
return "", "", "", nil, false return nil, false
} }
// Branch exists, use it if canonicalLink {
targetRepo = repoName
targetPath = path.Join(_path...)
targetBranch = branchTimestampResult.Branch
branchTimestamp = &branchTimestampResult.Timestamp
if canonicalLink != "" {
// Hide from search machines & add canonical link // Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex") ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link", ctx.RespWriter.Header().Set("Link",
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+ giteaClient.ContentWebLink(repoOwner, repoName, branchTimestampResult.Branch, path)+
"; rel=\"canonical\"", "; rel=\"canonical\"")
)
} }
log.Debug().Msg("tryBranch: true") log.Debug().Msg("tryBranch: true")
return return branchTimestampResult, true
} }