tryUpstream always uses the targetOpt generated by tryBranch

This commit is contained in:
6543 2022-11-12 17:50:08 +01:00
parent 658de3956e
commit 8519bba527
No known key found for this signature in database
GPG key ID: C99B82E40B027BAE
4 changed files with 116 additions and 108 deletions

View file

@ -89,18 +89,12 @@ func Handler(mainDomainSuffix, rawDomain string,
// Prepare request information to Gitea // Prepare request information to Gitea
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/") pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
targetOptions := &upstream.Options{
TryIndexPages: true,
}
log.Debug().Msg("preparations") log.Debug().Msg("preparations")
if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) { if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) {
// Serve raw content from RawDomain // Serve raw content from RawDomain
log.Debug().Msg("raw domain") log.Debug().Msg("raw domain")
targetOptions.TryIndexPages = false
targetOptions.ServeRaw = true
if len(pathElements) < 2 { if len(pathElements) < 2 {
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required // https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect) ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect)
@ -110,15 +104,16 @@ func Handler(mainDomainSuffix, rawDomain string,
// raw.codeberg.org/example/myrepo/@main/index.html // raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") { if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch") log.Debug().Msg("raw domain preparations, now trying with specified branch")
newPath := path.Join(pathElements[3:]...) if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
branch := pathElements[2][1:] TryIndexPages: false,
repoOwner := pathElements[0] ServeRaw: true,
repo := pathElements[1] TargetOwner: pathElements[0],
if timestampBranch, works := tryBranch(log, ctx, giteaClient, repoOwner, repo, branch, newPath, true); works { TargetRepo: pathElements[1],
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetBranch: pathElements[2][1:],
log.Debug().Msg("tryBranch, now trying upstream 1") TargetPath: path.Join(pathElements[3:]...),
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, }, true); works {
targetOptions, repoOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) log.Trace().Msg("tryUpstream: serve raw domain with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return return
} }
log.Debug().Msg("missing branch info") log.Debug().Msg("missing branch info")
@ -127,18 +122,20 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
log.Debug().Msg("raw domain preparations, now trying with default branch") log.Debug().Msg("raw domain preparations, now trying with default branch")
repoOwner := pathElements[0] if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
repo := pathElements[1] TryIndexPages: false,
newPath := path.Join(pathElements[2:]...) ServeRaw: true,
if timestampBranch, works := tryBranch(log, ctx, giteaClient, repoOwner, repo, "", newPath, true); works { TargetOwner: pathElements[0],
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetRepo: pathElements[1],
log.Debug().Msg("tryBranch, now trying upstream 2") TargetPath: path.Join(pathElements[2:]...),
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, }, true); works {
targetOptions, repoOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) log.Trace().Msg("tryUpstream: serve raw domain with default branch")
return tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusNotFound)
} }
html.ReturnErrorPage(ctx, fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", repoOwner, repo), http.StatusNotFound)
return return
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) { } else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
@ -164,17 +161,17 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
log.Debug().Msg("main domain preparations, now trying with specified repo & branch") log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
branch := pathElements[1][1:] if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
newPath := path.Join(pathElements[2:]...) TargetOwner: targetOwner,
repo := pathElements[0] TargetRepo: pathElements[0],
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works { TargetBranch: pathElements[1][1:],
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetPath: path.Join(pathElements[2:]...),
log.Debug().Msg("tryBranch, now trying upstream 3") }, true); works {
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, log.Trace().Msg("tryUpstream: serve with specified repo and branch")
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else { } else {
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo), fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusFailedDependency) http.StatusFailedDependency)
} }
return return
@ -184,17 +181,17 @@ func Handler(mainDomainSuffix, rawDomain string,
// example.codeberg.page/@main/index.html // example.codeberg.page/@main/index.html
if strings.HasPrefix(pathElements[0], "@") { if strings.HasPrefix(pathElements[0], "@") {
log.Debug().Msg("main domain preparations, now trying with specified branch") log.Debug().Msg("main domain preparations, now trying with specified branch")
branch := pathElements[0][1:] if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
repo := "pages" TargetOwner: targetOwner,
newPath := path.Join(pathElements[1:]...) TargetRepo: "pages",
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works { TargetBranch: pathElements[0][1:],
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetPath: path.Join(pathElements[1:]...),
log.Debug().Msg("tryBranch, now trying upstream 4") }, true); works {
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, log.Trace().Msg("tryUpstream: serve default pages repo with specified branch")
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else { } else {
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"), fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusFailedDependency) http.StatusFailedDependency)
} }
return return
@ -205,14 +202,14 @@ func Handler(mainDomainSuffix, rawDomain string,
// example.codeberg.page/pages/... is not allowed here. // example.codeberg.page/pages/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo") log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != "pages" { if pathElements[0] != "pages" {
repo := pathElements[0] if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
branch := "pages" TargetOwner: targetOwner,
newPath := path.Join(pathElements[1:]...) TargetRepo: pathElements[0],
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, false); works { TargetBranch: "pages",
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetPath: path.Join(pathElements[1:]...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 5") log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
return return
} }
} }
@ -220,13 +217,13 @@ func Handler(mainDomainSuffix, rawDomain string,
// Try to use the "pages" repo on its default branch // Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html // example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch") log.Debug().Msg("main domain preparations, now trying with default repo/branch")
newPath := path.Join(pathElements...) if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
repo := "pages" TargetOwner: targetOwner,
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, "", newPath, false); works { TargetRepo: "pages",
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6") log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
return return
} }
@ -256,11 +253,14 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
// Try to use the given repo on the given branch or the default branch // Try to use the given repo on the given branch or the default branch
newPath := path.Join(pathParts...)
log.Debug().Msg("custom domain preparations, now trying with details from DNS") log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, targetBranch, newPath, canonicalLink); works { if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
targetOptions.BranchTimestamp = timestampBranch.Timestamp TargetOwner: targetOwner,
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, timestampBranch.Branch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache) TargetRepo: targetRepo,
TargetBranch: targetBranch,
TargetPath: path.Join(pathParts...),
}, canonicalLink); works {
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOpt.TargetOwner, targetOpt.TargetRepo, targetOpt.TargetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if !valid { if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest) html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return return
@ -268,7 +268,7 @@ func Handler(mainDomainSuffix, rawDomain string,
// only redirect if the target is also a codeberg page! // only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache) targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
if targetOwner != "" { if targetOwner != "" {
ctx.Redirect("https://"+canonicalDomain+string(newPath), http.StatusTemporaryRedirect) ctx.Redirect("https://"+canonicalDomain+string(targetOpt.TargetPath), http.StatusTemporaryRedirect)
return return
} }
@ -277,8 +277,7 @@ func Handler(mainDomainSuffix, rawDomain string,
} }
log.Debug().Msg("tryBranch, now trying upstream 7") log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
return return
} }

View file

@ -16,18 +16,15 @@ import (
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure. // tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, trimmedHost string, mainDomainSuffix, trimmedHost string,
options *upstream.Options,
targetOptions *upstream.Options,
targetOwner, targetRepo, targetBranch, targetPath string,
canonicalDomainCache cache.SetGetKey, canonicalDomainCache cache.SetGetKey,
) { ) {
// check if a canonical domain exists on a request on MainDomain // check if a canonical domain exists on a request on MainDomain
if strings.HasSuffix(trimmedHost, mainDomainSuffix) { if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, "", string(mainDomainSuffix), canonicalDomainCache) canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, options.TargetOwner, options.TargetRepo, options.TargetBranch, "", string(mainDomainSuffix), canonicalDomainCache)
if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) { if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) {
canonicalPath := ctx.Req.RequestURI canonicalPath := ctx.Req.RequestURI
if targetRepo != "pages" { if options.TargetRepo != "pages" {
path := strings.SplitN(canonicalPath, "/", 3) path := strings.SplitN(canonicalPath, "/", 3)
if len(path) >= 3 { if len(path) >= 3 {
canonicalPath = "/" + path[2] canonicalPath = "/" + path[2]
@ -38,14 +35,11 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
} }
} }
targetOptions.TargetOwner = targetOwner // add host for debugging
targetOptions.TargetRepo = targetRepo options.Host = string(trimmedHost)
targetOptions.TargetBranch = targetBranch
targetOptions.TargetPath = targetPath
targetOptions.Host = string(trimmedHost)
// Try to request the file from the Gitea API // Try to request the file from the Gitea API
if !targetOptions.Upstream(ctx, giteaClient) { if !options.Upstream(ctx, giteaClient) {
html.ReturnErrorPage(ctx, "", ctx.StatusCode) html.ReturnErrorPage(ctx, "", ctx.StatusCode)
} }
} }
@ -53,20 +47,20 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is true, // tryBranch checks if a branch exists and populates the target variables. If canonicalLink is true,
// it will also disallow search indexing and add a Link header to the canonical URL. // it will also disallow search indexing and add a Link header to the canonical URL.
func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client, func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
repoOwner, repoName, branch, path string, canonicalLink bool, targetOptions *upstream.Options, canonicalLink bool,
) (*gitea.BranchTimestamp, bool) { ) (*upstream.Options, bool) {
if repoName == "" { if targetOptions.TargetOwner == "" || targetOptions.TargetRepo == "" {
log.Debug().Msg("tryBranch: repo is empty") log.Debug().Msg("tryBranch: owner or repo is empty")
return nil, false return nil, false
} }
// Replace "~" with "/" so we can access branches that contain a slash character // Replace "~" with "/" so we can access branches that contain a slash character
// Branch names cannot contain "~", so doing this is okay // Branch names cannot contain "~", so doing this is okay
branch = strings.ReplaceAll(branch, "~", "/") targetOptions.TargetBranch = strings.ReplaceAll(targetOptions.TargetBranch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path // Check if the branch exists, otherwise treat it as a file path
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch) branchExist, _ := targetOptions.GetBranchTimestamp(giteaClient)
if branchTimestampResult == nil { if !branchExist {
log.Debug().Msg("tryBranch: branch doesn't exist") log.Debug().Msg("tryBranch: branch doesn't exist")
return nil, false return nil, false
} }
@ -74,11 +68,9 @@ func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie
if canonicalLink { if canonicalLink {
// Hide from search machines & add canonical link // Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex") ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link", ctx.RespWriter.Header().Set("Link", targetOptions.ContentWebLink(giteaClient)+"; rel=\"canonical\"")
giteaClient.ContentWebLink(repoOwner, repoName, branchTimestampResult.Branch, path)+
"; rel=\"canonical\"")
} }
log.Debug().Msg("tryBranch: true") log.Debug().Msg("tryBranch: true")
return branchTimestampResult, true return targetOptions, true
} }

View file

@ -2,35 +2,46 @@ package upstream
import ( import (
"errors" "errors"
"fmt"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/server/gitea" "codeberg.org/codeberg/pages/server/gitea"
) )
// GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch // GetBranchTimestamp finds the default branch (if TargetBranch is "") and saves the branch and its last modification time to Options
// (or nil if the branch doesn't exist) func (o *Options) GetBranchTimestamp(giteaClient *gitea.Client) (bool, error) {
func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string) *gitea.BranchTimestamp { log := log.With().Strs("BranchInfo", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch}).Logger()
log := log.With().Strs("BranchInfo", []string{owner, repo, branch}).Logger()
if len(branch) == 0 { if len(o.TargetBranch) == 0 {
// Get default branch // Get default branch
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(owner, repo) defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(o.TargetOwner, o.TargetRepo)
if err != nil { if err != nil {
log.Err(err).Msg("Could't fetch default branch from repository") log.Err(err).Msg("Could't fetch default branch from repository")
return nil return false, err
} }
log.Debug().Msgf("Succesfully fetched default branch %q from Gitea", defaultBranch) log.Debug().Msgf("Succesfully fetched default branch %q from Gitea", defaultBranch)
branch = defaultBranch o.TargetBranch = defaultBranch
} }
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, branch) timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(o.TargetOwner, o.TargetRepo, o.TargetBranch)
if err != nil { if err != nil {
if !errors.Is(err, gitea.ErrorNotFound) { if !errors.Is(err, gitea.ErrorNotFound) {
log.Error().Err(err).Msg("Could not get latest commit's timestamp from branch") log.Error().Err(err).Msg("Could not get latest commit's timestamp from branch")
} }
return nil return false, err
} }
if timestamp == nil || timestamp.Branch == "" {
return false, fmt.Errorf("empty response")
}
log.Debug().Msgf("Succesfully fetched latest commit's timestamp from branch: %#v", timestamp) log.Debug().Msgf("Succesfully fetched latest commit's timestamp from branch: %#v", timestamp)
return timestamp o.BranchTimestamp = timestamp.Timestamp
o.TargetBranch = timestamp.Branch
return true, nil
}
func (o *Options) ContentWebLink(giteaClient *gitea.Client) string {
return giteaClient.ContentWebLink(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath) + "; rel=\"canonical\""
} }

View file

@ -34,10 +34,10 @@ var upstreamNotFoundPages = []string{
// Options provides various options for the upstream request. // Options provides various options for the upstream request.
type Options struct { type Options struct {
TargetOwner, TargetOwner string
TargetRepo, TargetRepo string
TargetBranch, TargetBranch string
TargetPath, TargetPath string
// Used for debugging purposes. // Used for debugging purposes.
Host string Host string
@ -62,16 +62,22 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
// Check if the branch exists and when it was modified // Check if the branch exists and when it was modified
if o.BranchTimestamp.IsZero() { if o.BranchTimestamp.IsZero() {
branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch) branchExist, err := o.GetBranchTimestamp(giteaClient)
// handle 404
if branch == nil || branch.Branch == "" { if err != nil && errors.Is(err, gitea.ErrorNotFound) || !branchExist {
html.ReturnErrorPage(ctx, html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch %q", o.TargetBranch), fmt.Sprintf("branch %q for '%s/%s' not found", o.TargetBranch, o.TargetOwner, o.TargetRepo),
http.StatusNotFound)
return true
}
// handle unexpected errors
if err != nil {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch %q: %v", o.TargetBranch, err),
http.StatusFailedDependency) http.StatusFailedDependency)
return true return true
} }
o.TargetBranch = branch.Branch
o.BranchTimestamp = branch.Timestamp
} }
// Check if the browser has a cached version // Check if the browser has a cached version