tryUpstream always uses the targetOpt generated by tryBranch

This commit is contained in:
6543 2022-11-12 17:50:08 +01:00
parent 658de3956e
commit 8519bba527
No known key found for this signature in database
GPG key ID: C99B82E40B027BAE
4 changed files with 116 additions and 108 deletions

View file

@ -89,18 +89,12 @@ func Handler(mainDomainSuffix, rawDomain string,
// Prepare request information to Gitea
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
targetOptions := &upstream.Options{
TryIndexPages: true,
}
log.Debug().Msg("preparations")
if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) {
// Serve raw content from RawDomain
log.Debug().Msg("raw domain")
targetOptions.TryIndexPages = false
targetOptions.ServeRaw = true
if len(pathElements) < 2 {
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect)
@ -110,15 +104,16 @@ func Handler(mainDomainSuffix, rawDomain string,
// raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch")
newPath := path.Join(pathElements[3:]...)
branch := pathElements[2][1:]
repoOwner := pathElements[0]
repo := pathElements[1]
if timestampBranch, works := tryBranch(log, ctx, giteaClient, repoOwner, repo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 1")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, repoOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: false,
ServeRaw: true,
TargetOwner: pathElements[0],
TargetRepo: pathElements[1],
TargetBranch: pathElements[2][1:],
TargetPath: path.Join(pathElements[3:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
log.Debug().Msg("missing branch info")
@ -127,18 +122,20 @@ func Handler(mainDomainSuffix, rawDomain string,
}
log.Debug().Msg("raw domain preparations, now trying with default branch")
repoOwner := pathElements[0]
repo := pathElements[1]
newPath := path.Join(pathElements[2:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, repoOwner, repo, "", newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 2")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, repoOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
return
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: false,
ServeRaw: true,
TargetOwner: pathElements[0],
TargetRepo: pathElements[1],
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with default branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusNotFound)
}
html.ReturnErrorPage(ctx, fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", repoOwner, repo), http.StatusNotFound)
return
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
@ -164,17 +161,17 @@ func Handler(mainDomainSuffix, rawDomain string,
}
log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
branch := pathElements[1][1:]
newPath := path.Join(pathElements[2:]...)
repo := pathElements[0]
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 3")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: pathElements[0],
TargetBranch: pathElements[1][1:],
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve with specified repo and branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo),
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusFailedDependency)
}
return
@ -184,17 +181,17 @@ func Handler(mainDomainSuffix, rawDomain string,
// example.codeberg.page/@main/index.html
if strings.HasPrefix(pathElements[0], "@") {
log.Debug().Msg("main domain preparations, now trying with specified branch")
branch := pathElements[0][1:]
repo := "pages"
newPath := path.Join(pathElements[1:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
log.Debug().Msg("tryBranch, now trying upstream 4")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: "pages",
TargetBranch: pathElements[0][1:],
TargetPath: path.Join(pathElements[1:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve default pages repo with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"),
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusFailedDependency)
}
return
@ -205,14 +202,14 @@ func Handler(mainDomainSuffix, rawDomain string,
// example.codeberg.page/pages/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != "pages" {
repo := pathElements[0]
branch := "pages"
newPath := path.Join(pathElements[1:]...)
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, false); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: pathElements[0],
TargetBranch: "pages",
TargetPath: path.Join(pathElements[1:]...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
}
@ -220,13 +217,13 @@ func Handler(mainDomainSuffix, rawDomain string,
// Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch")
newPath := path.Join(pathElements...)
repo := "pages"
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, "", newPath, false); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: "pages",
TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}
@ -256,11 +253,14 @@ func Handler(mainDomainSuffix, rawDomain string,
}
// Try to use the given repo on the given branch or the default branch
newPath := path.Join(pathParts...)
log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, targetBranch, newPath, canonicalLink); works {
targetOptions.BranchTimestamp = timestampBranch.Timestamp
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, timestampBranch.Branch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TargetOwner: targetOwner,
TargetRepo: targetRepo,
TargetBranch: targetBranch,
TargetPath: path.Join(pathParts...),
}, canonicalLink); works {
canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOpt.TargetOwner, targetOpt.TargetRepo, targetOpt.TargetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return
@ -268,7 +268,7 @@ func Handler(mainDomainSuffix, rawDomain string,
// only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
if targetOwner != "" {
ctx.Redirect("https://"+canonicalDomain+string(newPath), http.StatusTemporaryRedirect)
ctx.Redirect("https://"+canonicalDomain+string(targetOpt.TargetPath), http.StatusTemporaryRedirect)
return
}
@ -277,8 +277,7 @@ func Handler(mainDomainSuffix, rawDomain string,
}
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
return
}

View file

@ -16,18 +16,15 @@ import (
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, trimmedHost string,
targetOptions *upstream.Options,
targetOwner, targetRepo, targetBranch, targetPath string,
options *upstream.Options,
canonicalDomainCache cache.SetGetKey,
) {
// check if a canonical domain exists on a request on MainDomain
if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, "", string(mainDomainSuffix), canonicalDomainCache)
canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, options.TargetOwner, options.TargetRepo, options.TargetBranch, "", string(mainDomainSuffix), canonicalDomainCache)
if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) {
canonicalPath := ctx.Req.RequestURI
if targetRepo != "pages" {
if options.TargetRepo != "pages" {
path := strings.SplitN(canonicalPath, "/", 3)
if len(path) >= 3 {
canonicalPath = "/" + path[2]
@ -38,14 +35,11 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
}
}
targetOptions.TargetOwner = targetOwner
targetOptions.TargetRepo = targetRepo
targetOptions.TargetBranch = targetBranch
targetOptions.TargetPath = targetPath
targetOptions.Host = string(trimmedHost)
// add host for debugging
options.Host = string(trimmedHost)
// Try to request the file from the Gitea API
if !targetOptions.Upstream(ctx, giteaClient) {
if !options.Upstream(ctx, giteaClient) {
html.ReturnErrorPage(ctx, "", ctx.StatusCode)
}
}
@ -53,20 +47,20 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is true,
// it will also disallow search indexing and add a Link header to the canonical URL.
func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
repoOwner, repoName, branch, path string, canonicalLink bool,
) (*gitea.BranchTimestamp, bool) {
if repoName == "" {
log.Debug().Msg("tryBranch: repo is empty")
targetOptions *upstream.Options, canonicalLink bool,
) (*upstream.Options, bool) {
if targetOptions.TargetOwner == "" || targetOptions.TargetRepo == "" {
log.Debug().Msg("tryBranch: owner or repo is empty")
return nil, false
}
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
branch = strings.ReplaceAll(branch, "~", "/")
targetOptions.TargetBranch = strings.ReplaceAll(targetOptions.TargetBranch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch)
if branchTimestampResult == nil {
branchExist, _ := targetOptions.GetBranchTimestamp(giteaClient)
if !branchExist {
log.Debug().Msg("tryBranch: branch doesn't exist")
return nil, false
}
@ -74,11 +68,9 @@ func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie
if canonicalLink {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link",
giteaClient.ContentWebLink(repoOwner, repoName, branchTimestampResult.Branch, path)+
"; rel=\"canonical\"")
ctx.RespWriter.Header().Set("Link", targetOptions.ContentWebLink(giteaClient)+"; rel=\"canonical\"")
}
log.Debug().Msg("tryBranch: true")
return branchTimestampResult, true
return targetOptions, true
}

View file

@ -2,35 +2,46 @@ package upstream
import (
"errors"
"fmt"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/server/gitea"
)
// GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
// (or nil if the branch doesn't exist)
func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string) *gitea.BranchTimestamp {
log := log.With().Strs("BranchInfo", []string{owner, repo, branch}).Logger()
// GetBranchTimestamp finds the default branch (if branch is "") and saves the branch and its last modification time to Options
func (o *Options) GetBranchTimestamp(giteaClient *gitea.Client) (bool, error) {
log := log.With().Strs("BranchInfo", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch}).Logger()
if len(branch) == 0 {
if len(o.TargetBranch) == 0 {
// Get default branch
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(owner, repo)
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(o.TargetOwner, o.TargetRepo)
if err != nil {
log.Err(err).Msg("Could't fetch default branch from repository")
return nil
return false, err
}
log.Debug().Msgf("Succesfully fetched default branch %q from Gitea", defaultBranch)
branch = defaultBranch
o.TargetBranch = defaultBranch
}
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, branch)
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(o.TargetOwner, o.TargetRepo, o.TargetBranch)
if err != nil {
if !errors.Is(err, gitea.ErrorNotFound) {
log.Error().Err(err).Msg("Could not get latest commit's timestamp from branch")
}
return nil
return false, err
}
if timestamp == nil || timestamp.Branch == "" {
return false, fmt.Errorf("empty response")
}
log.Debug().Msgf("Succesfully fetched latest commit's timestamp from branch: %#v", timestamp)
return timestamp
o.BranchTimestamp = timestamp.Timestamp
o.TargetBranch = timestamp.Branch
return true, nil
}
// ContentWebLink returns the link produced by giteaClient.ContentWebLink for
// the target owner, repo, branch and path stored in the options.
//
// Only the bare link is returned. tryBranch, which puts the result into the
// "Link" response header, appends the `; rel="canonical"` parameter itself;
// appending it here as well would emit the parameter twice in that header.
func (o *Options) ContentWebLink(giteaClient *gitea.Client) string {
	return giteaClient.ContentWebLink(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath)
}

View file

@ -34,10 +34,10 @@ var upstreamNotFoundPages = []string{
// Options provides various options for the upstream request.
type Options struct {
TargetOwner,
TargetRepo,
TargetBranch,
TargetPath,
TargetOwner string
TargetRepo string
TargetBranch string
TargetPath string
// Used for debugging purposes.
Host string
@ -62,16 +62,22 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
// Check if the branch exists and when it was modified
if o.BranchTimestamp.IsZero() {
branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch)
if branch == nil || branch.Branch == "" {
branchExist, err := o.GetBranchTimestamp(giteaClient)
// handle 404
if err != nil && errors.Is(err, gitea.ErrorNotFound) || !branchExist {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch %q", o.TargetBranch),
fmt.Sprintf("branch %q for '%s/%s' not found", o.TargetBranch, o.TargetOwner, o.TargetRepo),
http.StatusNotFound)
return true
}
// handle unexpected errors
if err != nil {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch %q: %v", o.TargetBranch, err),
http.StatusFailedDependency)
return true
}
o.TargetBranch = branch.Branch
o.BranchTimestamp = branch.Timestamp
}
// Check if the browser has a cached version