mirror of
https://codeberg.org/Codeberg/pages-server.git
synced 2024-11-18 10:29:43 +00:00
Implement SEO optimizations and improve error handling and branch detection
This commit is contained in:
parent
c4bae34b71
commit
8ead10c82e
2 changed files with 111 additions and 71 deletions
160
handler.go
160
handler.go
|
@ -63,16 +63,58 @@ func handler(ctx *fasthttp.RequestCtx) {
|
||||||
TryIndexPages: true,
|
TryIndexPages: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
|
||||||
|
// also disallow search indexing and add a Link header to the canonical URL.
|
||||||
|
var tryBranch = func(repo string, branch string, path []string, canonicalLink string) bool {
|
||||||
|
if repo == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
fmt.Printf("Trying branch: %s/%s/%s with path %v\n", targetOwner, repo, branch, path)
|
||||||
|
|
||||||
|
escapedBranch, _ := url.PathUnescape(branch)
|
||||||
|
if escapedBranch == "" {
|
||||||
|
escapedBranch = branch
|
||||||
|
}
|
||||||
|
// Check if the branch exists, otherwise treat it as a file path
|
||||||
|
targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, repo, branch)
|
||||||
|
fmt.Printf("Branch %s has timestamp %v\n", targetBranch, targetOptions.BranchTimestamp)
|
||||||
|
if targetOptions.BranchTimestamp != (time.Time{}) {
|
||||||
|
// Branch exists, use it
|
||||||
|
targetRepo = repo
|
||||||
|
targetPath = strings.Trim(strings.Join(path, "/"), "/")
|
||||||
|
|
||||||
|
if canonicalLink != "" {
|
||||||
|
// Hide from search machines & add canonical link
|
||||||
|
ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex")
|
||||||
|
ctx.Response.Header.Set("Link",
|
||||||
|
strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
|
||||||
|
"; rel=\"canonical\"",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
} else {
|
||||||
|
// branch doesn't exist
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
|
||||||
|
var tryUpstream = func() {
|
||||||
|
// Try to request the file from the Gitea API
|
||||||
|
if !upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath, targetOptions) {
|
||||||
|
returnErrorPage(ctx, ctx.Response.StatusCode())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if RawDomain != nil && bytes.Equal(ctx.Request.Host(), RawDomain) {
|
if RawDomain != nil && bytes.Equal(ctx.Request.Host(), RawDomain) {
|
||||||
// Serve raw content from RawDomain
|
// Serve raw content from RawDomain
|
||||||
|
|
||||||
// TODO: add canonical link and "X-Robots-Tag: noarchive, noindex"
|
|
||||||
|
|
||||||
targetOptions.TryIndexPages = false
|
targetOptions.TryIndexPages = false
|
||||||
targetOptions.ForbiddenMimeTypes["text/html"] = struct{}{}
|
targetOptions.ForbiddenMimeTypes["text/html"] = struct{}{}
|
||||||
targetOptions.DefaultMimeType = "text/plain; charset=utf-8"
|
targetOptions.DefaultMimeType = "text/plain; charset=utf-8"
|
||||||
|
|
||||||
pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 4)
|
pathElements := strings.Split(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/")
|
||||||
if len(pathElements) < 2 {
|
if len(pathElements) < 2 {
|
||||||
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
|
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
|
||||||
ctx.Redirect(RawInfoPage, fasthttp.StatusTemporaryRedirect)
|
ctx.Redirect(RawInfoPage, fasthttp.StatusTemporaryRedirect)
|
||||||
|
@ -80,46 +122,74 @@ func handler(ctx *fasthttp.RequestCtx) {
|
||||||
}
|
}
|
||||||
targetOwner = pathElements[0]
|
targetOwner = pathElements[0]
|
||||||
targetRepo = pathElements[1]
|
targetRepo = pathElements[1]
|
||||||
if len(pathElements) > 3 {
|
|
||||||
targetPath = strings.Trim(pathElements[2]+"/"+pathElements[3], "/")
|
|
||||||
} else if len(pathElements) > 2 {
|
|
||||||
targetPath = pathElements[2]
|
|
||||||
}
|
|
||||||
|
|
||||||
// raw.codeberg.page/example/myrepo/@main/index.html
|
// raw.codeberg.page/example/myrepo/@main/index.html
|
||||||
if len(pathElements) > 3 && strings.HasPrefix(pathElements[2], "@") {
|
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
|
||||||
branch, _ := url.PathUnescape(pathElements[2][1:])
|
if tryBranch(targetRepo, pathElements[2][1:], pathElements[3:],
|
||||||
if branch == "" {
|
string(GiteaRoot)+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
|
||||||
branch = pathElements[2][1:]
|
) {
|
||||||
|
tryUpstream()
|
||||||
|
return
|
||||||
}
|
}
|
||||||
// Check if the branch exists, otherwise treat it as a file path
|
returnErrorPage(ctx, fasthttp.StatusFailedDependency)
|
||||||
targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, branch)
|
return
|
||||||
if targetOptions.BranchTimestamp != (time.Time{}) {
|
|
||||||
targetPath = strings.Trim(pathElements[3], "/") // branch exists, use it
|
|
||||||
} else {
|
} else {
|
||||||
targetBranch = "" // branch doesn't exist, use default branch
|
tryBranch(targetRepo, "", pathElements[2:],
|
||||||
}
|
string(GiteaRoot)+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
|
||||||
|
)
|
||||||
|
tryUpstream()
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if bytes.HasSuffix(ctx.Request.Host(), MainDomainSuffix) {
|
} else if bytes.HasSuffix(ctx.Request.Host(), MainDomainSuffix) {
|
||||||
// Serve pages from subdomains of MainDomainSuffix
|
// Serve pages from subdomains of MainDomainSuffix
|
||||||
|
|
||||||
// TODO: add @branch syntax with "X-Robots-Tag: noarchive, noindex"
|
pathElements := strings.Split(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/")
|
||||||
|
|
||||||
pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 2)
|
|
||||||
targetOwner = string(bytes.TrimSuffix(ctx.Request.Host(), MainDomainSuffix))
|
targetOwner = string(bytes.TrimSuffix(ctx.Request.Host(), MainDomainSuffix))
|
||||||
targetRepo = pathElements[0]
|
targetRepo = pathElements[0]
|
||||||
if len(pathElements) > 1 {
|
targetPath = strings.Trim(strings.Join(pathElements[1:], "/"), "/")
|
||||||
targetPath = strings.Trim(pathElements[1], "/")
|
|
||||||
|
// Check if the first directory is a repo with the second directory as a branch
|
||||||
|
// example.codeberg.page/myrepo/@main/index.html
|
||||||
|
if len(pathElements) > 1 && strings.HasPrefix(pathElements[1], "@") {
|
||||||
|
if tryBranch(pathElements[0], pathElements[1][1:], pathElements[2:],
|
||||||
|
"/"+pathElements[0]+"/%p",
|
||||||
|
) {
|
||||||
|
tryUpstream()
|
||||||
|
} else {
|
||||||
|
returnErrorPage(ctx, fasthttp.StatusFailedDependency)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the first directory is a branch for the "pages" repo
|
||||||
|
// example.codeberg.page/@main/index.html
|
||||||
|
if strings.HasPrefix(pathElements[0], "@") {
|
||||||
|
if tryBranch("pages", pathElements[0][1:], pathElements[1:], "/%p") {
|
||||||
|
tryUpstream()
|
||||||
|
} else {
|
||||||
|
returnErrorPage(ctx, fasthttp.StatusFailedDependency)
|
||||||
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the first directory is a repo with a "pages" branch
|
// Check if the first directory is a repo with a "pages" branch
|
||||||
targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, "pages")
|
// example.codeberg.page/myrepo/index.html
|
||||||
if targetOptions.BranchTimestamp == (time.Time{}) {
|
if tryBranch(pathElements[0], "pages", pathElements[1:], "") {
|
||||||
targetRepo = "pages"
|
tryUpstream()
|
||||||
targetBranch = ""
|
return
|
||||||
targetPath = strings.Trim(pathElements[0]+"/"+targetPath, "/")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to use the "pages" repo on its default branch
|
||||||
|
// example.codeberg.page/index.html
|
||||||
|
if tryBranch("pages", "", pathElements, "") {
|
||||||
|
tryUpstream()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Couldn't find a valid repo/branch
|
||||||
|
returnErrorPage(ctx, fasthttp.StatusFailedDependency)
|
||||||
|
return
|
||||||
} else {
|
} else {
|
||||||
// Serve pages from external domains
|
// Serve pages from external domains
|
||||||
|
|
||||||
|
@ -129,23 +199,6 @@ func handler(ctx *fasthttp.RequestCtx) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if a username can't exist because it's reserved (we'd risk to hit a Gitea route in that case)
|
|
||||||
if _, ok := ReservedUsernames[targetOwner]; ok {
|
|
||||||
returnErrorPage(ctx, fasthttp.StatusForbidden)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for blob path
|
|
||||||
if strings.HasPrefix(targetPath, "blob/") {
|
|
||||||
returnErrorPage(ctx, fasthttp.StatusForbidden)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to request the file from the Gitea API
|
|
||||||
if !upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath, targetOptions) {
|
|
||||||
returnErrorPage(ctx, ctx.Response.StatusCode())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// returnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced
|
// returnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced
|
||||||
|
@ -153,7 +206,7 @@ func handler(ctx *fasthttp.RequestCtx) {
|
||||||
func returnErrorPage(ctx *fasthttp.RequestCtx, code int) {
|
func returnErrorPage(ctx *fasthttp.RequestCtx, code int) {
|
||||||
ctx.Response.SetStatusCode(code)
|
ctx.Response.SetStatusCode(code)
|
||||||
ctx.Response.Header.SetContentType("text/html; charset=utf-8")
|
ctx.Response.Header.SetContentType("text/html; charset=utf-8")
|
||||||
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code) + " " + fasthttp.StatusMessage(code))))
|
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+fasthttp.StatusMessage(code))))
|
||||||
}
|
}
|
||||||
|
|
||||||
// getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
|
// getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
|
||||||
|
@ -163,8 +216,9 @@ func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string,
|
||||||
branchWithFallback = branch
|
branchWithFallback = branch
|
||||||
if branch == "" {
|
if branch == "" {
|
||||||
var body = make([]byte, 0)
|
var body = make([]byte, 0)
|
||||||
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo, 10*time.Second)
|
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo), 10*time.Second)
|
||||||
if err != nil || status != 200 {
|
if err != nil || status != 200 {
|
||||||
|
fmt.Printf("Default branch request to Gitea API failed with status code %d and error %s\n", status, err)
|
||||||
branchWithFallback = ""
|
branchWithFallback = ""
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -173,8 +227,9 @@ func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string,
|
||||||
}
|
}
|
||||||
|
|
||||||
var body = make([]byte, 0)
|
var body = make([]byte, 0)
|
||||||
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch, 10*time.Second)
|
status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo)+"/branches/"+url.PathEscape(branch), 10*time.Second)
|
||||||
if err != nil || status != 200 {
|
if err != nil || status != 200 {
|
||||||
|
fmt.Printf("Branch info request to Gitea API failed with status code %d and error %s\n", status, err)
|
||||||
branchWithFallback = ""
|
branchWithFallback = ""
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -196,12 +251,17 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
|
||||||
|
|
||||||
// Handle repositories with no/broken pages setup
|
// Handle repositories with no/broken pages setup
|
||||||
if options.BranchTimestamp == (time.Time{}) || targetBranch == "" {
|
if options.BranchTimestamp == (time.Time{}) || targetBranch == "" {
|
||||||
ctx.Response.SetStatusCode(fasthttp.StatusNotFound)
|
ctx.Response.SetStatusCode(fasthttp.StatusFailedDependency)
|
||||||
ctx.Response.Header.SetContentType("text/html; charset=utf-8")
|
ctx.Response.Header.SetContentType("text/html; charset=utf-8")
|
||||||
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte("pages not set up for this repo")))
|
ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte("pages not set up for this repo")))
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if targetOwner == "" || targetRepo == "" || targetBranch == "" {
|
||||||
|
returnErrorPage(ctx, fasthttp.StatusBadRequest)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Check if the browser has a cached version
|
// Check if the browser has a cached version
|
||||||
if ifModifiedSince, err := time.Parse(time.RFC1123, string(ctx.Request.Header.Peek("If-Modified-Since"))); err == nil {
|
if ifModifiedSince, err := time.Parse(time.RFC1123, string(ctx.Request.Header.Peek("If-Modified-Since"))); err == nil {
|
||||||
if !ifModifiedSince.Before(options.BranchTimestamp) {
|
if !ifModifiedSince.Before(options.BranchTimestamp) {
|
||||||
|
@ -212,7 +272,7 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
|
||||||
|
|
||||||
// Make a GET request to the upstream URL
|
// Make a GET request to the upstream URL
|
||||||
req := fasthttp.AcquireRequest()
|
req := fasthttp.AcquireRequest()
|
||||||
req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath)
|
req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + url.PathEscape(targetOwner) + "/" + url.PathEscape(targetRepo) + "/raw/" + url.PathEscape(targetBranch) + "/" + url.PathEscape(targetPath))
|
||||||
res := fasthttp.AcquireResponse()
|
res := fasthttp.AcquireResponse()
|
||||||
err := fasthttp.DoTimeout(req, res, 10*time.Second)
|
err := fasthttp.DoTimeout(req, res, 10*time.Second)
|
||||||
|
|
||||||
|
|
20
main.go
20
main.go
|
@ -20,7 +20,6 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
_ "embed"
|
_ "embed"
|
||||||
|
@ -67,14 +66,6 @@ var IndexPages = []string{
|
||||||
"index.html",
|
"index.html",
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReservedUsernames specifies the usernames that are reserved by Gitea and thus may not be used as owner names.
|
|
||||||
// The contents are taken from https://github.com/go-gitea/gitea/blob/master/models/user.go#L783; reserved names with
|
|
||||||
// dots are removed as they are forbidden for Codeberg Pages anyways.
|
|
||||||
var ReservedUsernames = createLookupMapFromWords(`
|
|
||||||
admin api assets attachments avatars captcha commits debug error explore ghost help install issues less login metrics milestones new notifications org plugins pulls raw repo search stars template user
|
|
||||||
|
|
||||||
`)
|
|
||||||
|
|
||||||
// main sets up and starts the web server.
|
// main sets up and starts the web server.
|
||||||
func main() {
|
func main() {
|
||||||
// Make sure MainDomain has a trailing dot, and GiteaRoot has no trailing slash
|
// Make sure MainDomain has a trailing dot, and GiteaRoot has no trailing slash
|
||||||
|
@ -122,14 +113,3 @@ func envOr(env string, or string) string {
|
||||||
}
|
}
|
||||||
return or
|
return or
|
||||||
}
|
}
|
||||||
|
|
||||||
func createLookupMapFromWords(input string) map[string]struct{} {
|
|
||||||
var res = map[string]struct{}{}
|
|
||||||
input = strings.NewReplacer("\t", " ", "\n", " ", "\r", " ").Replace(input)
|
|
||||||
for _, word := range strings.Split(input, " ") {
|
|
||||||
if len(word) > 0 {
|
|
||||||
res[word] = struct{}{}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue