diff --git a/html/error.go b/html/error.go
index 325dada..572357b 100644
--- a/html/error.go
+++ b/html/error.go
@@ -2,23 +2,30 @@ package html
 
 import (
 	"bytes"
+	"net/http"
 	"strconv"
-
-	"github.com/valyala/fasthttp"
 )
 
-// ReturnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced
-// with the provided status code.
-func ReturnErrorPage(ctx *fasthttp.RequestCtx, code int) {
-	ctx.Response.SetStatusCode(code)
-	ctx.Response.Header.SetContentType("text/html; charset=utf-8")
-	message := fasthttp.StatusMessage(code)
-	if code == fasthttp.StatusMisdirectedRequest {
+// errorMessage returns http.StatusText for statusCode, with an explanatory suffix appended for
+// 421 Misdirected Request and 424 Failed Dependency.
+func errorMessage(statusCode int) string {
+	message := http.StatusText(statusCode)
+
+	switch statusCode {
+	case http.StatusMisdirectedRequest:
 		message += " - domain not specified in .domains file"
-	}
-	if code == fasthttp.StatusFailedDependency {
+	case http.StatusFailedDependency:
 		message += " - target repo/branch doesn't exist or is private"
 	}
-	// TODO: use template engine?
-	ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+message)))
+
+	return message
+}
+
+// errorBody returns a copy of NotFoundPage in which every "%status" placeholder is replaced by
+// the numeric status code followed by a space and errorMessage(statusCode).
+// TODO: use template engine?
+func errorBody(statusCode int) []byte {
+	return bytes.ReplaceAll(NotFoundPage,
+		[]byte("%status"),
+		[]byte(strconv.Itoa(statusCode)+" "+errorMessage(statusCode)))
 }
diff --git a/html/error_fasthttp.go b/html/error_fasthttp.go
new file mode 100644
index 0000000..46e96e8
--- /dev/null
+++ b/html/error_fasthttp.go
@@ -0,0 +1,18 @@
+//go:build fasthttp
+
+package html
+
+// Only fasthttp is imported: the body is built by errorBody, so "bytes" and "strconv" are not used in this file.
+import (
+	"github.com/valyala/fasthttp"
+)
+
+// ReturnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced
+// with the provided status code.
+func ReturnErrorPage(ctx *fasthttp.RequestCtx, code int) {
+	ctx.Response.SetStatusCode(code)
+	ctx.Response.Header.SetContentType("text/html; charset=utf-8")
+
+	// TODO: use template engine?
+	ctx.Response.SetBody(errorBody(code))
+}
diff --git a/html/error_std.go b/html/error_std.go
new file mode 100644
index 0000000..04bfb06
--- /dev/null
+++ b/html/error_std.go
@@ -0,0 +1,22 @@
+//go:build !fasthttp
+
+package html
+
+import (
+	"bytes"
+	"io"
+	"net/http"
+)
+
+// ReturnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced
+// with the provided status code.
+func ReturnErrorPage(resp *http.Response, code int) {
+	resp.StatusCode = code
+	// http.Header is a map; Set panics on a nil map, so allocate one for responses built without headers.
+	if resp.Header == nil {
+		resp.Header = make(http.Header)
+	}
+	resp.Header.Set("Content-Type", "text/html; charset=utf-8")
+
+	resp.Body = io.NopCloser(bytes.NewReader(errorBody(code)))
+}
diff --git a/server/handler.go b/server/handler.go
index 11da0a5..44c379a 100644
--- a/server/handler.go
+++ b/server/handler.go
@@ -1,3 +1,5 @@
+//go:build !fasthttp
+
 package server
 
 import (
diff --git a/server/handler_fasthttp.go b/server/handler_fasthttp.go
new file mode 100644
index 0000000..45b944f
--- /dev/null
+++ b/server/handler_fasthttp.go
@@ -0,0 +1,307 @@
+//go:build fasthttp
+
+package server
+
+import (
+	"bytes"
+	"strings"
+
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+	"github.com/valyala/fasthttp"
+
+	"codeberg.org/codeberg/pages/html"
+	"codeberg.org/codeberg/pages/server/cache"
+	"codeberg.org/codeberg/pages/server/dns"
+	"codeberg.org/codeberg/pages/server/gitea"
+	"codeberg.org/codeberg/pages/server/upstream"
+	"codeberg.org/codeberg/pages/server/utils"
+	"codeberg.org/codeberg/pages/server/version"
+)
+
+// Handler handles a single HTTP request to the web server.
+func Handler(mainDomainSuffix, rawDomain []byte,
+	giteaClient *gitea.Client,
+	giteaRoot, rawInfoPage string,
+	blacklistedPaths, allowedCorsDomains [][]byte,
+	dnsLookupCache, canonicalDomainCache, branchTimestampCache, fileResponseCache cache.SetGetKey,
+) func(ctx *fasthttp.RequestCtx) {
+	return func(ctx *fasthttp.RequestCtx) {
+		log := log.With().Str("Handler", string(ctx.Request.Header.RequestURI())).Logger()
+
+		ctx.Response.Header.Set("Server", "CodebergPages/"+version.Version)
+
+		// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
+		ctx.Response.Header.Set("Referrer-Policy", "strict-origin-when-cross-origin")
+
+		// Enable browser caching for up to 10 minutes
+		ctx.Response.Header.Set("Cache-Control", "public, max-age=600")
+
+		trimmedHost := utils.TrimHostPort(ctx.Request.Host())
+
+		// Add HSTS for RawDomain and MainDomainSuffix
+		if hsts := GetHSTSHeader(trimmedHost, mainDomainSuffix, rawDomain); hsts != "" {
+			ctx.Response.Header.Set("Strict-Transport-Security", hsts)
+		}
+
+		// Block all methods not required for static pages
+		if !ctx.IsGet() && !ctx.IsHead() && !ctx.IsOptions() {
+			ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS")
+			ctx.Error("Method not allowed", fasthttp.StatusMethodNotAllowed)
+			return
+		}
+
+		// Block blacklisted paths (like ACME challenges)
+		for _, blacklistedPath := range blacklistedPaths {
+			if bytes.HasPrefix(ctx.Path(), blacklistedPath) {
+				html.ReturnErrorPage(ctx, fasthttp.StatusForbidden)
+				return
+			}
+		}
+
+		// Allow CORS for specified domains
+		allowCors := false
+		for _, allowedCorsDomain := range allowedCorsDomains {
+			if bytes.Equal(trimmedHost, allowedCorsDomain) {
+				allowCors = true
+				break
+			}
+		}
+		if allowCors {
+			ctx.Response.Header.Set("Access-Control-Allow-Origin", "*")
+			ctx.Response.Header.Set("Access-Control-Allow-Methods", "GET, HEAD")
+		}
+		ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS")
+		if ctx.IsOptions() {
+			ctx.Response.Header.SetStatusCode(fasthttp.StatusNoContent)
+			return
+		}
+
+		// Prepare request information to Gitea
+		var targetOwner, targetRepo, targetBranch, targetPath string
+		targetOptions := &upstream.Options{
+			TryIndexPages: true,
+		}
+
+		// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
+		// also disallow search indexing and add a Link header to the canonical URL.
+		tryBranch := func(log zerolog.Logger, repo, branch string, path []string, canonicalLink string) bool {
+			if repo == "" {
+				log.Debug().Msg("tryBranch: repo == ''")
+				return false
+			}
+
+			// Replace "~" to "/" so we can access branch that contains slash character
+			// Branch name cannot contain "~" so doing this is okay
+			branch = strings.ReplaceAll(branch, "~", "/")
+
+			// Check if the branch exists, otherwise treat it as a file path
+			branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch, branchTimestampCache)
+			if branchTimestampResult == nil {
+				log.Debug().Msg("tryBranch: branch doesn't exist")
+				return false
+			}
+
+			// Branch exists, use it
+			targetRepo = repo
+			targetPath = strings.Trim(strings.Join(path, "/"), "/")
+			targetBranch = branchTimestampResult.Branch
+
+			targetOptions.BranchTimestamp = branchTimestampResult.Timestamp
+
+			if canonicalLink != "" {
+				// Hide from search machines & add canonical link
+				ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex")
+				ctx.Response.Header.Set("Link",
+					strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+
+						"; rel=\"canonical\"",
+				)
+			}
+
+			log.Debug().Msg("tryBranch: true")
+			return true
+		}
+
+		log.Debug().Msg("preparations")
+		if rawDomain != nil && bytes.Equal(trimmedHost, rawDomain) {
+			// Serve raw content from RawDomain
+			log.Debug().Msg("raw domain")
+
+			targetOptions.TryIndexPages = false
+			if targetOptions.ForbiddenMimeTypes == nil {
+				targetOptions.ForbiddenMimeTypes = make(map[string]bool)
+			}
+			targetOptions.ForbiddenMimeTypes["text/html"] = true
+			targetOptions.DefaultMimeType = "text/plain; charset=utf-8"
+
+			pathElements := strings.Split(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/")
+			if len(pathElements) < 2 {
+				// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
+				ctx.Redirect(rawInfoPage, fasthttp.StatusTemporaryRedirect)
+				return
+			}
+			targetOwner = pathElements[0]
+			targetRepo = pathElements[1]
+
+			// raw.codeberg.org/example/myrepo/@main/index.html
+			if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
+				log.Debug().Msg("raw domain preparations, now trying with specified branch")
+				if tryBranch(log,
+					targetRepo, pathElements[2][1:], pathElements[3:],
+					giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
+				) {
+					log.Debug().Msg("tryBranch, now trying upstream 1")
+					tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+						targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+						canonicalDomainCache, branchTimestampCache, fileResponseCache)
+					return
+				}
+				log.Debug().Msg("missing branch")
+				html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+				return
+			}
+
+			log.Debug().Msg("raw domain preparations, now trying with default branch")
+			tryBranch(log,
+				targetRepo, "", pathElements[2:],
+				giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p",
+			)
+			log.Debug().Msg("tryBranch, now trying upstream 2")
+			tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+				targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+				canonicalDomainCache, branchTimestampCache, fileResponseCache)
+			return
+
+		} else if bytes.HasSuffix(trimmedHost, mainDomainSuffix) {
+			// Serve pages from subdomains of MainDomainSuffix
+			log.Debug().Msg("main domain suffix")
+
+			pathElements := strings.Split(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/")
+			targetOwner = string(bytes.TrimSuffix(trimmedHost, mainDomainSuffix))
+			targetRepo = pathElements[0]
+			targetPath = strings.Trim(strings.Join(pathElements[1:], "/"), "/")
+
+			if targetOwner == "www" {
+				// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
+				ctx.Redirect("https://"+string(mainDomainSuffix[1:])+string(ctx.Path()), fasthttp.StatusPermanentRedirect)
+				return
+			}
+
+			// Check if the first directory is a repo with the second directory as a branch
+			// example.codeberg.page/myrepo/@main/index.html
+			if len(pathElements) > 1 && strings.HasPrefix(pathElements[1], "@") {
+				if targetRepo == "pages" {
+					// example.codeberg.org/pages/@... redirects to example.codeberg.org/@...
+					ctx.Redirect("/"+strings.Join(pathElements[1:], "/"), fasthttp.StatusTemporaryRedirect)
+					return
+				}
+
+				log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
+				if tryBranch(log,
+					pathElements[0], pathElements[1][1:], pathElements[2:],
+					"/"+pathElements[0]+"/%p",
+				) {
+					log.Debug().Msg("tryBranch, now trying upstream 3")
+					tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+						targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+						canonicalDomainCache, branchTimestampCache, fileResponseCache)
+				} else {
+					html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+				}
+				return
+			}
+
+			// Check if the first directory is a branch for the "pages" repo
+			// example.codeberg.page/@main/index.html
+			if strings.HasPrefix(pathElements[0], "@") {
+				log.Debug().Msg("main domain preparations, now trying with specified branch")
+				if tryBranch(log,
+					"pages", pathElements[0][1:], pathElements[1:], "/%p") {
+					log.Debug().Msg("tryBranch, now trying upstream 4")
+					tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+						targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+						canonicalDomainCache, branchTimestampCache, fileResponseCache)
+				} else {
+					html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+				}
+				return
+			}
+
+			// Check if the first directory is a repo with a "pages" branch
+			// example.codeberg.page/myrepo/index.html
+			// example.codeberg.page/pages/... is not allowed here.
+			log.Debug().Msg("main domain preparations, now trying with specified repo")
+			if pathElements[0] != "pages" && tryBranch(log,
+				pathElements[0], "pages", pathElements[1:], "") {
+				log.Debug().Msg("tryBranch, now trying upstream 5")
+				tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+					targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+					canonicalDomainCache, branchTimestampCache, fileResponseCache)
+				return
+			}
+
+			// Try to use the "pages" repo on its default branch
+			// example.codeberg.page/index.html
+			log.Debug().Msg("main domain preparations, now trying with default repo/branch")
+			if tryBranch(log,
+				"pages", "", pathElements, "") {
+				log.Debug().Msg("tryBranch, now trying upstream 6")
+				tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+					targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+					canonicalDomainCache, branchTimestampCache, fileResponseCache)
+				return
+			}
+
+			// Couldn't find a valid repo/branch
+			html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+			return
+		} else {
+			trimmedHostStr := string(trimmedHost)
+
+			// Serve pages from external domains
+			targetOwner, targetRepo, targetBranch = dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache)
+			if targetOwner == "" {
+				html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+				return
+			}
+
+			pathElements := strings.Split(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/")
+			canonicalLink := ""
+			if strings.HasPrefix(pathElements[0], "@") {
+				targetBranch = pathElements[0][1:]
+				pathElements = pathElements[1:]
+				canonicalLink = "/%p"
+			}
+
+			// Try to use the given repo on the given branch or the default branch
+			log.Debug().Msg("custom domain preparations, now trying with details from DNS")
+			if tryBranch(log,
+				targetRepo, targetBranch, pathElements, canonicalLink) {
+				canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache)
+				if !valid {
+					html.ReturnErrorPage(ctx, fasthttp.StatusMisdirectedRequest)
+					return
+				} else if canonicalDomain != trimmedHostStr {
+					// only redirect if the target is also a codeberg page!
+					targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache)
+					if targetOwner != "" {
+						ctx.Redirect("https://"+canonicalDomain+string(ctx.RequestURI()), fasthttp.StatusTemporaryRedirect)
+						return
+					}
+
+					html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+					return
+				}
+
+				log.Debug().Msg("tryBranch, now trying upstream 7")
+				tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost,
+					targetOptions, targetOwner, targetRepo, targetBranch, targetPath,
+					canonicalDomainCache, branchTimestampCache, fileResponseCache)
+				return
+			}
+
+			html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency)
+			return
+		}
+	}
+}
diff --git a/server/handler_test.go b/server/handler_test.go
index 23d9af5..98c62d3 100644
--- a/server/handler_test.go
+++ b/server/handler_test.go
@@ -1,3 +1,5 @@
+//go:build !fasthttp
+
 package server
 
 import (
diff --git a/server/handler_fasthttp_test.go b/server/handler_fasthttp_test.go
new file mode 100644
index 0000000..6172456
--- /dev/null
+++ b/server/handler_fasthttp_test.go
@@ -0,0 +1,53 @@
+//go:build fasthttp
+
+package server
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/valyala/fasthttp"
+
+	"codeberg.org/codeberg/pages/server/cache"
+	"codeberg.org/codeberg/pages/server/gitea"
+)
+
+func TestHandlerPerformance(t *testing.T) {
+	giteaRoot := "https://codeberg.org"
+	giteaClient, _ := gitea.NewClient(giteaRoot, "")
+	testHandler := Handler(
+		[]byte("codeberg.page"), []byte("raw.codeberg.org"),
+		giteaClient,
+		giteaRoot, "https://docs.codeberg.org/pages/raw-content/",
+		[][]byte{[]byte("/.well-known/acme-challenge/")},
+		[][]byte{[]byte("raw.codeberg.org"), []byte("fonts.codeberg.org"), []byte("design.codeberg.org")},
+		cache.NewKeyValueCache(),
+		cache.NewKeyValueCache(),
+		cache.NewKeyValueCache(),
+		cache.NewKeyValueCache(),
+	)
+
+	testCase := func(uri string, status int) {
+		ctx := &fasthttp.RequestCtx{
+			Request:  *fasthttp.AcquireRequest(),
+			Response: *fasthttp.AcquireResponse(),
+		}
+		ctx.Request.SetRequestURI(uri)
+		fmt.Printf("Start: %v\n", time.Now())
+		start := time.Now()
+		testHandler(ctx)
+		end := time.Now()
+		fmt.Printf("Done: %v\n", time.Now())
+		if ctx.Response.StatusCode() != status {
+			t.Errorf("request failed with status code %d", ctx.Response.StatusCode())
+		} else {
+			t.Logf("request took %d milliseconds", end.Sub(start).Milliseconds())
+		}
+	}
+
+	testCase("https://mondstern.codeberg.page/", 424) // TODO: expect 200
+	testCase("https://mondstern.codeberg.page/", 424) // TODO: expect 200
+	testCase("https://example.momar.xyz/", 424)       // TODO: expect 200
+	testCase("https://codeberg.page/", 424)           // TODO: expect 200
+}
diff --git a/server/setup.go b/server/setup.go
index 027b3e1..9a332aa 100644
--- a/server/setup.go
+++ b/server/setup.go
@@ -1,3 +1,5 @@
+//go:build !fasthttp
+
 package server
 
 import (
diff --git a/server/setup_fasthttp.go b/server/setup_fasthttp.go
new file mode 100644
index 0000000..8269e8a
--- /dev/null
+++ b/server/setup_fasthttp.go
@@ -0,0 +1,51 @@
+//go:build fasthttp
+
+package server
+
+import (
+	"bytes"
+	"net/http"
+	"time"
+
+	"github.com/valyala/fasthttp"
+
+	"codeberg.org/codeberg/pages/server/cache"
+	"codeberg.org/codeberg/pages/server/utils"
+)
+
+// SetupServer returns a fasthttp server that serves handler wrapped in fasthttp's response compression.
+func SetupServer(handler fasthttp.RequestHandler) *fasthttp.Server {
+	// Enable compression by wrapping the handler with the compression function provided by FastHTTP
+	compressedHandler := fasthttp.CompressHandlerBrotliLevel(handler, fasthttp.CompressBrotliBestSpeed, fasthttp.CompressBestSpeed)
+
+	return &fasthttp.Server{
+		Handler:                      compressedHandler,
+		DisablePreParseMultipartForm: true,
+		NoDefaultServerHeader:        true,
+		NoDefaultDate:                true,
+		ReadTimeout:                  30 * time.Second, // needs to be this high for ACME certificates with ZeroSSL & HTTP-01 challenge
+	}
+}
+
+// SetupHTTPACMEChallengeServer returns a fasthttp server that answers requests below
+// /.well-known/acme-challenge/ from challengeCache and redirects every other request to HTTPS.
+func SetupHTTPACMEChallengeServer(challengeCache cache.SetGetKey) *fasthttp.Server {
+	challengePath := []byte("/.well-known/acme-challenge/")
+
+	return &fasthttp.Server{
+		Handler: func(ctx *fasthttp.RequestCtx) {
+			if bytes.HasPrefix(ctx.Path(), challengePath) {
+				challenge, ok := challengeCache.Get(string(utils.TrimHostPort(ctx.Host())) + "/" + string(bytes.TrimPrefix(ctx.Path(), challengePath)))
+				if !ok || challenge == nil {
+					ctx.SetStatusCode(http.StatusNotFound)
+					ctx.SetBodyString("no challenge for this token")
+					// Stop here: asserting a nil challenge to string below would panic and overwrite the 404 body.
+					return
+				}
+				ctx.SetBodyString(challenge.(string))
+			} else {
+				ctx.Redirect("https://"+string(ctx.Host())+string(ctx.RequestURI()), http.StatusMovedPermanently)
+			}
+		},
+	}
+}
diff --git a/server/try.go b/server/try.go
index 254d3ec..4f3ea1f 100644
--- a/server/try.go
+++ b/server/try.go
@@ -1,3 +1,5 @@
+//go:build !fasthttp
+
 package server
 
 import (
diff --git a/server/try_fasthttp.go b/server/try_fasthttp.go
new file mode 100644
index 0000000..4b4d8f7
--- /dev/null
+++ b/server/try_fasthttp.go
@@ -0,0 +1,51 @@
+//go:build fasthttp
+
+package server
+
+import (
+	"bytes"
+	"strings"
+
+	"github.com/valyala/fasthttp"
+
+	"codeberg.org/codeberg/pages/html"
+	"codeberg.org/codeberg/pages/server/cache"
+	"codeberg.org/codeberg/pages/server/gitea"
+	"codeberg.org/codeberg/pages/server/upstream"
+)
+
+// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
+func tryUpstream(ctx *fasthttp.RequestCtx, giteaClient *gitea.Client,
+	mainDomainSuffix, trimmedHost []byte,
+
+	targetOptions *upstream.Options,
+	targetOwner, targetRepo, targetBranch, targetPath string,
+
+	canonicalDomainCache, branchTimestampCache, fileResponseCache cache.SetGetKey,
+) {
+	// For requests on the main domain: if the repo's canonical domain is not below mainDomainSuffix, redirect to it
+	if bytes.HasSuffix(trimmedHost, mainDomainSuffix) {
+		canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, "", string(mainDomainSuffix), canonicalDomainCache) // second result is deliberately discarded here
+		if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) { // compare only the part before the first "/"
+			canonicalPath := string(ctx.RequestURI())
+			if targetRepo != "pages" {
+				path := strings.SplitN(canonicalPath, "/", 3)
+				if len(path) >= 3 {
+					canonicalPath = "/" + path[2] // drop the first path segment (presumably the repo name - confirm)
+				}
+			}
+			ctx.Redirect("https://"+canonicalDomain+canonicalPath, fasthttp.StatusTemporaryRedirect)
+			return
+		}
+	}
+
+	targetOptions.TargetOwner = targetOwner
+	targetOptions.TargetRepo = targetRepo
+	targetOptions.TargetBranch = targetBranch
+	targetOptions.TargetPath = targetPath
+
+	// Try to request the file from the Gitea API; when Upstream returns false, render the error page for the response's current status code
+	if !targetOptions.Upstream(ctx, giteaClient, branchTimestampCache, fileResponseCache) {
+		html.ReturnErrorPage(ctx, ctx.Response.StatusCode())
+	}
+}