2021-12-05 13:45:17 +00:00
package server
import (
"bytes"
"strings"
2022-06-11 21:02:06 +00:00
"github.com/rs/zerolog"
2021-12-05 14:02:44 +00:00
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
2021-12-05 13:45:17 +00:00
"codeberg.org/codeberg/pages/html"
2021-12-05 14:02:44 +00:00
"codeberg.org/codeberg/pages/server/cache"
2021-12-05 14:21:05 +00:00
"codeberg.org/codeberg/pages/server/dns"
2022-06-11 21:02:06 +00:00
"codeberg.org/codeberg/pages/server/gitea"
2021-12-05 13:47:33 +00:00
"codeberg.org/codeberg/pages/server/upstream"
2021-12-03 02:44:21 +00:00
"codeberg.org/codeberg/pages/server/utils"
2022-06-14 18:35:11 +00:00
"codeberg.org/codeberg/pages/server/version"
2021-12-05 13:45:17 +00:00
)
// Handler handles a single HTTP request to the web server.
2021-12-05 14:02:44 +00:00
func Handler ( mainDomainSuffix , rawDomain [ ] byte ,
2022-06-11 21:02:06 +00:00
giteaClient * gitea . Client ,
giteaRoot , rawInfoPage string ,
2021-12-05 14:02:44 +00:00
blacklistedPaths , allowedCorsDomains [ ] [ ] byte ,
2022-03-27 19:54:06 +00:00
dnsLookupCache , canonicalDomainCache , branchTimestampCache , fileResponseCache cache . SetGetKey ,
) func ( ctx * fasthttp . RequestCtx ) {
2021-12-05 13:45:17 +00:00
return func ( ctx * fasthttp . RequestCtx ) {
2022-08-13 16:03:31 +00:00
log := log . With ( ) . Strs ( "Handler" , [ ] string { string ( ctx . Request . Host ( ) ) , string ( ctx . Request . Header . RequestURI ( ) ) } ) . Logger ( )
2021-12-05 13:45:17 +00:00
2022-06-14 18:35:11 +00:00
ctx . Response . Header . Set ( "Server" , "CodebergPages/" + version . Version )
2021-12-05 13:45:17 +00:00
// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
ctx . Response . Header . Set ( "Referrer-Policy" , "strict-origin-when-cross-origin" )
// Enable browser caching for up to 10 minutes
ctx . Response . Header . Set ( "Cache-Control" , "public, max-age=600" )
2021-12-03 02:44:21 +00:00
trimmedHost := utils . TrimHostPort ( ctx . Request . Host ( ) )
2021-12-05 13:45:17 +00:00
// Add HSTS for RawDomain and MainDomainSuffix
if hsts := GetHSTSHeader ( trimmedHost , mainDomainSuffix , rawDomain ) ; hsts != "" {
ctx . Response . Header . Set ( "Strict-Transport-Security" , hsts )
}
// Block all methods not required for static pages
if ! ctx . IsGet ( ) && ! ctx . IsHead ( ) && ! ctx . IsOptions ( ) {
ctx . Response . Header . Set ( "Allow" , "GET, HEAD, OPTIONS" )
ctx . Error ( "Method not allowed" , fasthttp . StatusMethodNotAllowed )
return
}
// Block blacklisted paths (like ACME challenges)
for _ , blacklistedPath := range blacklistedPaths {
if bytes . HasPrefix ( ctx . Path ( ) , blacklistedPath ) {
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusForbidden )
2021-12-05 13:45:17 +00:00
return
}
}
// Allow CORS for specified domains
2022-04-10 16:11:00 +00:00
allowCors := false
for _ , allowedCorsDomain := range allowedCorsDomains {
if bytes . Equal ( trimmedHost , allowedCorsDomain ) {
allowCors = true
break
2021-12-05 13:45:17 +00:00
}
2022-04-10 16:11:00 +00:00
}
if allowCors {
ctx . Response . Header . Set ( "Access-Control-Allow-Origin" , "*" )
ctx . Response . Header . Set ( "Access-Control-Allow-Methods" , "GET, HEAD" )
}
ctx . Response . Header . Set ( "Allow" , "GET, HEAD, OPTIONS" )
if ctx . IsOptions ( ) {
2021-12-05 13:45:17 +00:00
ctx . Response . Header . SetStatusCode ( fasthttp . StatusNoContent )
return
}
// Prepare request information to Gitea
var targetOwner , targetRepo , targetBranch , targetPath string
2022-03-27 19:54:06 +00:00
targetOptions := & upstream . Options {
2022-06-11 21:02:06 +00:00
TryIndexPages : true ,
2021-12-05 13:45:17 +00:00
}
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will
// also disallow search indexing and add a Link header to the canonical URL.
2022-06-11 21:02:06 +00:00
tryBranch := func ( log zerolog . Logger , repo , branch string , path [ ] string , canonicalLink string ) bool {
2021-12-05 13:45:17 +00:00
if repo == "" {
2022-09-13 21:06:31 +00:00
log . Warn ( ) . Msg ( "tryBranch: repo is empty" )
2021-12-05 13:45:17 +00:00
return false
}
2022-07-08 11:39:24 +00:00
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
branch = strings . ReplaceAll ( branch , "~" , "/" )
2021-12-05 13:45:17 +00:00
// Check if the branch exists, otherwise treat it as a file path
2022-06-11 21:02:06 +00:00
branchTimestampResult := upstream . GetBranchTimestamp ( giteaClient , targetOwner , repo , branch , branchTimestampCache )
2021-12-05 13:45:17 +00:00
if branchTimestampResult == nil {
2022-09-13 21:06:31 +00:00
log . Warn ( ) . Msg ( "tryBranch: branch doesn't exist" )
2021-12-05 13:45:17 +00:00
return false
}
// Branch exists, use it
targetRepo = repo
targetPath = strings . Trim ( strings . Join ( path , "/" ) , "/" )
2021-12-05 13:47:33 +00:00
targetBranch = branchTimestampResult . Branch
2021-12-05 13:45:17 +00:00
2021-12-05 13:47:33 +00:00
targetOptions . BranchTimestamp = branchTimestampResult . Timestamp
2021-12-05 13:45:17 +00:00
if canonicalLink != "" {
// Hide from search machines & add canonical link
ctx . Response . Header . Set ( "X-Robots-Tag" , "noarchive, noindex" )
ctx . Response . Header . Set ( "Link" ,
strings . NewReplacer ( "%b" , targetBranch , "%p" , targetPath ) . Replace ( canonicalLink ) +
"; rel=\"canonical\"" ,
)
}
2022-06-11 21:02:06 +00:00
log . Debug ( ) . Msg ( "tryBranch: true" )
2021-12-05 13:45:17 +00:00
return true
}
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Preparing" )
2021-12-05 13:45:17 +00:00
if rawDomain != nil && bytes . Equal ( trimmedHost , rawDomain ) {
// Serve raw content from RawDomain
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Serving raw domain" )
2021-12-05 13:45:17 +00:00
targetOptions . TryIndexPages = false
2022-06-11 21:02:06 +00:00
if targetOptions . ForbiddenMimeTypes == nil {
targetOptions . ForbiddenMimeTypes = make ( map [ string ] bool )
}
targetOptions . ForbiddenMimeTypes [ "text/html" ] = true
2021-12-05 13:45:17 +00:00
targetOptions . DefaultMimeType = "text/plain; charset=utf-8"
pathElements := strings . Split ( string ( bytes . Trim ( ctx . Request . URI ( ) . Path ( ) , "/" ) ) , "/" )
if len ( pathElements ) < 2 {
// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
ctx . Redirect ( rawInfoPage , fasthttp . StatusTemporaryRedirect )
return
}
targetOwner = pathElements [ 0 ]
targetRepo = pathElements [ 1 ]
// raw.codeberg.org/example/myrepo/@main/index.html
if len ( pathElements ) > 2 && strings . HasPrefix ( pathElements [ 2 ] , "@" ) {
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Preparing raw domain, now trying with specified branch" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
targetRepo , pathElements [ 2 ] [ 1 : ] , pathElements [ 3 : ] ,
2021-12-03 02:05:38 +00:00
giteaRoot + "/" + targetOwner + "/" + targetRepo + "/src/branch/%b/%p" ,
2021-12-05 13:45:17 +00:00
) {
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 1" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
2021-12-05 13:45:17 +00:00
return
}
2022-08-12 03:06:26 +00:00
log . Warn ( ) . Msg ( "Path missed a branch" )
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
return
}
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Preparing raw domain, now trying with default branch" )
2022-06-11 21:02:06 +00:00
tryBranch ( log ,
targetRepo , "" , pathElements [ 2 : ] ,
2021-12-05 18:53:23 +00:00
giteaRoot + "/" + targetOwner + "/" + targetRepo + "/src/branch/%b/%p" ,
)
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 2" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 18:53:23 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
return
2021-12-05 13:45:17 +00:00
} else if bytes . HasSuffix ( trimmedHost , mainDomainSuffix ) {
// Serve pages from subdomains of MainDomainSuffix
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "Serve pages from main domain suffix" )
2021-12-05 13:45:17 +00:00
pathElements := strings . Split ( string ( bytes . Trim ( ctx . Request . URI ( ) . Path ( ) , "/" ) ) , "/" )
targetOwner = string ( bytes . TrimSuffix ( trimmedHost , mainDomainSuffix ) )
targetRepo = pathElements [ 0 ]
targetPath = strings . Trim ( strings . Join ( pathElements [ 1 : ] , "/" ) , "/" )
if targetOwner == "www" {
2021-12-05 18:53:23 +00:00
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
2021-12-05 13:45:17 +00:00
ctx . Redirect ( "https://" + string ( mainDomainSuffix [ 1 : ] ) + string ( ctx . Path ( ) ) , fasthttp . StatusPermanentRedirect )
return
}
// Check if the first directory is a repo with the second directory as a branch
// example.codeberg.page/myrepo/@main/index.html
if len ( pathElements ) > 1 && strings . HasPrefix ( pathElements [ 1 ] , "@" ) {
if targetRepo == "pages" {
// example.codeberg.org/pages/@... redirects to example.codeberg.org/@...
ctx . Redirect ( "/" + strings . Join ( pathElements [ 1 : ] , "/" ) , fasthttp . StatusTemporaryRedirect )
return
}
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Preparing main domain, now trying with specified repo & branch" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
pathElements [ 0 ] , pathElements [ 1 ] [ 1 : ] , pathElements [ 2 : ] ,
2021-12-05 13:45:17 +00:00
"/" + pathElements [ 0 ] + "/%p" ,
) {
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 3" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
2021-12-05 13:45:17 +00:00
} else {
2022-08-12 03:06:26 +00:00
log . Warn ( ) . Msg ( "tryBranch: upstream 3 failed" )
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
}
return
}
// Check if the first directory is a branch for the "pages" repo
// example.codeberg.page/@main/index.html
if strings . HasPrefix ( pathElements [ 0 ] , "@" ) {
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Preparing main domain, now trying with specified branch" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
"pages" , pathElements [ 0 ] [ 1 : ] , pathElements [ 1 : ] , "/%p" ) {
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 4" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
2021-12-05 13:45:17 +00:00
} else {
2022-08-12 03:06:26 +00:00
log . Warn ( ) . Msg ( "tryBranch: upstream 4 failed" )
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
}
return
}
// Check if the first directory is a repo with a "pages" branch
// example.codeberg.page/myrepo/index.html
// example.codeberg.page/pages/... is not allowed here.
log . Debug ( ) . Msg ( "main domain preparations, now trying with specified repo" )
2022-06-11 21:02:06 +00:00
if pathElements [ 0 ] != "pages" && tryBranch ( log ,
pathElements [ 0 ] , "pages" , pathElements [ 1 : ] , "" ) {
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 5" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
2021-12-05 13:45:17 +00:00
return
}
// Try to use the "pages" repo on its default branch
// example.codeberg.page/index.html
log . Debug ( ) . Msg ( "main domain preparations, now trying with default repo/branch" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
"pages" , "" , pathElements , "" ) {
2022-08-12 03:06:26 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 6" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
2021-12-05 13:45:17 +00:00
return
}
// Couldn't find a valid repo/branch
2022-08-12 03:06:26 +00:00
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
return
} else {
trimmedHostStr := string ( trimmedHost )
// Serve pages from external domains
2021-12-05 14:21:05 +00:00
targetOwner , targetRepo , targetBranch = dns . GetTargetFromDNS ( trimmedHostStr , string ( mainDomainSuffix ) , dnsLookupCache )
2021-12-05 13:45:17 +00:00
if targetOwner == "" {
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
2021-12-05 13:45:17 +00:00
return
}
pathElements := strings . Split ( string ( bytes . Trim ( ctx . Request . URI ( ) . Path ( ) , "/" ) ) , "/" )
canonicalLink := ""
if strings . HasPrefix ( pathElements [ 0 ] , "@" ) {
targetBranch = pathElements [ 0 ] [ 1 : ]
pathElements = pathElements [ 1 : ]
canonicalLink = "/%p"
}
// Try to use the given repo on the given branch or the default branch
2022-08-12 03:06:26 +00:00
log . Debug ( ) . Msg ( "Preparing custom domain, now trying with details from DNS" )
2022-06-11 21:02:06 +00:00
if tryBranch ( log ,
targetRepo , targetBranch , pathElements , canonicalLink ) {
canonicalDomain , valid := upstream . CheckCanonicalDomain ( giteaClient , targetOwner , targetRepo , targetBranch , trimmedHostStr , string ( mainDomainSuffix ) , canonicalDomainCache )
2021-12-05 13:45:17 +00:00
if ! valid {
2022-08-12 03:06:26 +00:00
log . Warn ( ) . Msg ( "Custom domains, domain from DNS isn't valid/canonical" )
2021-12-05 13:47:33 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusMisdirectedRequest )
2021-12-05 13:45:17 +00:00
return
} else if canonicalDomain != trimmedHostStr {
// only redirect if the target is also a codeberg page!
2021-12-05 14:21:05 +00:00
targetOwner , _ , _ = dns . GetTargetFromDNS ( strings . SplitN ( canonicalDomain , "/" , 2 ) [ 0 ] , string ( mainDomainSuffix ) , dnsLookupCache )
2021-12-05 13:45:17 +00:00
if targetOwner != "" {
ctx . Redirect ( "https://" + canonicalDomain + string ( ctx . RequestURI ( ) ) , fasthttp . StatusTemporaryRedirect )
return
}
2021-12-05 18:53:23 +00:00
2022-08-12 03:06:26 +00:00
log . Warn ( ) . Msg ( "Custom domains, targetOwner from DNS is empty" )
2021-12-05 18:53:23 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
return
2021-12-05 13:45:17 +00:00
}
2022-09-18 14:13:27 +00:00
log . Info ( ) . Msg ( "tryBranch, now trying upstream 7" )
2022-06-11 21:02:06 +00:00
tryUpstream ( ctx , giteaClient , mainDomainSuffix , trimmedHost ,
2021-12-05 17:17:28 +00:00
targetOptions , targetOwner , targetRepo , targetBranch , targetPath ,
canonicalDomainCache , branchTimestampCache , fileResponseCache )
2021-12-05 13:45:17 +00:00
return
}
2021-12-05 18:53:23 +00:00
2022-08-12 03:06:26 +00:00
log . Warn ( ) . Msg ( "Couldn't handle request, none of the options succeed" )
2021-12-05 18:53:23 +00:00
html . ReturnErrorPage ( ctx , fasthttp . StatusFailedDependency )
return
2021-12-05 13:45:17 +00:00
}
}
}