Initial redirects implementation (#148)

Adds basic support for `_redirects` files. It supports a subset of what IPFS supports: https://docs.ipfs.tech/how-to/websites-on-ipfs/redirects-and-custom-404s/

Example:
```
/redirect https://example.com/ 301
/another-redirect /page 301
/302  https://example.com/  302
/app/* /index.html 200
/articles/* /posts/:splat 301
```
301 redirect: https://video-prize-ranch.localhost.mock.directory:4430/redirect
SPA rewrite: https://video-prize-ranch.localhost.mock.directory:4430/app/path/path
Catch-all with splat: https://video-prize-ranch.localhost.mock.directory:4430/articles/path/path

Closes #46

Co-authored-by: video-prize-ranch <cb.8a3w5@simplelogin.co>
Co-authored-by: 6543 <6543@obermui.de>
Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/148
Reviewed-by: 6543 <6543@obermui.de>
Co-authored-by: video-prize-ranch <video-prize-ranch@noreply.codeberg.org>
Co-committed-by: video-prize-ranch <video-prize-ranch@noreply.codeberg.org>
This commit is contained in:
video-prize-ranch 2023-03-30 21:36:31 +00:00 committed by 6543
parent 970c13cf5c
commit 974229681f
11 changed files with 235 additions and 21 deletions

45
FEATURES.md Normal file
View file

@ -0,0 +1,45 @@
# Features
## Custom domains
...
## Redirects
Redirects can be created with a `_redirects` file with the following format:
```
# Comment
from to [status]
```
* Lines starting with `#` are ignored
* `from` - the path to redirect from (Note: repository and branch names are removed from request URLs)
* `to` - the path or URL to redirect to
* `status` - status code to use when redirecting (default 301)
### Status codes
* `200` - returns content from specified path (no external URLs) without changing the URL (rewrite)
* `301` - Moved Permanently (Permanent redirect)
* `302` - Found (Temporary redirect)
### Examples
#### SPA (single-page application) rewrite
Redirects all paths to `/index.html` for single-page apps.
```
/* /index.html 200
```
#### Splats
Redirects every path under `/articles` to `/posts` while keeping the path.
```
/articles/* /posts/:splat 302
```
Example: `/articles/2022/10/12/post-1/` -> `/posts/2022/10/12/post-1/`

View file

@ -81,6 +81,8 @@ func Serve(ctx *cli.Context) error {
canonicalDomainCache := cache.NewKeyValueCache()
// dnsLookupCache stores DNS lookups for custom domains
dnsLookupCache := cache.NewKeyValueCache()
// redirectsCache stores redirects in _redirects files
redirectsCache := cache.NewKeyValueCache()
// clientResponseCache stores responses from the Gitea server
clientResponseCache := cache.NewKeyValueCache()
@ -138,7 +140,7 @@ func Serve(ctx *cli.Context) error {
rawInfoPage,
BlacklistedPaths, allowedCorsDomains,
defaultBranches,
dnsLookupCache, canonicalDomainCache)
dnsLookupCache, canonicalDomainCache, redirectsCache)
// Start the ssl listener
log.Info().Msgf("Start SSL server using TCP listener on %s", listener.Addr())

View file

@ -151,6 +151,46 @@ func TestGetNotFound(t *testing.T) {
assert.EqualValues(t, 37, getSize(resp.Body))
}
func TestRedirect(t *testing.T) {
log.Println("=== TestRedirect ===")
// test redirects
resp, err := getTestHTTPSClient().Get("https://cb_pages_tests.localhost.mock.directory:4430/some_redirects/redirect")
assert.NoError(t, err)
if !assert.NotNil(t, resp) {
t.FailNow()
}
assert.EqualValues(t, http.StatusMovedPermanently, resp.StatusCode)
assert.EqualValues(t, "https://example.com/", resp.Header.Get("Location"))
}
func TestSPARedirect(t *testing.T) {
log.Println("=== TestSPARedirect ===")
// test SPA redirects
url := "https://cb_pages_tests.localhost.mock.directory:4430/some_redirects/app/aqdjw"
resp, err := getTestHTTPSClient().Get(url)
assert.NoError(t, err)
if !assert.NotNil(t, resp) {
t.FailNow()
}
assert.EqualValues(t, http.StatusOK, resp.StatusCode)
assert.EqualValues(t, url, resp.Request.URL.String())
assert.EqualValues(t, "text/html; charset=utf-8", resp.Header.Get("Content-Type"))
assert.EqualValues(t, "258", resp.Header.Get("Content-Length"))
assert.EqualValues(t, 258, getSize(resp.Body))
}
func TestSplatRedirect(t *testing.T) {
log.Println("=== TestSplatRedirect ===")
// test splat redirects
resp, err := getTestHTTPSClient().Get("https://cb_pages_tests.localhost.mock.directory:4430/some_redirects/articles/qfopefe")
assert.NoError(t, err)
if !assert.NotNil(t, resp) {
t.FailNow()
}
assert.EqualValues(t, http.StatusMovedPermanently, resp.StatusCode)
assert.EqualValues(t, "/posts/qfopefe", resp.Header.Get("Location"))
}
func TestFollowSymlink(t *testing.T) {
log.Printf("=== TestFollowSymlink ===\n")

View file

@ -25,7 +25,7 @@ func Handler(mainDomainSuffix, rawDomain string,
rawInfoPage string,
blacklistedPaths, allowedCorsDomains []string,
defaultPagesBranches []string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
dnsLookupCache, canonicalDomainCache, redirectsCache cache.SetGetKey,
) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) {
log := log.With().Strs("Handler", []string{req.Host, req.RequestURI}).Logger()
@ -93,7 +93,7 @@ func Handler(mainDomainSuffix, rawDomain string,
mainDomainSuffix, rawInfoPage,
trimmedHost,
pathElements,
canonicalDomainCache)
canonicalDomainCache, redirectsCache)
} else if strings.HasSuffix(trimmedHost, mainDomainSuffix) {
log.Debug().Msg("subdomain request detecded")
handleSubDomain(log, ctx, giteaClient,
@ -101,7 +101,7 @@ func Handler(mainDomainSuffix, rawDomain string,
defaultPagesBranches,
trimmedHost,
pathElements,
canonicalDomainCache)
canonicalDomainCache, redirectsCache)
} else {
log.Debug().Msg("custom domain request detecded")
handleCustomDomain(log, ctx, giteaClient,
@ -109,7 +109,7 @@ func Handler(mainDomainSuffix, rawDomain string,
trimmedHost,
pathElements,
defaultPagesBranches[0],
dnsLookupCache, canonicalDomainCache)
dnsLookupCache, canonicalDomainCache, redirectsCache)
}
}
}

View file

@ -19,7 +19,7 @@ func handleCustomDomain(log zerolog.Logger, ctx *context.Context, giteaClient *g
trimmedHost string,
pathElements []string,
firstDefaultBranch string,
dnsLookupCache, canonicalDomainCache cache.SetGetKey,
dnsLookupCache, canonicalDomainCache, redirectsCache cache.SetGetKey,
) {
// Serve pages from custom domains
targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHost, mainDomainSuffix, firstDefaultBranch, dnsLookupCache)
@ -64,7 +64,7 @@ func handleCustomDomain(log zerolog.Logger, ctx *context.Context, giteaClient *g
}
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}

View file

@ -19,7 +19,7 @@ func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie
mainDomainSuffix, rawInfoPage string,
trimmedHost string,
pathElements []string,
canonicalDomainCache cache.SetGetKey,
canonicalDomainCache, redirectsCache cache.SetGetKey,
) {
// Serve raw content from RawDomain
log.Debug().Msg("raw domain")
@ -41,7 +41,7 @@ func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie
TargetPath: path.Join(pathElements[3:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
log.Debug().Msg("missing branch info")
@ -58,7 +58,7 @@ func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with default branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo),

View file

@ -21,7 +21,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite
defaultPagesBranches []string,
trimmedHost string,
pathElements []string,
canonicalDomainCache cache.SetGetKey,
canonicalDomainCache, redirectsCache cache.SetGetKey,
) {
// Serve pages from subdomains of MainDomainSuffix
log.Debug().Msg("main domain suffix")
@ -53,7 +53,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve with specified repo and branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
@ -83,7 +83,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite
TargetPath: path.Join(pathElements[1:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve default pages repo with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo),
@ -106,7 +106,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite
TargetPath: path.Join(pathElements[1:]...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
}
@ -122,7 +122,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite
TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
}
@ -137,7 +137,7 @@ func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gite
TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache)
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}

View file

@ -21,6 +21,7 @@ func TestHandlerPerformance(t *testing.T) {
[]string{"pages"},
cache.NewKeyValueCache(),
cache.NewKeyValueCache(),
cache.NewKeyValueCache(),
)
testCase := func(uri string, status int) {

View file

@ -18,6 +18,7 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, trimmedHost string,
options *upstream.Options,
canonicalDomainCache cache.SetGetKey,
redirectsCache cache.SetGetKey,
) {
// check if a canonical domain exists on a request on MainDomain
if strings.HasSuffix(trimmedHost, mainDomainSuffix) && !options.ServeRaw {
@ -39,7 +40,7 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
options.Host = trimmedHost
// Try to request the file from the Gitea API
if !options.Upstream(ctx, giteaClient) {
if !options.Upstream(ctx, giteaClient, redirectsCache) {
html.ReturnErrorPage(ctx, "", ctx.StatusCode)
}
}

View file

@ -0,0 +1,117 @@
package upstream
import (
"strconv"
"strings"
"time"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
"github.com/rs/zerolog/log"
)
type Redirect struct {
From string
To string
StatusCode int
}
// redirectsCacheTimeout specifies the timeout for the redirects cache.
var redirectsCacheTimeout = 10 * time.Minute
const redirectsConfig = "_redirects"
// getRedirects returns redirects specified in the _redirects file.
func (o *Options) getRedirects(giteaClient *gitea.Client, redirectsCache cache.SetGetKey) []Redirect {
var redirects []Redirect
cacheKey := o.TargetOwner + "/" + o.TargetRepo + "/" + o.TargetBranch
// Check for cached redirects
if cachedValue, ok := redirectsCache.Get(cacheKey); ok {
redirects = cachedValue.([]Redirect)
} else {
// Get _redirects file and parse
body, err := giteaClient.GiteaRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, redirectsConfig)
if err == nil {
for _, line := range strings.Split(string(body), "\n") {
redirectArr := strings.Fields(line)
// Ignore comments and invalid lines
if strings.HasPrefix(line, "#") || len(redirectArr) < 2 {
continue
}
// Get redirect status code
statusCode := 301
if len(redirectArr) == 3 {
statusCode, err = strconv.Atoi(redirectArr[2])
if err != nil {
log.Info().Err(err).Msgf("could not read %s of %s/%s", redirectsConfig, o.TargetOwner, o.TargetRepo)
}
}
redirects = append(redirects, Redirect{
From: redirectArr[0],
To: redirectArr[1],
StatusCode: statusCode,
})
}
}
_ = redirectsCache.Set(cacheKey, redirects, redirectsCacheTimeout)
}
return redirects
}
func (o *Options) matchRedirects(ctx *context.Context, giteaClient *gitea.Client, redirects []Redirect, redirectsCache cache.SetGetKey) (final bool) {
if len(redirects) > 0 {
for _, redirect := range redirects {
reqUrl := ctx.Req.RequestURI
// remove repo and branch from request url
reqUrl = strings.TrimPrefix(reqUrl, "/"+o.TargetRepo)
reqUrl = strings.TrimPrefix(reqUrl, "/@"+o.TargetBranch)
// check if from url matches request url
if strings.TrimSuffix(redirect.From, "/") == strings.TrimSuffix(reqUrl, "/") {
// do rewrite if status code is 200
if redirect.StatusCode == 200 {
o.TargetPath = redirect.To
o.Upstream(ctx, giteaClient, redirectsCache)
return true
} else {
ctx.Redirect(redirect.To, redirect.StatusCode)
return true
}
}
// handle wildcard redirects
trimmedFromUrl := strings.TrimSuffix(redirect.From, "/*")
if strings.HasSuffix(redirect.From, "/*") && strings.HasPrefix(reqUrl, trimmedFromUrl) {
if strings.Contains(redirect.To, ":splat") {
splatUrl := strings.ReplaceAll(redirect.To, ":splat", strings.TrimPrefix(reqUrl, trimmedFromUrl))
// do rewrite if status code is 200
if redirect.StatusCode == 200 {
o.TargetPath = splatUrl
o.Upstream(ctx, giteaClient, redirectsCache)
return true
} else {
ctx.Redirect(splatUrl, redirect.StatusCode)
return true
}
} else {
// do rewrite if status code is 200
if redirect.StatusCode == 200 {
o.TargetPath = redirect.To
o.Upstream(ctx, giteaClient, redirectsCache)
return true
} else {
ctx.Redirect(redirect.To, redirect.StatusCode)
return true
}
}
}
}
}
return false
}

View file

@ -11,6 +11,7 @@ import (
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/html"
"codeberg.org/codeberg/pages/server/cache"
"codeberg.org/codeberg/pages/server/context"
"codeberg.org/codeberg/pages/server/gitea"
)
@ -52,7 +53,7 @@ type Options struct {
}
// Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (final bool) {
func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.SetGetKey) (final bool) {
log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
if o.TargetOwner == "" || o.TargetRepo == "" {
@ -103,6 +104,12 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
// Handle not found error
if err != nil && errors.Is(err, gitea.ErrorNotFound) {
// Get and match redirects
redirects := o.getRedirects(giteaClient, redirectsCache)
if o.matchRedirects(ctx, giteaClient, redirects, redirectsCache) {
return true
}
if o.TryIndexPages {
// copy the o struct & try if an index page exists
optionsForIndexPages := *o
@ -110,7 +117,7 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
optionsForIndexPages.appendTrailingSlash = true
for _, indexPage := range upstreamIndexPages {
optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage
if optionsForIndexPages.Upstream(ctx, giteaClient) {
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
@ -118,7 +125,7 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
optionsForIndexPages.appendTrailingSlash = false
optionsForIndexPages.redirectIfExists = strings.TrimSuffix(ctx.Path(), "/") + ".html"
optionsForIndexPages.TargetPath = o.TargetPath + ".html"
if optionsForIndexPages.Upstream(ctx, giteaClient) {
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
@ -131,11 +138,12 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin
optionsForNotFoundPages.appendTrailingSlash = false
for _, notFoundPage := range upstreamNotFoundPages {
optionsForNotFoundPages.TargetPath = "/" + notFoundPage
if optionsForNotFoundPages.Upstream(ctx, giteaClient) {
if optionsForNotFoundPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
}
return false
}