pages-server/server/gitea/cache.go
ltdk 5b120f0488 Implement static serving of compressed files (#387)
This provides an option for #223 without fully resolving it. (I think.)

Essentially, it acts very similar to the `gzip_static` and similar options for nginx, where it will check for the existence of pre-compressed files and serve those instead if the client allows it. I couldn't find a pre-existing way to actually parse the Accept-Encoding header properly (admittedly didn't look very hard) and just implemented one on my own that should be fine.

This should hopefully not have the same DOS vulnerabilities as #302, since it relies on the existing caching system. Compressed versions of files will be cached just like any other files, and that includes cache for missing files as well.

The compressed files will also be accessible directly, and this won't automatically decompress them. So, if you have a `tar.gz` file that you access directly, it will still be downloaded as the gzipped version, although you will now gain the option to download the `.tar` directly and decompress it in transit. (Which doesn't affect the server at all, just the client's way of interpreting it.)

----

One key thing this change also adds is a short-circuit when accessing directories: these always return 404 via the API, although they'd try the cache anyway and go through that route, which was kind of slow. Adding in the additional encodings, it's going to try for .gz, .br, and .zst files in the worst case as well, which feels wrong. So, instead, it just always falls back to the index-check behaviour if the path ends in a slash or is empty. (Which is implicitly just a slash.)

----

For testing, I set up this repo: https://codeberg.org/clarfonthey/testrepo

I ended up realising that LFS wasn't supported by default with `just dev`, so, it ended up working until I made sure the files on the repo *didn't* use LFS.

Assuming you've run `just dev`, you can go directly to this page in the browser here: https://clarfonthey.localhost.mock.directory:4430/testrepo/
And also you can try a few cURL commands:

```shell
curl https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure
curl -H 'Accept-Encoding: gz' https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure | gunzip -
curl -H 'Accept-Encoding: br' https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure | brotli --decompress -
curl -H 'Accept-Encoding: zst' https://clarfonthey.localhost.mock.directory:4430/testrepo/ --verbose --insecure | zstd --decompress -
```

Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/387
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: ltdk <usr@ltdk.xyz>
Co-committed-by: ltdk <usr@ltdk.xyz>
2024-09-29 21:00:54 +00:00

137 lines
3.6 KiB
Go

package gitea
import (
"bytes"
"fmt"
"io"
"net/http"
"time"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/server/cache"
)
const (
// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
defaultBranchCacheTimeout = 15 * time.Minute
// branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter
// than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be
// picked up faster, while still allowing the content to be cached longer if nothing changes.
branchExistenceCacheTimeout = 5 * time.Minute
// fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending
// on your available memory.
// TODO: move as option into cache interface
fileCacheTimeout = 5 * time.Minute
// ownerExistenceCacheTimeout specifies the timeout for the existence of a repo/org
ownerExistenceCacheTimeout = 5 * time.Minute
// fileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default.
fileCacheSizeLimit = int64(1000 * 1000)
)
type FileResponse struct {
Exists bool
IsSymlink bool
ETag string
// uncompressed MIME type
MimeType string
// raw MIME type (if compressed, type of compression)
RawMime string
Body []byte
}
func (f FileResponse) IsEmpty() bool {
return len(f.Body) == 0
}
func (f FileResponse) createHttpResponse(cacheKey string, decompress bool) (header http.Header, statusCode int) {
header = make(http.Header)
if f.Exists {
statusCode = http.StatusOK
} else {
statusCode = http.StatusNotFound
}
if f.IsSymlink {
header.Set(giteaObjectTypeHeader, objTypeSymlink)
}
header.Set(ETagHeader, f.ETag)
if decompress {
header.Set(ContentTypeHeader, f.MimeType)
} else {
header.Set(ContentTypeHeader, f.RawMime)
}
header.Set(ContentLengthHeader, fmt.Sprintf("%d", len(f.Body)))
header.Set(PagesCacheIndicatorHeader, "true")
log.Trace().Msgf("fileCache for %q used", cacheKey)
return header, statusCode
}
type BranchTimestamp struct {
Branch string
Timestamp time.Time
notFound bool
}
type writeCacheReader struct {
originalReader io.ReadCloser
buffer *bytes.Buffer
fileResponse *FileResponse
cacheKey string
cache cache.ICache
hasError bool
}
func (t *writeCacheReader) Read(p []byte) (n int, err error) {
log.Trace().Msgf("[cache] read %q", t.cacheKey)
n, err = t.originalReader.Read(p)
if err != nil && err != io.EOF {
log.Trace().Err(err).Msgf("[cache] original reader for %q has returned an error", t.cacheKey)
t.hasError = true
} else if n > 0 {
_, _ = t.buffer.Write(p[:n])
}
return
}
func (t *writeCacheReader) Close() error {
doWrite := !t.hasError
fc := *t.fileResponse
fc.Body = t.buffer.Bytes()
if fc.IsEmpty() {
log.Trace().Msg("[cache] file response is empty")
doWrite = false
}
if doWrite {
err := t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
if err != nil {
log.Trace().Err(err).Msgf("[cache] writer for %q has returned an error", t.cacheKey)
}
}
log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, doWrite)
return t.originalReader.Close()
}
func (f FileResponse) CreateCacheReader(r io.ReadCloser, cache cache.ICache, cacheKey string) io.ReadCloser {
if r == nil || cache == nil || cacheKey == "" {
log.Error().Msg("could not create CacheReader")
return nil
}
return &writeCacheReader{
originalReader: r,
buffer: bytes.NewBuffer(make([]byte, 0)),
fileResponse: &f,
cache: cache,
cacheKey: cacheKey,
}
}