switch to std http implementation instead of fasthttp (#106)

close #100
close #109
close #113
close #28
close #63

Reviewed-on: https://codeberg.org/Codeberg/pages-server/pulls/106
This commit is contained in:
6543 2022-11-12 20:37:20 +01:00
parent 69eabb248a
commit b9966487f6
28 changed files with 827 additions and 584 deletions

View file

@ -1,12 +1,116 @@
package gitea
import (
"bytes"
"fmt"
"io"
"net/http"
"time"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/server/cache"
)
const (
// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
defaultBranchCacheTimeout = 15 * time.Minute
// branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter
// than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be
// picked up faster, while still allowing the content to be cached longer if nothing changes.
branchExistenceCacheTimeout = 5 * time.Minute
// fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending
// on your available memory.
// TODO: move as option into cache interface
fileCacheTimeout = 5 * time.Minute
// fileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default.
fileCacheSizeLimit = int64(1000 * 1000)
)
type FileResponse struct {
Exists bool
ETag []byte
MimeType string
Body []byte
Exists bool
IsSymlink bool
ETag string
MimeType string
Body []byte
}
func (f FileResponse) IsEmpty() bool {
return len(f.Body) != 0
}
func (f FileResponse) createHttpResponse(cacheKey string) (http.Header, int) {
header := make(http.Header)
var statusCode int
if f.Exists {
statusCode = http.StatusOK
} else {
statusCode = http.StatusNotFound
}
if f.IsSymlink {
header.Set(giteaObjectTypeHeader, objTypeSymlink)
}
header.Set(ETagHeader, f.ETag)
header.Set(ContentTypeHeader, f.MimeType)
header.Set(ContentLengthHeader, fmt.Sprintf("%d", len(f.Body)))
header.Set(PagesCacheIndicatorHeader, "true")
log.Trace().Msgf("fileCache for %q used", cacheKey)
return header, statusCode
}
type BranchTimestamp struct {
Branch string
Timestamp time.Time
notFound bool
}
type writeCacheReader struct {
originalReader io.ReadCloser
buffer *bytes.Buffer
rileResponse *FileResponse
cacheKey string
cache cache.SetGetKey
hasError bool
}
func (t *writeCacheReader) Read(p []byte) (n int, err error) {
n, err = t.originalReader.Read(p)
if err != nil {
log.Trace().Err(err).Msgf("[cache] original reader for %q has returned an error", t.cacheKey)
t.hasError = true
} else if n > 0 {
_, _ = t.buffer.Write(p[:n])
}
return
}
func (t *writeCacheReader) Close() error {
if !t.hasError {
fc := *t.rileResponse
fc.Body = t.buffer.Bytes()
_ = t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
}
log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, !t.hasError)
return t.originalReader.Close()
}
func (f FileResponse) CreateCacheReader(r io.ReadCloser, cache cache.SetGetKey, cacheKey string) io.ReadCloser {
if r == nil || cache == nil || cacheKey == "" {
log.Error().Msg("could not create CacheReader")
return nil
}
return &writeCacheReader{
originalReader: r,
buffer: bytes.NewBuffer(make([]byte, 0)),
rileResponse: &f,
cache: cache,
cacheKey: cacheKey,
}
}

View file

@ -1,142 +1,276 @@
package gitea
import (
"bytes"
"errors"
"fmt"
"io"
"mime"
"net/http"
"net/url"
"path"
"strconv"
"strings"
"time"
"code.gitea.io/sdk/gitea"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
"github.com/valyala/fastjson"
)
const (
giteaAPIRepos = "/api/v1/repos/"
giteaObjectTypeHeader = "X-Gitea-Object-Type"
"codeberg.org/codeberg/pages/server/cache"
)
var ErrorNotFound = errors.New("not found")
const (
// cache key prefixe
branchTimestampCacheKeyPrefix = "branchTime"
defaultBranchCacheKeyPrefix = "defaultBranch"
rawContentCacheKeyPrefix = "rawContent"
// pages server
PagesCacheIndicatorHeader = "X-Pages-Cache"
symlinkReadLimit = 10000
// gitea
giteaObjectTypeHeader = "X-Gitea-Object-Type"
objTypeSymlink = "symlink"
// std
ETagHeader = "ETag"
ContentTypeHeader = "Content-Type"
ContentLengthHeader = "Content-Length"
)
type Client struct {
giteaRoot string
giteaAPIToken string
fastClient *fasthttp.Client
infoTimeout time.Duration
contentTimeout time.Duration
sdkClient *gitea.Client
responseCache cache.SetGetKey
followSymlinks bool
supportLFS bool
forbiddenMimeTypes map[string]bool
defaultMimeType string
}
// TODO: once golang v1.19 is min requirement, we can switch to 'JoinPath()' of 'net/url' package
func joinURL(baseURL string, paths ...string) string {
p := make([]string, 0, len(paths))
for i := range paths {
path := strings.TrimSpace(paths[i])
path = strings.Trim(path, "/")
if len(path) != 0 {
p = append(p, path)
}
}
return baseURL + "/" + strings.Join(p, "/")
}
func NewClient(giteaRoot, giteaAPIToken string, followSymlinks, supportLFS bool) (*Client, error) {
func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) {
rootURL, err := url.Parse(giteaRoot)
if err != nil {
return nil, err
}
giteaRoot = strings.Trim(rootURL.String(), "/")
stdClient := http.Client{Timeout: 10 * time.Second}
// TODO: pass down
var (
forbiddenMimeTypes map[string]bool
defaultMimeType string
)
if forbiddenMimeTypes == nil {
forbiddenMimeTypes = make(map[string]bool)
}
if defaultMimeType == "" {
defaultMimeType = "application/octet-stream"
}
sdk, err := gitea.NewClient(giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(giteaAPIToken))
return &Client{
giteaRoot: giteaRoot,
giteaAPIToken: giteaAPIToken,
infoTimeout: 5 * time.Second,
contentTimeout: 10 * time.Second,
fastClient: getFastHTTPClient(),
sdkClient: sdk,
responseCache: respCache,
followSymlinks: followSymlinks,
supportLFS: supportLFS,
forbiddenMimeTypes: forbiddenMimeTypes,
defaultMimeType: defaultMimeType,
}, err
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
resp, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
if err != nil {
return nil, err
}
return resp.Body(), nil
defer reader.Close()
return io.ReadAll(reader)
}
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (*fasthttp.Response, error) {
var apiURL string
if client.supportLFS {
apiURL = joinURL(client.giteaRoot, giteaAPIRepos, targetOwner, targetRepo, "media", resource+"?ref="+url.QueryEscape(ref))
} else {
apiURL = joinURL(client.giteaRoot, giteaAPIRepos, targetOwner, targetRepo, "raw", resource+"?ref="+url.QueryEscape(ref))
}
resp, err := client.do(client.contentTimeout, apiURL)
if err != nil {
return nil, err
}
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
if err != nil {
return nil, err
}
switch resp.StatusCode() {
case fasthttp.StatusOK:
objType := string(resp.Header.Peek(giteaObjectTypeHeader))
log.Trace().Msgf("server raw content object: %s", objType)
if client.followSymlinks && objType == "symlink" {
// TODO: limit to 1000 chars if we switched to std
linkDest := strings.TrimSpace(string(resp.Body()))
log.Debug().Msgf("follow symlink from '%s' to '%s'", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
// handle if cache entry exist
if cache, ok := client.responseCache.Get(cacheKey); ok {
cache := cache.(FileResponse)
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
// TODO: check against some timestamp missmatch?!?
if cache.Exists {
if cache.IsSymlink {
linkDest := string(cache.Body)
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
} else {
log.Debug().Msg("[cache] return bytes")
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
}
} else {
return nil, cachedHeader, cachedStatusCode, ErrorNotFound
}
return resp, nil
case fasthttp.StatusNotFound:
return nil, ErrorNotFound
default:
return nil, fmt.Errorf("unexpected status code '%d'", resp.StatusCode())
}
// not in cache, open reader via gitea api
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
if resp != nil {
switch resp.StatusCode {
case http.StatusOK:
// first handle symlinks
{
objType := resp.Header.Get(giteaObjectTypeHeader)
log.Trace().Msgf("server raw content object %q", objType)
if client.followSymlinks && objType == objTypeSymlink {
defer reader.Close()
// read limited chars for symlink
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit))
if err != nil {
return nil, nil, http.StatusInternalServerError, err
}
linkDest := strings.TrimSpace(string(linkDestBytes))
// we store symlink not content to reduce duplicates in cache
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: true,
IsSymlink: true,
Body: []byte(linkDest),
ETag: resp.Header.Get(ETagHeader),
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
}
}
// now we are sure it's content so set the MIME type
mimeType := client.getMimeTypeByExtension(resource)
resp.Response.Header.Set(ContentTypeHeader, mimeType)
if !shouldRespBeSavedToCache(resp.Response) {
return reader, resp.Response.Header, resp.StatusCode, err
}
// now we write to cache and respond at the sime time
fileResp := FileResponse{
Exists: true,
ETag: resp.Header.Get(ETagHeader),
MimeType: mimeType,
}
return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil
case http.StatusNotFound:
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: false,
ETag: resp.Header.Get(ETagHeader),
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound
default:
return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
}
return nil, nil, http.StatusInternalServerError, err
}
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (time.Time, error) {
url := joinURL(client.giteaRoot, giteaAPIRepos, repoOwner, repoName, "branches", branchName)
res, err := client.do(client.infoTimeout, url)
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) {
cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName)
if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil {
branchTimeStamp := stamp.(*BranchTimestamp)
if branchTimeStamp.notFound {
log.Trace().Msgf("[cache] use branch %q not found", branchName)
return &BranchTimestamp{}, ErrorNotFound
}
log.Trace().Msgf("[cache] use branch %q exist", branchName)
return branchTimeStamp, nil
}
branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName)
if err != nil {
return time.Time{}, err
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.Trace().Msgf("[cache] set cache branch %q not found", branchName)
if err := client.responseCache.Set(cacheKey, &BranchTimestamp{Branch: branchName, notFound: true}, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return &BranchTimestamp{}, ErrorNotFound
}
return &BranchTimestamp{}, err
}
if res.StatusCode() != fasthttp.StatusOK {
return time.Time{}, fmt.Errorf("unexpected status code '%d'", res.StatusCode())
if resp.StatusCode != http.StatusOK {
return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
return time.Parse(time.RFC3339, fastjson.GetString(res.Body(), "commit", "timestamp"))
stamp := &BranchTimestamp{
Branch: branch.Name,
Timestamp: branch.Commit.Timestamp,
}
log.Trace().Msgf("set cache branch [%s] exist", branchName)
if err := client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return stamp, nil
}
func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) {
url := joinURL(client.giteaRoot, giteaAPIRepos, repoOwner, repoName)
res, err := client.do(client.infoTimeout, url)
cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName)
if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil {
return branch.(string), nil
}
repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName)
if err != nil {
return "", err
}
if res.StatusCode() != fasthttp.StatusOK {
return "", fmt.Errorf("unexpected status code '%d'", res.StatusCode())
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
return fastjson.GetString(res.Body(), "default_branch"), nil
branch := repo.DefaultBranch
if err := client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return branch, nil
}
func (client *Client) do(timeout time.Duration, url string) (*fasthttp.Response, error) {
req := fasthttp.AcquireRequest()
req.SetRequestURI(url)
req.Header.Set(fasthttp.HeaderAuthorization, "token "+client.giteaAPIToken)
res := fasthttp.AcquireResponse()
err := client.fastClient.DoTimeout(req, res, timeout)
return res, err
func (client *Client) getMimeTypeByExtension(resource string) string {
mimeType := mime.TypeByExtension(path.Ext(resource))
mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" {
mimeType = client.defaultMimeType
}
log.Trace().Msgf("probe mime of %q is %q", resource, mimeType)
return mimeType
}
func shouldRespBeSavedToCache(resp *http.Response) bool {
if resp == nil {
return false
}
contentLengthRaw := resp.Header.Get(ContentLengthHeader)
if contentLengthRaw == "" {
return false
}
contentLeng, err := strconv.ParseInt(contentLengthRaw, 10, 64)
if err != nil {
log.Error().Err(err).Msg("could not parse content length")
}
// if content to big or could not be determined we not cache it
return contentLeng > 0 && contentLeng < fileCacheSizeLimit
}

View file

@ -1,23 +0,0 @@
package gitea
import (
"net/url"
"testing"
"github.com/stretchr/testify/assert"
)
func TestJoinURL(t *testing.T) {
baseURL := ""
assert.EqualValues(t, "/", joinURL(baseURL))
assert.EqualValues(t, "/", joinURL(baseURL, "", ""))
baseURL = "http://wwow.url.com"
assert.EqualValues(t, "http://wwow.url.com/a/b/c/d", joinURL(baseURL, "a", "b/c/", "d"))
baseURL = "http://wow.url.com/subpath/2"
assert.EqualValues(t, "http://wow.url.com/subpath/2/content.pdf", joinURL(baseURL, "/content.pdf"))
assert.EqualValues(t, "http://wow.url.com/subpath/2/wonderful.jpg", joinURL(baseURL, "wonderful.jpg"))
assert.EqualValues(t, "http://wow.url.com/subpath/2/raw/wonderful.jpg?ref=main", joinURL(baseURL, "raw", "wonderful.jpg"+"?ref="+url.QueryEscape("main")))
assert.EqualValues(t, "http://wow.url.com/subpath/2/raw/wonderful.jpg%3Fref=main", joinURL(baseURL, "raw", "wonderful.jpg%3Fref=main"))
}

View file

@ -1,15 +0,0 @@
package gitea
import (
"time"
"github.com/valyala/fasthttp"
)
func getFastHTTPClient() *fasthttp.Client {
return &fasthttp.Client{
MaxConnDuration: 60 * time.Second,
MaxConnWaitTimeout: 1000 * time.Millisecond,
MaxConnsPerHost: 128 * 16, // TODO: adjust bottlenecks for best performance with Gitea!
}
}