From 8dac935cd8a0ad9e6386c25b5d03569fd841c2cd Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Sun, 18 Sep 2022 21:02:55 +0200
Subject: [PATCH] wip: add fileCache back

---
 server/gitea/cache.go  |  40 +++++++++---
 server/gitea/client.go | 139 ++++++++++++++++++++++++++++-------------
 2 files changed, 127 insertions(+), 52 deletions(-)

diff --git a/server/gitea/cache.go b/server/gitea/cache.go
index 12303c8..7d7343d 100644
--- a/server/gitea/cache.go
+++ b/server/gitea/cache.go
@@ -1,24 +1,48 @@
 package gitea
 
-import "time"
+import (
+	"net/http"
+	"time"
+)
 
 type FileResponse struct {
-	Exists   bool
-	ETag     []byte
-	MimeType string
-	Body     []byte
+	Exists    bool
+	IsSymlink bool
+	ETag      string
+	MimeType  string
+	Body      []byte
 }
 
 func (f FileResponse) IsEmpty() bool {
 	return len(f.Body) != 0
 }
 
+func (f FileResponse) createHttpResponse() *http.Response {
+	resp := &http.Response{
+		Header: make(http.Header),
+	}
+
+	if f.Exists {
+		resp.StatusCode = http.StatusOK
+	} else {
+		resp.StatusCode = http.StatusNotFound
+	}
+
+	if f.IsSymlink {
+		resp.Header.Set(giteaObjectTypeHeader, objTypeSymlink)
+	}
+	resp.Header.Set(eTagHeader, f.ETag)
+	resp.Header.Set(contentTypeHeader, f.MimeType)
+
+	return resp
+}
+
 type BranchTimestamp struct {
 	Branch    string
 	Timestamp time.Time
 }
 
-var (
+const (
 	// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
 	defaultBranchCacheTimeout = 15 * time.Minute
 
@@ -30,8 +54,8 @@ var (
 	// fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending
 	// on your available memory.
 	// TODO: move as option into cache interface
-	// fileCacheTimeout = 5 * time.Minute
+	fileCacheTimeout = 5 * time.Minute
 
 	// fileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default.
-	// fileCacheSizeLimit = 1024 * 1024
+	fileCacheSizeLimit = int64(1024 * 1024)
 )
diff --git a/server/gitea/client.go b/server/gitea/client.go
index ee2f8b3..01d3f9e 100644
--- a/server/gitea/client.go
+++ b/server/gitea/client.go
@@ -1,11 +1,13 @@
 package gitea
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
 	"net/http"
 	"net/url"
+	"strconv"
 	"strings"
 	"time"
 
@@ -18,9 +20,19 @@ import (
 var ErrorNotFound = errors.New("not found")
 
 const (
+	// cache key prefixes
 	branchTimestampCacheKeyPrefix = "branchTime"
 	defaultBranchCacheKeyPrefix   = "defaultBranch"
-	giteaObjectTypeHeader         = "X-Gitea-Object-Type"
+	rawContentCacheKeyPrefix      = "rawContent"
+
+	// gitea headers
+	giteaObjectTypeHeader = "X-Gitea-Object-Type"
+	objTypeSymlink        = "symlink"
+
+	// standard headers
+	eTagHeader          = "ETag"
+	contentTypeHeader   = "Content-Type"
+	contentLengthHeader = "Content-Length"
 )
 
 type Client struct {
@@ -59,61 +71,100 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str
 }
 
 func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, *http.Response, error) {
+	cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
+	log := log.With().Str("cache_key", cacheKey).Logger()
+
+	// handle the case that a cache entry already exists
+	if cache, ok := client.responseCache.Get(cacheKey); ok {
+		cache := cache.(FileResponse)
+		// TODO: check against some timestamp mismatch?!?
+		if cache.Exists {
+			if cache.IsSymlink {
+				linkDest := string(cache.Body)
+				log.Debug().Msgf("[cache] follow symlink from '%s' to '%s'", resource, linkDest)
+				return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
+			} else {
+				log.Debug().Msg("[cache] return bytes")
+				return io.NopCloser(bytes.NewReader(cache.Body)), cache.createHttpResponse(), nil
+			}
+		} else {
+			return nil, cache.createHttpResponse(), ErrorNotFound
+		}
+	}
+
 	// if cachedValue, ok := fileResponseCache.Get(uri + "?timestamp=" + o.timestamp()); ok && !cachedValue.(gitea.FileResponse).IsEmpty() {
 	// 	cachedResponse = cachedValue.(gitea.FileResponse)
 
 	reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
 	if resp != nil {
 		switch resp.StatusCode {
 		case http.StatusOK:
+			// first handle symlinks
+			{
+				objType := resp.Header.Get(giteaObjectTypeHeader)
+				log.Trace().Msgf("server raw content object: %s", objType)
+				if client.followSymlinks && objType == objTypeSymlink {
+					// limit symlink destination to 10000 bytes
+					defer reader.Close()
+					linkDestBytes, err := io.ReadAll(io.LimitReader(reader, 10000))
+					if err != nil {
+						return nil, nil, err
+					}
+					linkDest := strings.TrimSpace(string(linkDestBytes))
-			// add caching
+					if err := client.responseCache.Set(cacheKey, FileResponse{
+						Exists:    true,
+						IsSymlink: true,
+						Body:      []byte(linkDest),
+						ETag:      resp.Header.Get(eTagHeader),
+					}, fileCacheTimeout); err != nil {
+						log.Error().Err(err).Msg("could not save symlink in cache")
+					}
-
-			// Write the response body to the original request
-			// var cacheBodyWriter bytes.Buffer
-			// if res != nil {
-			// 	if res.Header.ContentLength() > fileCacheSizeLimit {
-			// 		// fasthttp else will set "Content-Length: 0"
-			// 		ctx.Response().SetBodyStream(&strings.Reader{}, -1)
-			//
-			// 		err = res.BodyWriteTo(ctx.Response.BodyWriter())
-			// 	} else {
-			// 		// TODO: cache is half-empty if request is cancelled - does the ctx.Err() below do the trick?
-			// 		err = res.BodyWriteTo(io.MultiWriter(ctx.Response().BodyWriter(), &cacheBodyWriter))
-			// 	}
-			// } else {
-			// 	_, err = ctx.Write(cachedResponse.Body)
-			// }
-
-			// if res != nil && res.Header.ContentLength() <= fileCacheSizeLimit && ctx.Err() == nil {
-			// 	cachedResponse.Exists = true
-			// 	cachedResponse.MimeType = mimeType
-			// 	cachedResponse.Body = cacheBodyWriter.Bytes()
-			// 	_ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), cachedResponse, fileCacheTimeout)
-			// }
-			// store ETag in resp !!!!
-
-			objType := resp.Header.Get(giteaObjectTypeHeader)
-			log.Trace().Msgf("server raw content object: %s", objType)
-			if client.followSymlinks && objType == "symlink" {
-				// limit to 1000 chars
-				defer reader.Close()
-				linkDestBytes, err := io.ReadAll(io.LimitReader(reader, 10000))
-				if err != nil {
-					return nil, nil, err
+					log.Debug().Msgf("follow symlink from '%s' to '%s'", resource, linkDest)
+					return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
 				}
-				linkDest := strings.TrimSpace(string(linkDestBytes))
-
-				log.Debug().Msgf("follow symlink from '%s' to '%s'", resource, linkDest)
-				return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
 			}
-			return reader, resp.Response, err
-		case http.StatusNotFound:
+			// now we are sure it's content
+			{
+				contentLeng, err2 := strconv.ParseInt(resp.Header.Get(contentLengthHeader), 10, 64)
+				if err2 != nil {
+					log.Error().Err(err2).Msg("could not parse content length")
+				}
+				if contentLeng <= 0 || contentLeng > fileCacheSizeLimit {
+					// if the content is too big or its size could not be determined, return it without caching
+					return reader, resp.Response, err
+				}
-			// add not exist caching
-			// _ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{
-			// 	Exists: false,
-			// }, fileCacheTimeout)
+				// now we write to cache and respond at the same time
+
+				// TODO: actually do both at the same time !!!
+				/*
+					we need a "go"
+					// TODO: cache is half-empty if request is cancelled - does the ctx.Err() below do the trick?
+					// err = res.BodyWriteTo(io.MultiWriter(ctx.Response().BodyWriter(), &cacheBodyWriter))
+				*/
+				defer reader.Close()
+				body, err := io.ReadAll(io.LimitReader(reader, contentLeng))
+				if err != nil {
+					return nil, nil, err
+				}
+				if err := client.responseCache.Set(cacheKey, FileResponse{
+					Exists:   true,
+					ETag:     resp.Header.Get(eTagHeader),
+					MimeType: resp.Header.Get(contentTypeHeader),
+					Body:     body,
+				}, fileCacheTimeout); err != nil {
+					log.Error().Err(err).Msg("could not save content in cache")
+				}
+				return io.NopCloser(bytes.NewReader(body)), resp.Response, nil
+			}
+
+		case http.StatusNotFound:
+			if err := client.responseCache.Set(cacheKey, FileResponse{
+				Exists: false,
+				ETag:   resp.Header.Get(eTagHeader),
+			}, fileCacheTimeout); err != nil {
+				log.Error().Err(err).Msg("could not save 404 in cache")
+			}
 			return nil, resp.Response, ErrorNotFound
 		default:
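
Note on the "write to cache and respond at the same time" TODO above (and the half-empty-cache concern carried over from the removed fasthttp comments): one way to avoid buffering the whole file with io.ReadAll before answering is to hand the caller a reader that mirrors everything it streams into an in-memory buffer and only writes the cache entry once the body has been read completely. The sketch below is not part of this patch and only illustrates the idea with io.TeeReader; teeToCache and cacheSetter are made-up names, and it assumes the FileResponse type, fileCacheTimeout, and a responseCache.Set(key, value, timeout) error method shaped like the calls in the patch.

package gitea

import (
	"bytes"
	"io"
	"time"
)

// cacheSetter is the subset of the response cache used by this sketch
// (assumed to match how client.responseCache.Set is called in the patch).
type cacheSetter interface {
	Set(key string, value interface{}, timeout time.Duration) error
}

// teeToCache streams the upstream body to the caller while mirroring every
// chunk that passes through into an in-memory buffer. On Close the buffered
// body is stored in the cache, so serving and caching happen in one pass.
type teeToCache struct {
	upstream io.ReadCloser // body reader obtained from Gitea
	tee      io.Reader     // reads from upstream and copies into buf
	buf      bytes.Buffer  // collects the body for the cache entry
	cache    cacheSetter
	key      string
	resp     FileResponse // Exists/ETag/MimeType pre-filled; Body is set on Close
	timeout  time.Duration
}

func newTeeToCache(upstream io.ReadCloser, cache cacheSetter, key string, resp FileResponse, timeout time.Duration) *teeToCache {
	t := &teeToCache{upstream: upstream, cache: cache, key: key, resp: resp, timeout: timeout}
	t.tee = io.TeeReader(upstream, &t.buf)
	return t
}

func (t *teeToCache) Read(p []byte) (int, error) { return t.tee.Read(p) }

func (t *teeToCache) Close() error {
	// Drain whatever the caller did not consume so the cache entry is never
	// half-empty; TeeReader mirrors the drained bytes into buf as well.
	if _, err := io.Copy(io.Discard, t.tee); err == nil {
		t.resp.Body = t.buf.Bytes()
		_ = t.cache.Set(t.key, t.resp, t.timeout)
	}
	return t.upstream.Close()
}

With something like this, the StatusOK branch could return newTeeToCache(reader, client.responseCache, cacheKey, FileResponse{Exists: true, ETag: resp.Header.Get(eTagHeader), MimeType: resp.Header.Get(contentTypeHeader)}, fileCacheTimeout) after the fileCacheSizeLimit check, instead of buffering with io.ReadAll and returning io.NopCloser(bytes.NewReader(body)).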