pages-server/server/gitea/client.go

285 lines
8.7 KiB
Go
Raw Normal View History

package gitea
import (
2022-09-18 19:02:55 +00:00
"bytes"
"errors"
2022-08-28 18:54:17 +00:00
"fmt"
"io"
2022-09-18 19:56:56 +00:00
"mime"
2022-08-28 18:54:17 +00:00
"net/http"
"net/url"
2022-09-18 19:56:56 +00:00
"path"
2022-09-18 19:02:55 +00:00
"strconv"
2022-08-28 18:54:17 +00:00
"strings"
"time"
"code.gitea.io/sdk/gitea"
"github.com/rs/zerolog/log"
"codeberg.org/codeberg/pages/server/cache"
)
// ErrorNotFound is the sentinel error returned by this package when the
// requested file or branch does not exist (either according to a cached
// negative entry or a 404 response from Gitea).
var ErrorNotFound = errors.New("not found")
2022-07-27 13:39:46 +00:00
const (
	// cache key prefixes
	//
	// Each prefix namespaces one kind of entry in the response cache.
	branchTimestampCacheKeyPrefix = "branchTime"
	defaultBranchCacheKeyPrefix   = "defaultBranch"
	rawContentCacheKeyPrefix      = "rawContent"

	// pages server

	// PagesCacheIndicatorHeader is the name of the pages-server specific
	// cache indicator header.
	PagesCacheIndicatorHeader = "X-Pages-Cache"
	// symlinkReadLimit is the maximum number of bytes read from a symlink
	// object when resolving its destination.
	symlinkReadLimit = 10000

	// gitea

	// giteaObjectTypeHeader is the response header inspected to learn the
	// type of the served object.
	giteaObjectTypeHeader = "X-Gitea-Object-Type"
	// objTypeSymlink is the giteaObjectTypeHeader value that marks a symlink.
	objTypeSymlink = "symlink"

	// std
	//
	// Names of standard HTTP headers used by this package.
	ETagHeader          = "ETag"
	ContentTypeHeader   = "Content-Type"
	ContentLengthHeader = "Content-Length"
)
2022-08-28 18:54:17 +00:00
type Client struct {
sdkClient *gitea.Client
responseCache cache.SetGetKey
2022-11-12 01:54:56 +00:00
giteaRoot string
2022-08-28 18:54:17 +00:00
followSymlinks bool
supportLFS bool
2022-09-18 19:56:56 +00:00
forbiddenMimeTypes map[string]bool
defaultMimeType string
2022-08-28 18:54:17 +00:00
}
func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) {
rootURL, err := url.Parse(giteaRoot)
if err != nil {
return nil, err
}
giteaRoot = strings.Trim(rootURL.String(), "/")
stdClient := http.Client{Timeout: 10 * time.Second}
2022-09-18 19:56:56 +00:00
// TODO: pass down
var (
forbiddenMimeTypes map[string]bool
defaultMimeType string
)
if forbiddenMimeTypes == nil {
forbiddenMimeTypes = make(map[string]bool)
}
if defaultMimeType == "" {
defaultMimeType = "application/octet-stream"
}
2022-08-28 18:54:17 +00:00
sdk, err := gitea.NewClient(giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(giteaAPIToken))
return &Client{
2022-09-18 19:56:56 +00:00
sdkClient: sdk,
responseCache: respCache,
2022-11-12 01:54:56 +00:00
giteaRoot: giteaRoot,
2022-08-28 18:54:17 +00:00
followSymlinks: followSymlinks,
supportLFS: supportLFS,
2022-09-18 19:56:56 +00:00
forbiddenMimeTypes: forbiddenMimeTypes,
defaultMimeType: defaultMimeType,
2022-08-28 18:54:17 +00:00
}, err
}
2022-11-12 01:54:56 +00:00
// ContentWebLink returns the URL of the Gitea web view ("src/branch") for
// resource on the given branch of targetOwner/targetRepo.
//
// Only the path portion is passed through path.Join: joining the root URL as
// well would clean the "//" after the scheme into a single slash and produce
// a broken link such as "https:/host/...".
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
	root := strings.TrimRight(client.giteaRoot, "/")
	return root + "/" + path.Join(targetOwner, targetRepo, "src/branch", branch, resource)
}
2022-08-28 18:54:17 +00:00
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
2022-11-07 23:05:27 +00:00
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
2022-08-28 18:54:17 +00:00
if err != nil {
return nil, err
}
defer reader.Close()
return io.ReadAll(reader)
}
2022-11-07 23:05:27 +00:00
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
2022-09-18 19:02:55 +00:00
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
// handle if cache entry exist
2022-09-19 10:15:14 +00:00
if cache, ok := client.responseCache.Get(cacheKey); ok {
2022-09-18 19:02:55 +00:00
cache := cache.(FileResponse)
2022-11-07 23:15:09 +00:00
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
2022-09-18 19:02:55 +00:00
// TODO: check against some timestamp missmatch?!?
if cache.Exists {
if cache.IsSymlink {
linkDest := string(cache.Body)
2022-11-11 23:20:19 +00:00
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
2022-09-18 19:02:55 +00:00
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
} else {
log.Debug().Msg("[cache] return bytes")
2022-11-07 23:05:27 +00:00
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
2022-09-18 19:02:55 +00:00
}
} else {
2022-11-07 23:05:27 +00:00
return nil, cachedHeader, cachedStatusCode, ErrorNotFound
2022-09-18 19:02:55 +00:00
}
}
2022-11-11 23:27:40 +00:00
// not in cache, open reader via gitea api
2022-08-28 18:54:17 +00:00
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
if resp != nil {
switch resp.StatusCode {
case http.StatusOK:
2022-09-18 19:02:55 +00:00
// first handle symlinks
{
objType := resp.Header.Get(giteaObjectTypeHeader)
2022-11-11 23:20:19 +00:00
log.Trace().Msgf("server raw content object %q", objType)
2022-09-18 19:02:55 +00:00
if client.followSymlinks && objType == objTypeSymlink {
defer reader.Close()
// read limited chars for symlink
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit))
2022-09-18 19:02:55 +00:00
if err != nil {
2022-11-07 23:05:27 +00:00
return nil, nil, http.StatusInternalServerError, err
2022-09-18 19:02:55 +00:00
}
linkDest := strings.TrimSpace(string(linkDestBytes))
// we store symlink not content to reduce duplicates in cache
2022-09-18 19:02:55 +00:00
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: true,
IsSymlink: true,
Body: []byte(linkDest),
2022-11-07 22:21:35 +00:00
ETag: resp.Header.Get(ETagHeader),
2022-09-18 19:02:55 +00:00
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
2022-09-18 19:02:55 +00:00
}
2022-11-11 23:21:45 +00:00
log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest)
2022-09-18 19:02:55 +00:00
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
}
}
2022-08-28 18:54:17 +00:00
2022-11-11 23:32:32 +00:00
// now we are sure it's content so set the MIME type
mimeType := client.getMimeTypeByExtension(resource)
resp.Response.Header.Set(ContentTypeHeader, mimeType)
2022-09-18 19:56:56 +00:00
2022-11-11 23:32:32 +00:00
if !shouldRespBeSavedToCache(resp.Response) {
return reader, resp.Response.Header, resp.StatusCode, err
}
2022-08-28 18:54:17 +00:00
2022-11-11 23:32:32 +00:00
// now we write to cache and respond at the sime time
fileResp := FileResponse{
Exists: true,
ETag: resp.Header.Get(ETagHeader),
MimeType: mimeType,
2022-08-28 18:54:17 +00:00
}
2022-11-11 23:32:32 +00:00
return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil
2022-08-28 18:54:17 +00:00
case http.StatusNotFound:
2022-09-18 19:02:55 +00:00
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: false,
2022-11-07 22:21:35 +00:00
ETag: resp.Header.Get(ETagHeader),
2022-09-18 19:02:55 +00:00
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
2022-09-18 19:02:55 +00:00
}
2022-08-28 18:54:17 +00:00
2022-11-07 23:05:27 +00:00
return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound
2022-08-28 18:54:17 +00:00
default:
2022-11-07 23:05:27 +00:00
return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
2022-08-28 18:54:17 +00:00
}
}
2022-11-07 23:05:27 +00:00
return nil, nil, http.StatusInternalServerError, err
2022-08-28 18:54:17 +00:00
}
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) {
cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName)
if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil {
branchTimeStamp := stamp.(*BranchTimestamp)
if branchTimeStamp.notFound {
log.Trace().Msgf("[cache] use branch %q not found", branchName)
return &BranchTimestamp{}, ErrorNotFound
}
log.Trace().Msgf("[cache] use branch %q exist", branchName)
return branchTimeStamp, nil
2022-08-28 18:54:17 +00:00
}
branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName)
if err != nil {
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.Trace().Msgf("[cache] set cache branch %q not found", branchName)
if err := client.responseCache.Set(cacheKey, &BranchTimestamp{Branch: branchName, notFound: true}, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
2022-08-28 18:54:17 +00:00
return &BranchTimestamp{}, ErrorNotFound
}
return &BranchTimestamp{}, err
}
if resp.StatusCode != http.StatusOK {
return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
stamp := &BranchTimestamp{
Branch: branch.Name,
Timestamp: branch.Commit.Timestamp,
}
log.Trace().Msgf("set cache branch [%s] exist", branchName)
2022-08-28 18:54:17 +00:00
if err := client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
2022-08-28 18:54:17 +00:00
}
return stamp, nil
}
func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) {
cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName)
if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil {
return branch.(string), nil
}
repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName)
if err != nil {
return "", err
}
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
branch := repo.DefaultBranch
if err := client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
2022-08-28 18:54:17 +00:00
}
return branch, nil
}
2022-09-18 19:56:56 +00:00
func (client *Client) getMimeTypeByExtension(resource string) string {
mimeType := mime.TypeByExtension(path.Ext(resource))
mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" {
mimeType = client.defaultMimeType
}
2022-11-11 23:56:30 +00:00
log.Trace().Msgf("probe mime of %q is %q", resource, mimeType)
2022-09-18 19:56:56 +00:00
return mimeType
}
2022-09-18 20:13:50 +00:00
func shouldRespBeSavedToCache(resp *http.Response) bool {
if resp == nil {
return false
}
2022-11-11 23:56:30 +00:00
contentLengthRaw := resp.Header.Get(ContentLengthHeader)
if contentLengthRaw == "" {
2022-09-18 20:13:50 +00:00
return false
}
2022-11-11 23:56:30 +00:00
contentLeng, err := strconv.ParseInt(contentLengthRaw, 10, 64)
2022-09-18 20:13:50 +00:00
if err != nil {
log.Error().Err(err).Msg("could not parse content length")
}
// if content to big or could not be determined we not cache it
return contentLeng > 0 && contentLeng < fileCacheSizeLimit
}