2022-06-11 21:02:06 +00:00
|
|
|
package gitea
|
|
|
|
|
|
|
|
import (
|
2022-11-12 19:37:20 +00:00
|
|
|
"bytes"
|
2022-06-11 21:02:06 +00:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
2022-11-12 19:37:20 +00:00
|
|
|
"io"
|
|
|
|
"mime"
|
|
|
|
"net/http"
|
2022-06-11 21:02:06 +00:00
|
|
|
"net/url"
|
2022-11-12 19:37:20 +00:00
|
|
|
"path"
|
|
|
|
"strconv"
|
2022-06-13 18:07:32 +00:00
|
|
|
"strings"
|
2022-06-11 21:02:06 +00:00
|
|
|
"time"
|
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
"code.gitea.io/sdk/gitea"
|
2022-08-12 04:40:12 +00:00
|
|
|
"github.com/rs/zerolog/log"
|
2022-11-12 19:37:20 +00:00
|
|
|
|
2024-02-15 16:08:29 +00:00
|
|
|
"codeberg.org/codeberg/pages/config"
|
2022-11-12 19:37:20 +00:00
|
|
|
"codeberg.org/codeberg/pages/server/cache"
|
2023-11-15 15:25:14 +00:00
|
|
|
"codeberg.org/codeberg/pages/server/version"
|
2022-06-11 21:02:06 +00:00
|
|
|
)
|
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
// ErrorNotFound is the sentinel error returned by this package when Gitea
// reports that the requested file or branch does not exist.
var ErrorNotFound = errors.New("not found")
|
|
|
|
|
2022-08-12 04:40:12 +00:00
|
|
|
const (
	// Prefixes that namespace the different kinds of entries stored in the
	// response cache.
	branchTimestampCacheKeyPrefix = "branchTime"
	defaultBranchCacheKeyPrefix   = "defaultBranch"
	rawContentCacheKeyPrefix      = "rawContent"

	// Pages-server specific settings.

	// PagesCacheIndicatorHeader is the name of the pages-server cache
	// indicator header.
	PagesCacheIndicatorHeader = "X-Pages-Cache"
	// symlinkReadLimit caps how many bytes are read from a symlink blob when
	// its target path is resolved.
	symlinkReadLimit = 10000

	// Gitea specific values.

	// giteaObjectTypeHeader is the response header inspected to learn the
	// type of the served git object.
	giteaObjectTypeHeader = "X-Gitea-Object-Type"
	// objTypeSymlink is the giteaObjectTypeHeader value that marks a symlink.
	objTypeSymlink = "symlink"

	// Standard HTTP header names.
	ETagHeader          = "ETag"
	ContentTypeHeader   = "Content-Type"
	ContentLengthHeader = "Content-Length"
)
|
2022-06-11 21:02:06 +00:00
|
|
|
|
|
|
|
type Client struct {
|
2022-11-12 19:37:20 +00:00
|
|
|
sdkClient *gitea.Client
|
2024-02-15 16:08:29 +00:00
|
|
|
responseCache cache.ICache
|
2022-06-11 21:02:06 +00:00
|
|
|
|
2022-11-12 19:43:44 +00:00
|
|
|
giteaRoot string
|
|
|
|
|
2022-08-12 04:40:12 +00:00
|
|
|
followSymlinks bool
|
|
|
|
supportLFS bool
|
2022-06-11 21:02:06 +00:00
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
forbiddenMimeTypes map[string]bool
|
|
|
|
defaultMimeType string
|
2022-06-13 18:07:32 +00:00
|
|
|
}
|
2022-06-11 21:02:06 +00:00
|
|
|
|
2024-02-15 16:08:29 +00:00
|
|
|
func NewClient(cfg config.GiteaConfig, respCache cache.ICache) (*Client, error) {
|
|
|
|
rootURL, err := url.Parse(cfg.Root)
|
2022-11-12 19:37:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-02-15 16:08:29 +00:00
|
|
|
giteaRoot := strings.Trim(rootURL.String(), "/")
|
2022-06-13 18:07:32 +00:00
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
stdClient := http.Client{Timeout: 10 * time.Second}
|
|
|
|
|
2024-02-15 16:08:29 +00:00
|
|
|
forbiddenMimeTypes := make(map[string]bool, len(cfg.ForbiddenMimeTypes))
|
|
|
|
for _, mimeType := range cfg.ForbiddenMimeTypes {
|
|
|
|
forbiddenMimeTypes[mimeType] = true
|
2022-11-12 19:37:20 +00:00
|
|
|
}
|
2024-02-15 16:08:29 +00:00
|
|
|
|
|
|
|
defaultMimeType := cfg.DefaultMimeType
|
2022-11-12 19:37:20 +00:00
|
|
|
if defaultMimeType == "" {
|
|
|
|
defaultMimeType = "application/octet-stream"
|
|
|
|
}
|
|
|
|
|
2023-11-15 15:25:14 +00:00
|
|
|
sdk, err := gitea.NewClient(
|
|
|
|
giteaRoot,
|
|
|
|
gitea.SetHTTPClient(&stdClient),
|
2024-02-15 16:08:29 +00:00
|
|
|
gitea.SetToken(cfg.Token),
|
2023-11-15 15:25:14 +00:00
|
|
|
gitea.SetUserAgent("pages-server/"+version.Version),
|
|
|
|
)
|
|
|
|
|
2022-06-11 21:02:06 +00:00
|
|
|
return &Client{
|
2022-11-12 19:37:20 +00:00
|
|
|
sdkClient: sdk,
|
|
|
|
responseCache: respCache,
|
2022-08-12 04:40:12 +00:00
|
|
|
|
2022-11-12 19:43:44 +00:00
|
|
|
giteaRoot: giteaRoot,
|
|
|
|
|
2024-02-15 16:08:29 +00:00
|
|
|
followSymlinks: cfg.FollowSymlinks,
|
|
|
|
supportLFS: cfg.LFSEnabled,
|
2022-11-12 19:37:20 +00:00
|
|
|
|
|
|
|
forbiddenMimeTypes: forbiddenMimeTypes,
|
|
|
|
defaultMimeType: defaultMimeType,
|
2022-06-13 18:07:32 +00:00
|
|
|
}, err
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
|
|
|
|
2022-11-12 19:43:44 +00:00
|
|
|
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
|
|
|
|
return path.Join(client.giteaRoot, targetOwner, targetRepo, "src/branch", branch, resource)
|
|
|
|
}
|
|
|
|
|
2022-06-11 21:02:06 +00:00
|
|
|
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
|
2022-11-12 19:37:20 +00:00
|
|
|
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
|
2022-06-11 21:02:06 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-11-12 19:37:20 +00:00
|
|
|
defer reader.Close()
|
|
|
|
return io.ReadAll(reader)
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
|
|
|
|
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
|
|
|
|
log := log.With().Str("cache_key", cacheKey).Logger()
|
2024-02-26 22:21:42 +00:00
|
|
|
log.Trace().Msg("try file in cache")
|
2022-11-12 19:37:20 +00:00
|
|
|
// handle if cache entry exist
|
|
|
|
if cache, ok := client.responseCache.Get(cacheKey); ok {
|
|
|
|
cache := cache.(FileResponse)
|
|
|
|
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
|
2023-03-11 05:07:17 +00:00
|
|
|
// TODO: check against some timestamp mismatch?!?
|
2022-11-12 19:37:20 +00:00
|
|
|
if cache.Exists {
|
2024-02-26 22:21:42 +00:00
|
|
|
log.Debug().Msg("[cache] exists")
|
2022-11-12 19:37:20 +00:00
|
|
|
if cache.IsSymlink {
|
|
|
|
linkDest := string(cache.Body)
|
|
|
|
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
|
|
|
|
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
|
2024-02-26 22:21:42 +00:00
|
|
|
} else if !cache.IsEmpty() {
|
|
|
|
log.Debug().Msgf("[cache] return %d bytes", len(cache.Body))
|
2022-11-12 19:37:20 +00:00
|
|
|
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
|
2024-02-26 22:21:42 +00:00
|
|
|
} else if cache.IsEmpty() {
|
|
|
|
log.Debug().Msg("[cache] is empty")
|
2022-11-12 19:37:20 +00:00
|
|
|
}
|
|
|
|
}
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
2024-02-26 22:21:42 +00:00
|
|
|
log.Trace().Msg("file not in cache")
|
2022-11-12 19:37:20 +00:00
|
|
|
// not in cache, open reader via gitea api
|
|
|
|
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
|
|
|
|
if resp != nil {
|
|
|
|
switch resp.StatusCode {
|
|
|
|
case http.StatusOK:
|
|
|
|
// first handle symlinks
|
|
|
|
{
|
|
|
|
objType := resp.Header.Get(giteaObjectTypeHeader)
|
|
|
|
log.Trace().Msgf("server raw content object %q", objType)
|
|
|
|
if client.followSymlinks && objType == objTypeSymlink {
|
|
|
|
defer reader.Close()
|
|
|
|
// read limited chars for symlink
|
|
|
|
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit))
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, http.StatusInternalServerError, err
|
|
|
|
}
|
|
|
|
linkDest := strings.TrimSpace(string(linkDestBytes))
|
|
|
|
|
2023-03-11 05:07:17 +00:00
|
|
|
// handle relative links
|
|
|
|
// we first remove the link from the path, and make a relative join (resolve parent paths like "/../" too)
|
|
|
|
linkDest = path.Join(path.Dir(resource), linkDest)
|
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
// we store symlink not content to reduce duplicates in cache
|
2024-02-26 22:21:42 +00:00
|
|
|
fileResponse := FileResponse{
|
2022-11-12 19:37:20 +00:00
|
|
|
Exists: true,
|
|
|
|
IsSymlink: true,
|
|
|
|
Body: []byte(linkDest),
|
|
|
|
ETag: resp.Header.Get(ETagHeader),
|
2024-02-26 22:21:42 +00:00
|
|
|
}
|
|
|
|
log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body))
|
|
|
|
if err := client.responseCache.Set(cacheKey, fileResponse, fileCacheTimeout); err != nil {
|
2022-11-12 19:37:20 +00:00
|
|
|
log.Error().Err(err).Msg("[cache] error on cache write")
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest)
|
|
|
|
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// now we are sure it's content so set the MIME type
|
|
|
|
mimeType := client.getMimeTypeByExtension(resource)
|
|
|
|
resp.Response.Header.Set(ContentTypeHeader, mimeType)
|
2022-08-12 04:40:12 +00:00
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
if !shouldRespBeSavedToCache(resp.Response) {
|
|
|
|
return reader, resp.Response.Header, resp.StatusCode, err
|
|
|
|
}
|
2022-08-12 04:40:12 +00:00
|
|
|
|
2023-11-15 15:25:14 +00:00
|
|
|
// now we write to cache and respond at the same time
|
2022-11-12 19:37:20 +00:00
|
|
|
fileResp := FileResponse{
|
|
|
|
Exists: true,
|
|
|
|
ETag: resp.Header.Get(ETagHeader),
|
|
|
|
MimeType: mimeType,
|
|
|
|
}
|
|
|
|
return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil
|
2022-08-12 04:40:12 +00:00
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
case http.StatusNotFound:
|
|
|
|
if err := client.responseCache.Set(cacheKey, FileResponse{
|
|
|
|
Exists: false,
|
|
|
|
ETag: resp.Header.Get(ETagHeader),
|
|
|
|
}, fileCacheTimeout); err != nil {
|
|
|
|
log.Error().Err(err).Msg("[cache] error on cache write")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound
|
|
|
|
default:
|
|
|
|
return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
|
|
|
|
}
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
2022-11-12 19:37:20 +00:00
|
|
|
return nil, nil, http.StatusInternalServerError, err
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) {
|
|
|
|
cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName)
|
|
|
|
|
|
|
|
if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil {
|
|
|
|
branchTimeStamp := stamp.(*BranchTimestamp)
|
|
|
|
if branchTimeStamp.notFound {
|
|
|
|
log.Trace().Msgf("[cache] use branch %q not found", branchName)
|
|
|
|
return &BranchTimestamp{}, ErrorNotFound
|
|
|
|
}
|
|
|
|
log.Trace().Msgf("[cache] use branch %q exist", branchName)
|
|
|
|
return branchTimeStamp, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName)
|
2022-06-11 21:02:06 +00:00
|
|
|
if err != nil {
|
2022-11-12 19:37:20 +00:00
|
|
|
if resp != nil && resp.StatusCode == http.StatusNotFound {
|
|
|
|
log.Trace().Msgf("[cache] set cache branch %q not found", branchName)
|
|
|
|
if err := client.responseCache.Set(cacheKey, &BranchTimestamp{Branch: branchName, notFound: true}, branchExistenceCacheTimeout); err != nil {
|
|
|
|
log.Error().Err(err).Msg("[cache] error on cache write")
|
|
|
|
}
|
|
|
|
return &BranchTimestamp{}, ErrorNotFound
|
|
|
|
}
|
|
|
|
return &BranchTimestamp{}, err
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
2022-11-12 19:37:20 +00:00
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
2022-11-12 19:37:20 +00:00
|
|
|
|
|
|
|
stamp := &BranchTimestamp{
|
|
|
|
Branch: branch.Name,
|
|
|
|
Timestamp: branch.Commit.Timestamp,
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Trace().Msgf("set cache branch [%s] exist", branchName)
|
|
|
|
if err := client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout); err != nil {
|
|
|
|
log.Error().Err(err).Msg("[cache] error on cache write")
|
|
|
|
}
|
|
|
|
return stamp, nil
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) {
|
2022-11-12 19:37:20 +00:00
|
|
|
cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName)
|
|
|
|
|
|
|
|
if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil {
|
|
|
|
return branch.(string), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName)
|
2022-06-11 21:02:06 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2022-11-12 19:37:20 +00:00
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
2022-11-12 19:37:20 +00:00
|
|
|
|
|
|
|
branch := repo.DefaultBranch
|
|
|
|
if err := client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout); err != nil {
|
|
|
|
log.Error().Err(err).Msg("[cache] error on cache write")
|
|
|
|
}
|
|
|
|
return branch, nil
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
func (client *Client) getMimeTypeByExtension(resource string) string {
|
|
|
|
mimeType := mime.TypeByExtension(path.Ext(resource))
|
|
|
|
mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
|
|
|
|
if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" {
|
|
|
|
mimeType = client.defaultMimeType
|
|
|
|
}
|
|
|
|
log.Trace().Msgf("probe mime of %q is %q", resource, mimeType)
|
|
|
|
return mimeType
|
|
|
|
}
|
|
|
|
|
|
|
|
func shouldRespBeSavedToCache(resp *http.Response) bool {
|
|
|
|
if resp == nil {
|
|
|
|
return false
|
|
|
|
}
|
2022-06-11 21:02:06 +00:00
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
contentLengthRaw := resp.Header.Get(ContentLengthHeader)
|
|
|
|
if contentLengthRaw == "" {
|
|
|
|
return false
|
|
|
|
}
|
2022-06-11 21:02:06 +00:00
|
|
|
|
2023-11-15 15:25:14 +00:00
|
|
|
contentLength, err := strconv.ParseInt(contentLengthRaw, 10, 64)
|
2022-11-12 19:37:20 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("could not parse content length")
|
|
|
|
}
|
2022-06-11 21:02:06 +00:00
|
|
|
|
2022-11-12 19:37:20 +00:00
|
|
|
// if content to big or could not be determined we not cache it
|
2023-11-15 15:25:14 +00:00
|
|
|
return contentLength > 0 && contentLength < fileCacheSizeLimit
|
2022-06-11 21:02:06 +00:00
|
|
|
}
|