package gitea import ( "bytes" "errors" "fmt" "io" "mime" "net/http" "net/url" "path" "strconv" "strings" "time" "code.gitea.io/sdk/gitea" "github.com/rs/zerolog/log" "codeberg.org/codeberg/pages/server/cache" ) var ErrorNotFound = errors.New("not found") const ( // cache key prefixe branchTimestampCacheKeyPrefix = "branchTime" defaultBranchCacheKeyPrefix = "defaultBranch" rawContentCacheKeyPrefix = "rawContent" // gitea giteaObjectTypeHeader = "X-Gitea-Object-Type" objTypeSymlink = "symlink" // std eTagHeader = "ETag" contentTypeHeader = "Content-Type" contentLengthHeader = "Content-Length" ) type Client struct { sdkClient *gitea.Client responseCache cache.SetGetKey followSymlinks bool supportLFS bool forbiddenMimeTypes map[string]bool defaultMimeType string } func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, followSymlinks, supportLFS bool) (*Client, error) { rootURL, err := url.Parse(giteaRoot) if err != nil { return nil, err } giteaRoot = strings.Trim(rootURL.String(), "/") stdClient := http.Client{Timeout: 10 * time.Second} // TODO: pass down var ( forbiddenMimeTypes map[string]bool defaultMimeType string ) if forbiddenMimeTypes == nil { forbiddenMimeTypes = make(map[string]bool) } if defaultMimeType == "" { defaultMimeType = "application/octet-stream" } sdk, err := gitea.NewClient(giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(giteaAPIToken)) return &Client{ sdkClient: sdk, responseCache: respCache, followSymlinks: followSymlinks, supportLFS: supportLFS, forbiddenMimeTypes: forbiddenMimeTypes, defaultMimeType: defaultMimeType, }, err } func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { reader, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource) if err != nil { return nil, err } defer reader.Close() return io.ReadAll(reader) } func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, *http.Response, error) { cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource) log := log.With().Str("cache_key", cacheKey).Logger() // handle if cache entry exist if cache, ok := client.responseCache.Get(cacheKey); ok { cache := cache.(FileResponse) // TODO: check against some timestamp missmatch?!? if cache.Exists { if cache.IsSymlink { linkDest := string(cache.Body) log.Debug().Msgf("[cache] follow symlink from'%s' to '%s'", resource, linkDest) return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) } else { log.Debug().Msg("[cache] return bytes") return io.NopCloser(bytes.NewReader(cache.Body)), cache.createHttpResponse(), nil } } else { return nil, cache.createHttpResponse(), ErrorNotFound } } // if cachedValue, ok := fileResponseCache.Get(uri + "?timestamp=" + o.timestamp()); ok && !cachedValue.(gitea.FileResponse).IsEmpty() { // cachedResponse = cachedValue.(gitea.FileResponse) reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS) if resp != nil { switch resp.StatusCode { case http.StatusOK: // first handle symlinks { objType := resp.Header.Get(giteaObjectTypeHeader) log.Trace().Msgf("server raw content object: %s", objType) if client.followSymlinks && objType == objTypeSymlink { // limit to 1000 chars defer reader.Close() linkDestBytes, err := io.ReadAll(io.LimitReader(reader, 10000)) if err != nil { return nil, nil, err } linkDest := strings.TrimSpace(string(linkDestBytes)) if err := client.responseCache.Set(cacheKey, FileResponse{ Exists: true, IsSymlink: true, Body: []byte(linkDest), ETag: resp.Header.Get(eTagHeader), }, fileCacheTimeout); err != nil { log.Error().Err(err).Msg("could not save symlink in cache") } log.Debug().Msgf("follow symlink from '%s' to '%s'", resource, linkDest) return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) } } // now we are sure it's content { // Set the MIME type mimeType := client.getMimeTypeByExtension(resource) resp.Response.Header.Set(contentTypeHeader, mimeType) if !shouldRespBeSavedToCache(resp.Response) { return reader, resp.Response, err } // now we write to cache and respond at the sime time fileResp := FileResponse{ Exists: true, ETag: resp.Header.Get(eTagHeader), MimeType: mimeType, } return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response, nil } case http.StatusNotFound: if err := client.responseCache.Set(cacheKey, FileResponse{ Exists: false, ETag: resp.Header.Get(eTagHeader), }, fileCacheTimeout); err != nil { log.Error().Err(err).Msg("could not save 404 in cache") } return nil, resp.Response, ErrorNotFound default: return nil, resp.Response, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } } return nil, nil, err } func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) { cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName) if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil { return stamp.(*BranchTimestamp), nil } branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName) if err != nil { if resp != nil && resp.StatusCode == http.StatusNotFound { return &BranchTimestamp{}, ErrorNotFound } return &BranchTimestamp{}, err } if resp.StatusCode != http.StatusOK { return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } stamp := &BranchTimestamp{ Branch: branch.Name, Timestamp: branch.Commit.Timestamp, } if err := client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout); err != nil { log.Error().Err(err).Msgf("error on store of repo branch timestamp [%s/%s@%s]", repoOwner, repoName, branchName) } return stamp, nil } func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) { cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName) if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil { return branch.(string), nil } repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName) if err != nil { return "", err } if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } branch := repo.DefaultBranch if err := client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout); err != nil { log.Error().Err(err).Msgf("error on store of repo default branch [%s/%s]", repoOwner, repoName) } return branch, nil } func (client *Client) getMimeTypeByExtension(resource string) string { mimeType := mime.TypeByExtension(path.Ext(resource)) mimeTypeSplit := strings.SplitN(mimeType, ";", 2) if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" { mimeType = client.defaultMimeType } return mimeType } func shouldRespBeSavedToCache(resp *http.Response) bool { if resp == nil { return false } contentLengRaw := resp.Header.Get(contentLengthHeader) if contentLengRaw == "" { return false } contentLeng, err := strconv.ParseInt(contentLengRaw, 10, 64) if err != nil { log.Error().Err(err).Msg("could not parse content length") } // if content to big or could not be determined we not cache it return contentLeng > 0 && contentLeng < fileCacheSizeLimit }