From 0482d5eafc1236ee68c7ede8395b62ee3066b4cf Mon Sep 17 00:00:00 2001 From: "diana.strebkova@t-systems.com" Date: Mon, 27 Jan 2025 21:36:35 +0100 Subject: [PATCH 1/3] Added cleanup method for files in Maven snapshot versions --- custom/conf/app.example.ini | 3 + models/packages/package_file.go | 59 ++++++++++++++ models/packages/package_version.go | 11 ++- modules/packages/maven/metadata.go | 39 ++++++++++ modules/setting/packages.go | 10 ++- services/packages/cleanup/cleanup.go | 7 +- services/packages/maven/cleanup.go | 112 +++++++++++++++++++++++++++ 7 files changed, 233 insertions(+), 8 deletions(-) create mode 100644 services/packages/maven/cleanup.go diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index c10de9595393d..6a59200592da9 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2629,6 +2629,9 @@ LEVEL = Info ;LIMIT_SIZE_HELM = -1 ;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_MAVEN = -1 +;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive. 
+;; Cleanup expired packages/data then targets the files within all maven snapshots versions +;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1 ;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_NPM = -1 ;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) diff --git a/models/packages/package_file.go b/models/packages/package_file.go index 270cb32fdf6b5..ecda71385c970 100644 --- a/models/packages/package_file.go +++ b/models/packages/package_file.go @@ -5,11 +5,14 @@ package packages import ( "context" + "errors" + "fmt" "strconv" "strings" "time" "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" @@ -226,6 +229,62 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error) return db.Exist[PackageFile](ctx, opts.toConds()) } +// GetFilesByBuildNumber retrieves all files for a package version with build numbers <= maxBuildNumber. 
+func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) { + if maxBuildNumber < 0 { + return nil, errors.New("maxBuildNumber must be a non-negative integer") + } + + files, err := GetFilesByVersionID(ctx, versionID) + if err != nil { + return nil, fmt.Errorf("failed to retrieve files: %w", err) + } + + var filteredFiles []*PackageFile + for _, file := range files { + buildNumber, err := extractBuildNumberFromFileName(file.Name) + if err != nil { + if err.Error() == "metadata file" { + continue + } + log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err) + continue + } + + if buildNumber <= maxBuildNumber { + filteredFiles = append(filteredFiles, file) + } + } + + log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber) + return filteredFiles, nil +} + +// extractBuildNumberFromFileName extracts the build number from the file name. +func extractBuildNumberFromFileName(filename string) (int, error) { + // Skip metadata files + if strings.Contains(filename, "maven-metadata.xml") { + return 0, errors.New("metadata file") + } + + // Split filename by hyphens to extract the build number + parts := strings.Split(filename, "-") + if len(parts) < 3 { + return 0, fmt.Errorf("invalid file name format: '%s'", filename) + } + + // Extract the last part before the extension + buildNumberWithExt := parts[len(parts)-1] + buildNumberStr := strings.Split(buildNumberWithExt, ".")[0] + + buildNumber, err := strconv.Atoi(buildNumberStr) + if err != nil { + return 0, fmt.Errorf("failed to convert build number to integer: '%s'", buildNumberStr) + } + + return buildNumber, nil +} + // CalculateFileSize sums up all blob sizes matching the search options. // It does NOT respect the deduplication of blobs. 
func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) { diff --git a/models/packages/package_version.go b/models/packages/package_version.go index 278e8e3a86b0e..b3248e3d10f9e 100644 --- a/models/packages/package_version.go +++ b/models/packages/package_version.go @@ -120,11 +120,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType // GetVersionsByPackageType gets all versions of a specific type func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) { - pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{ - OwnerID: ownerID, + opts := &PackageSearchOptions{ Type: packageType, IsInternal: optional.Some(false), - }) + } + + if ownerID != 0 { + opts.OwnerID = ownerID + } + + pvs, _, err := SearchVersions(ctx, opts) return pvs, err } diff --git a/modules/packages/maven/metadata.go b/modules/packages/maven/metadata.go index a61a62c086208..d903d4b394329 100644 --- a/modules/packages/maven/metadata.go +++ b/modules/packages/maven/metadata.go @@ -5,6 +5,7 @@ package maven import ( "encoding/xml" + "errors" "io" "code.gitea.io/gitea/modules/util" @@ -61,6 +62,27 @@ type pomStruct struct { } `xml:"dependencies>dependency"` } +type MavenMetadata struct { + XMLName xml.Name `xml:"metadata"` + GroupID string `xml:"groupId"` + ArtifactID string `xml:"artifactId"` + Version string `xml:"version"` + Versioning struct { + LastUpdated string `xml:"lastUpdated"` + Snapshot struct { + Timestamp string `xml:"timestamp"` + BuildNumber string `xml:"buildNumber"` + } `xml:"snapshot"` + SnapshotVersions []struct { + SnapshotVersion struct { + Extension string `xml:"extension"` + Value string `xml:"value"` + Updated string `xml:"updated"` + } `xml:"snapshotVersion"` + } `xml:"snapshotVersions>snapshotVersion"` + } `xml:"versioning"` +} + // ParsePackageMetaData parses the metadata of a pom file func ParsePackageMetaData(r io.Reader) (*Metadata, error) { var pom 
pomStruct @@ -109,3 +131,20 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) { Dependencies: dependencies, }, nil } + +// ParseMavenMetadata parses the Maven metadata XML to extract the build number. +func ParseMavenMetaData(r io.Reader) (string, error) { + var metadata MavenMetadata + + dec := xml.NewDecoder(r) + dec.CharsetReader = charset.NewReaderLabel // Assuming charset.NewReaderLabel is a function you've set up to handle character encoding. + if err := dec.Decode(&metadata); err != nil { + return "", err + } + + if metadata.Versioning.Snapshot.BuildNumber == "" { + return "", errors.New("no build number in snapshot metadata found") + } + + return metadata.Versioning.Snapshot.BuildNumber, nil +} diff --git a/modules/setting/packages.go b/modules/setting/packages.go index b598424064832..60c8f0e8b93c0 100644 --- a/modules/setting/packages.go +++ b/modules/setting/packages.go @@ -41,10 +41,12 @@ var ( LimitSizeSwift int64 LimitSizeVagrant int64 - DefaultRPMSignEnabled bool + DefaultRPMSignEnabled bool + RetainMavenSnapshotBuilds int }{ - Enabled: true, - LimitTotalOwnerCount: -1, + Enabled: true, + LimitTotalOwnerCount: -1, + RetainMavenSnapshotBuilds: -1, } ) @@ -87,7 +89,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) { Packages.LimitSizeRubyGems = mustBytes(sec, "LIMIT_SIZE_RUBYGEMS") Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT") Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT") - Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false) + Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds) return nil } diff --git a/services/packages/cleanup/cleanup.go b/services/packages/cleanup/cleanup.go index b7ba2b6ac4afc..7d70afbd804b3 100644 --- a/services/packages/cleanup/cleanup.go +++ b/services/packages/cleanup/cleanup.go @@ -1,7 +1,7 @@ // Copyright 2022 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package container +package cleanup import ( "context" @@ -20,6 +20,7 @@ import ( cargo_service "code.gitea.io/gitea/services/packages/cargo" container_service "code.gitea.io/gitea/services/packages/container" debian_service "code.gitea.io/gitea/services/packages/debian" + maven_service "code.gitea.io/gitea/services/packages/maven" rpm_service "code.gitea.io/gitea/services/packages/rpm" ) @@ -166,6 +167,10 @@ func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error return err } + if err := maven_service.CleanupSnapshotVersions(ctx); err != nil { + return err + } + ps, err := packages_model.FindUnreferencedPackages(ctx) if err != nil { return err diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go new file mode 100644 index 0000000000000..658dd73cab3e6 --- /dev/null +++ b/services/packages/maven/cleanup.go @@ -0,0 +1,112 @@ +package maven + +import ( + "context" + "fmt" + "strconv" + "strings" + + "code.gitea.io/gitea/models/packages" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/packages/maven" + "code.gitea.io/gitea/modules/setting" + packages_service "code.gitea.io/gitea/services/packages" +) + +// CleanupSnapshotVersion removes outdated files for SNAPHOT versions for all Maven packages. +func CleanupSnapshotVersions(ctx context.Context) error { + retainBuilds := setting.Packages.RetainMavenSnapshotBuilds + log.Info("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds) + + if retainBuilds == -1 { + log.Info("CleanupSnapshotVersion skipped because retainBuilds is set to -1") + return nil + } + + if retainBuilds < 1 { + return fmt.Errorf("forbidden value for retainBuilds: %d. 
Minimum 1 build should be retained", retainBuilds) + } + + versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven) + if err != nil { + return fmt.Errorf("failed to retrieve Maven package versions: %w", err) + } + + for _, version := range versions { + log.Info("Processing version: %s (ID: %d)", version.Version, version.ID) + + if !isSnapshotVersion(version.Version) { + log.Info("Skipping non-SNAPSHOT version: %s (ID: %d)", version.Version, version.ID) + continue + } + + if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil { + log.Error("Failed to clean up snapshot files for version '%s' (ID: %d): %v", version.Version, version.ID, err) + return err + } + } + + log.Info("Completed CleanupSnapshotVersion") + return nil +} + +func isSnapshotVersion(version string) bool { + return strings.Contains(version, "-SNAPSHOT") +} + +func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error { + log.Info("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds) + + metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey) + if err != nil { + return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err) + } + + maxBuildNumber, err := extractMaxBuildNumberFromMetadata(ctx, metadataFile) + if err != nil { + return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err) + } + + log.Info("Max build number for versionID %d: %d", versionID, maxBuildNumber) + + thresholdBuildNumber := maxBuildNumber - retainBuilds + if thresholdBuildNumber <= 0 { + log.Info("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID) + return nil + } + + filesToRemove, err := packages.GetFilesByBuildNumber(ctx, versionID, thresholdBuildNumber) + if err != nil { + return fmt.Errorf("failed to retrieve 
files for version ID %d: %w", versionID, err) + } + + for _, file := range filesToRemove { + log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber) + if err := packages_service.DeletePackageFile(ctx, file); err != nil { + return fmt.Errorf("failed to delete file '%s': %w", file.Name, err) + } + } + + log.Info("Completed cleanSnapshotFiles for versionID: %d", versionID) + return nil +} + +func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) { + content, _, _, err := packages_service.GetPackageFileStream(ctx, metadataFile) + if err != nil { + return 0, fmt.Errorf("failed to get package file stream: %w", err) + } + defer content.Close() + + buildNumberStr, err := maven.ParseMavenMetaData(content) + if err != nil { + return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err) + } + + buildNumber, err := strconv.Atoi(buildNumberStr) + if err != nil { + return 0, fmt.Errorf("invalid build number format: %w", err) + } + + return buildNumber, nil +} From e23b1fec1cffe38dae642d6b030e244137cde017 Mon Sep 17 00:00:00 2001 From: "diana.strebkova@t-systems.com" Date: Fri, 7 Mar 2025 11:32:16 +0100 Subject: [PATCH 2/3] Added fix when minio serve_direct is true --- models/packages/package_file.go | 2 +- modules/setting/packages.go | 1 + services/packages/maven/cleanup.go | 9 +++++++-- services/packages/packages.go | 8 +++++--- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/models/packages/package_file.go b/models/packages/package_file.go index ecda71385c970..ca7bd4023efca 100644 --- a/models/packages/package_file.go +++ b/models/packages/package_file.go @@ -279,7 +279,7 @@ func extractBuildNumberFromFileName(filename string) (int, error) { buildNumber, err := strconv.Atoi(buildNumberStr) if err != nil { - return 0, fmt.Errorf("failed to convert build number to integer: '%s'", buildNumberStr) + return 0, fmt.Errorf("failed to convert maven package build number to integer: 
'%s'", buildNumberStr) } return buildNumber, nil diff --git a/modules/setting/packages.go b/modules/setting/packages.go index 60c8f0e8b93c0..bbce57fc28231 100644 --- a/modules/setting/packages.go +++ b/modules/setting/packages.go @@ -89,6 +89,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) { Packages.LimitSizeRubyGems = mustBytes(sec, "LIMIT_SIZE_RUBYGEMS") Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT") Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT") + Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false) Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds) return nil } diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go index 658dd73cab3e6..d4f6138977303 100644 --- a/services/packages/maven/cleanup.go +++ b/services/packages/maven/cleanup.go @@ -51,7 +51,7 @@ func CleanupSnapshotVersions(ctx context.Context) error { } func isSnapshotVersion(version string) bool { - return strings.Contains(version, "-SNAPSHOT") + return strings.HasSuffix(version, "-SNAPSHOT") } func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error { @@ -92,7 +92,12 @@ func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) } func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) { - content, _, _, err := packages_service.GetPackageFileStream(ctx, metadataFile) + pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID) + if err != nil { + return 0, fmt.Errorf("failed to get package blob: %w", err) + } + + content, _, _, err := packages_service.GetPackageBlobStream(ctx, metadataFile, pb, nil, true) if err != nil { return 0, fmt.Errorf("failed to get package file stream: %w", err) } diff --git a/services/packages/packages.go b/services/packages/packages.go index bd1d460fd3ba8..7f505622423d4 100644 --- 
a/services/packages/packages.go +++ b/services/packages/packages.go @@ -600,8 +600,8 @@ func GetPackageFileStream(ctx context.Context, pf *packages_model.PackageFile) ( } // GetPackageBlobStream returns the content of the specific package blob -// If the storage supports direct serving and it's enabled, only the direct serving url is returned. -func GetPackageBlobStream(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, serveDirectReqParams url.Values) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) { +// If the storage supports direct serving and it's enabled, only the direct serving url is returned; otherwise, forceInternalServe should be set to true. +func GetPackageBlobStream(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, serveDirectReqParams url.Values, forceInternalServe ...bool) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) { key := packages_module.BlobHash256Key(pb.HashSHA256) cs := packages_module.NewContentStore() @@ -610,7 +610,9 @@ func GetPackageBlobStream(ctx context.Context, pf *packages_model.PackageFile, p var u *url.URL var err error - if cs.ShouldServeDirect() { + internalServe := len(forceInternalServe) > 0 && forceInternalServe[0] + + if !internalServe && cs.ShouldServeDirect() { u, err = cs.GetServeDirectURL(key, pf.Name, serveDirectReqParams) if err != nil && !errors.Is(err, storage.ErrURLNotSupported) { log.Error("Error getting serve direct url: %v", err) From cd5e9df07d54231bfe30e2c948fbae57dc6fedaf Mon Sep 17 00:00:00 2001 From: "diana.strebkova@t-systems.com" Date: Tue, 11 Mar 2025 19:20:58 +0100 Subject: [PATCH 3/3] Added handling for artifacts with classifiers and added debug mode which will list files for deletion --- custom/conf/app.example.ini | 2 + models/packages/package_file.go | 67 +++++++++++++---------- modules/packages/maven/metadata.go | 45 +++++++++++----- modules/setting/packages.go | 2 + 
services/packages/maven/cleanup.go | 86 ++++++++++++++++++------------ 5 files changed, 126 insertions(+), 76 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 6a59200592da9..da755ab01c8b2 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2633,6 +2633,8 @@ LEVEL = Info ;; Cleanup expired packages/data then targets the files within all maven snapshots versions ;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1 +;; Enable debug mode for the Maven snapshot cleanup. While enabled, no snapshot version artifacts are deleted; the files that would have been deleted are only logged. +;DEBUG_MAVEN_CLEANUP = true ;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_NPM = -1 ;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_NUGET = -1 diff --git a/models/packages/package_file.go b/models/packages/package_file.go index ca7bd4023efca..f940efc90e0ec 100644 --- a/models/packages/package_file.go +++ b/models/packages/package_file.go @@ -12,7 +12,6 @@ import ( "time" "code.gitea.io/gitea/models/db" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" @@ -24,6 +23,8 @@ func init() { } var ( + // ErrMetadataFile indicates a metadata file + ErrMetadataFile = errors.New("metadata file") // ErrDuplicatePackageFile indicates a duplicated package file error ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists") // ErrPackageFileNotExist indicates a package file not exist error @@ -229,57 +230,69 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error) return db.Exist[PackageFile](ctx, opts.toConds()) } -// GetFilesByBuildNumber retrieves all files for a package version with build numbers <= maxBuildNumber. 
-func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) { - if maxBuildNumber < 0 { - return nil, errors.New("maxBuildNumber must be a non-negative integer") +// GetFilesBelowBuildNumber retrieves all files for maven snapshot version where the build number is <= maxBuildNumber. +// Returns two slices: one for filtered files and one for skipped files. +func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) { + if maxBuildNumber <= 0 { + return nil, nil, errors.New("maxBuildNumber must be a positive integer") } files, err := GetFilesByVersionID(ctx, versionID) if err != nil { - return nil, fmt.Errorf("failed to retrieve files: %w", err) + return nil, nil, fmt.Errorf("failed to retrieve files: %w", err) } - var filteredFiles []*PackageFile + var filteredFiles, skippedFiles []*PackageFile for _, file := range files { - buildNumber, err := extractBuildNumberFromFileName(file.Name) + buildNumber, err := extractBuildNumberFromFileName(file.Name, classifiers...) if err != nil { - if err.Error() == "metadata file" { - continue + if !errors.Is(err, ErrMetadataFile) { + skippedFiles = append(skippedFiles, file) } - log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err) continue } - if buildNumber <= maxBuildNumber { filteredFiles = append(filteredFiles, file) } } - log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber) - return filteredFiles, nil + return filteredFiles, skippedFiles, nil } -// extractBuildNumberFromFileName extracts the build number from the file name. -func extractBuildNumberFromFileName(filename string) (int, error) { - // Skip metadata files +// extractBuildNumberFromFileName extracts the build number from a Maven snapshot file name. 
+// Expected formats: +// +// "artifact-1.0.0-20250311.083409-9.tgz" returns 9 +// "artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10 +func extractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) { if strings.Contains(filename, "maven-metadata.xml") { - return 0, errors.New("metadata file") + return 0, ErrMetadataFile } - // Split filename by hyphens to extract the build number - parts := strings.Split(filename, "-") - if len(parts) < 3 { - return 0, fmt.Errorf("invalid file name format: '%s'", filename) + dotIdx := strings.LastIndex(filename, ".") + if dotIdx == -1 { + return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename) + } + base := filename[:dotIdx] + + // Remove classifier suffix if present. + for _, classifier := range classifiers { + suffix := "-" + classifier + if strings.HasSuffix(base, suffix) { + base = base[:len(base)-len(suffix)] + break + } } - // Extract the last part before the extension - buildNumberWithExt := parts[len(parts)-1] - buildNumberStr := strings.Split(buildNumberWithExt, ".")[0] - + // The build number should be the token after the last dash. 
+ lastDash := strings.LastIndex(base, "-") + if lastDash == -1 { + return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename) + } + buildNumberStr := base[lastDash+1:] buildNumber, err := strconv.Atoi(buildNumberStr) if err != nil { - return 0, fmt.Errorf("failed to convert maven package build number to integer: '%s'", buildNumberStr) + return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %v", buildNumberStr, filename, err) } return buildNumber, nil diff --git a/modules/packages/maven/metadata.go b/modules/packages/maven/metadata.go index d903d4b394329..8dcbf14d18135 100644 --- a/modules/packages/maven/metadata.go +++ b/modules/packages/maven/metadata.go @@ -7,6 +7,7 @@ import ( "encoding/xml" "errors" "io" + "strconv" "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/validation" @@ -32,6 +33,12 @@ type Dependency struct { Version string `json:"version,omitempty"` } +// SnapshotMetadata struct holds the build number and the list of classifiers for a snapshot version +type SnapshotMetadata struct { + BuildNumber int `json:"build_number,omitempty"` + Classifiers []string `json:"classifiers,omitempty"` +} + type pomStruct struct { XMLName xml.Name `xml:"project"` @@ -62,7 +69,7 @@ type pomStruct struct { } `xml:"dependencies>dependency"` } -type MavenMetadata struct { +type snapshotMetadataStruct struct { XMLName xml.Name `xml:"metadata"` GroupID string `xml:"groupId"` ArtifactID string `xml:"artifactId"` @@ -74,11 +81,10 @@ type MavenMetadata struct { BuildNumber string `xml:"buildNumber"` } `xml:"snapshot"` SnapshotVersions []struct { - SnapshotVersion struct { - Extension string `xml:"extension"` - Value string `xml:"value"` - Updated string `xml:"updated"` - } `xml:"snapshotVersion"` + Extension string `xml:"extension"` + Classifier string `xml:"classifier"` + Value string `xml:"value"` + Updated string `xml:"updated"` } 
`xml:"snapshotVersions>snapshotVersion"` } `xml:"versioning"` } @@ -132,19 +138,30 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) { }, nil } -// ParseMavenMetadata parses the Maven metadata XML to extract the build number. -func ParseMavenMetaData(r io.Reader) (string, error) { - var metadata MavenMetadata +// ParseSnapshotVersionMetaData parses the Maven Snapshot Version metadata to extract the build number and list of available classifiers. +func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) { + var metadata snapshotMetadataStruct dec := xml.NewDecoder(r) - dec.CharsetReader = charset.NewReaderLabel // Assuming charset.NewReaderLabel is a function you've set up to handle character encoding. + dec.CharsetReader = charset.NewReaderLabel if err := dec.Decode(&metadata); err != nil { - return "", err + return nil, err } - if metadata.Versioning.Snapshot.BuildNumber == "" { - return "", errors.New("no build number in snapshot metadata found") + buildNumber, err := strconv.Atoi(metadata.Versioning.Snapshot.BuildNumber) + if err != nil { + return nil, errors.New("invalid or missing build number in snapshot metadata") } - return metadata.Versioning.Snapshot.BuildNumber, nil + var classifiers []string + for _, snapshotVersion := range metadata.Versioning.SnapshotVersions { + if snapshotVersion.Classifier != "" { + classifiers = append(classifiers, snapshotVersion.Classifier) + } + } + + return &SnapshotMetadata{ + BuildNumber: buildNumber, + Classifiers: classifiers, + }, nil } diff --git a/modules/setting/packages.go b/modules/setting/packages.go index bbce57fc28231..c1628a8ca80b3 100644 --- a/modules/setting/packages.go +++ b/modules/setting/packages.go @@ -43,6 +43,7 @@ var ( DefaultRPMSignEnabled bool RetainMavenSnapshotBuilds int + DebugMavenCleanup bool }{ Enabled: true, LimitTotalOwnerCount: -1, @@ -91,6 +92,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) { Packages.LimitSizeVagrant = mustBytes(sec, 
"LIMIT_SIZE_VAGRANT") Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false) Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds) + Packages.DebugMavenCleanup = sec.Key("DEBUG_MAVEN_CLEANUP").MustBool(true) return nil } diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go index d4f6138977303..f7a14cdc4c323 100644 --- a/services/packages/maven/cleanup.go +++ b/services/packages/maven/cleanup.go @@ -3,7 +3,6 @@ package maven import ( "context" "fmt" - "strconv" "strings" "code.gitea.io/gitea/models/packages" @@ -13,40 +12,46 @@ import ( packages_service "code.gitea.io/gitea/services/packages" ) -// CleanupSnapshotVersion removes outdated files for SNAPHOT versions for all Maven packages. +// CleanupSnapshotVersions removes outdated files for SNAPSHOT versions for all Maven packages. func CleanupSnapshotVersions(ctx context.Context) error { retainBuilds := setting.Packages.RetainMavenSnapshotBuilds - log.Info("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds) + debugSession := setting.Packages.DebugMavenCleanup + log.Debug("Starting Maven CleanupSnapshotVersions with retainBuilds: %d, debugSession: %t", retainBuilds, debugSession) if retainBuilds == -1 { - log.Info("CleanupSnapshotVersion skipped because retainBuilds is set to -1") + log.Info("Maven CleanupSnapshotVersions skipped because retainBuilds is set to -1") return nil } if retainBuilds < 1 { - return fmt.Errorf("forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds) + return fmt.Errorf("Maven CleanupSnapshotVersions: forbidden value for retainBuilds: %d. 
Minimum 1 build should be retained", retainBuilds) } versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven) if err != nil { - return fmt.Errorf("failed to retrieve Maven package versions: %w", err) + return fmt.Errorf("Maven CleanupSnapshotVersions: failed to retrieve Maven package versions: %w", err) } - for _, version := range versions { - log.Info("Processing version: %s (ID: %d)", version.Version, version.ID) + var errors []error + for _, version := range versions { if !isSnapshotVersion(version.Version) { - log.Info("Skipping non-SNAPSHOT version: %s (ID: %d)", version.Version, version.ID) continue } - if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil { - log.Error("Failed to clean up snapshot files for version '%s' (ID: %d): %v", version.Version, version.ID, err) - return err + if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds, debugSession); err != nil { + errors = append(errors, fmt.Errorf("Maven CleanupSnapshotVersions: version '%s' (ID: %d): %w", version.Version, version.ID, err)) + } + } + + if len(errors) > 0 { + for _, err := range errors { + log.Warn("Maven CleanupSnapshotVersions: Error during cleanup: %v", err) } + return fmt.Errorf("Maven CleanupSnapshotVersions: cleanup completed with errors: %v", errors) } - log.Info("Completed CleanupSnapshotVersion") + log.Debug("Completed Maven CleanupSnapshotVersions") return nil } @@ -54,64 +59,75 @@ func isSnapshotVersion(version string) bool { return strings.HasSuffix(version, "-SNAPSHOT") } -func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error { - log.Info("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds) +func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int, debugSession bool) error { + log.Debug("Starting Maven cleanSnapshotFiles for versionID: %d with retainBuilds: %d, debugSession: %t", versionID, retainBuilds, debugSession) metadataFile, err 
:= packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey) if err != nil { - return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err) + return fmt.Errorf("cleanSnapshotFiles: failed to retrieve Maven metadata file for version ID %d: %w", versionID, err) } - maxBuildNumber, err := extractMaxBuildNumberFromMetadata(ctx, metadataFile) + maxBuildNumber, classifiers, err := extractMaxBuildNumber(ctx, metadataFile) if err != nil { - return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err) + return fmt.Errorf("cleanSnapshotFiles: failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err) } - log.Info("Max build number for versionID %d: %d", versionID, maxBuildNumber) - thresholdBuildNumber := maxBuildNumber - retainBuilds if thresholdBuildNumber <= 0 { - log.Info("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID) + log.Debug("cleanSnapshotFiles: No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID) return nil } - filesToRemove, err := packages.GetFilesByBuildNumber(ctx, versionID, thresholdBuildNumber) + filesToRemove, skippedFiles, err := packages.GetFilesBelowBuildNumber(ctx, versionID, thresholdBuildNumber, classifiers...) if err != nil { - return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err) + return fmt.Errorf("cleanSnapshotFiles: failed to retrieve files for version ID %d: %w", versionID, err) + } + + if debugSession { + var fileNamesToRemove, skippedFileNames []string + + for _, file := range filesToRemove { + fileNamesToRemove = append(fileNamesToRemove, file.Name) + } + + for _, file := range skippedFiles { + skippedFileNames = append(skippedFileNames, file.Name) + } + + log.Info("cleanSnapshotFiles: Debug session active. 
Files to remove: %v, Skipped files: %v", fileNamesToRemove, skippedFileNames) + return nil } for _, file := range filesToRemove { log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber) if err := packages_service.DeletePackageFile(ctx, file); err != nil { - return fmt.Errorf("failed to delete file '%s': %w", file.Name, err) + return fmt.Errorf("Maven cleanSnapshotFiles: failed to delete file '%s': %w", file.Name, err) } } - log.Info("Completed cleanSnapshotFiles for versionID: %d", versionID) + log.Debug("Completed Maven cleanSnapshotFiles for versionID: %d", versionID) return nil } -func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) { +func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) { pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID) if err != nil { - return 0, fmt.Errorf("failed to get package blob: %w", err) + return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package blob: %w", err) } content, _, _, err := packages_service.GetPackageBlobStream(ctx, metadataFile, pb, nil, true) if err != nil { - return 0, fmt.Errorf("failed to get package file stream: %w", err) + return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package file stream: %w", err) } defer content.Close() - buildNumberStr, err := maven.ParseMavenMetaData(content) + snapshotMetadata, err := maven.ParseSnapshotVersionMetaData(content) if err != nil { - return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err) + return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to parse maven-metadata.xml: %w", err) } - buildNumber, err := strconv.Atoi(buildNumberStr) - if err != nil { - return 0, fmt.Errorf("invalid build number format: %w", err) - } + buildNumber := snapshotMetadata.BuildNumber + classifiers := snapshotMetadata.Classifiers - return buildNumber, nil + return buildNumber, classifiers, nil }