Skip to content

Commit f7960a4

Browse files
diana.strebkova@t-systems.comdianaStr7
authored andcommitted
Added cleanup method for files in Maven snapshot versions
1 parent 3bbc482 commit f7960a4

File tree

7 files changed

+233
-8
lines changed

7 files changed

+233
-8
lines changed

custom/conf/app.example.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,6 +2612,9 @@ LEVEL = Info
26122612
;LIMIT_SIZE_HELM = -1
26132613
;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
26142614
;LIMIT_SIZE_MAVEN = -1
2615+
;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive.
2616+
;; The cleanup of expired packages/data then also removes outdated files from all Maven snapshot versions.
2617+
;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
26152618
;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
26162619
;LIMIT_SIZE_NPM = -1
26172620
;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)

models/packages/package_file.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ package packages
55

66
import (
77
"context"
8+
"errors"
9+
"fmt"
810
"strconv"
911
"strings"
1012
"time"
1113

1214
"code.gitea.io/gitea/models/db"
15+
"code.gitea.io/gitea/modules/log"
1316
"code.gitea.io/gitea/modules/timeutil"
1417
"code.gitea.io/gitea/modules/util"
1518

@@ -226,6 +229,62 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
226229
return db.Exist[PackageFile](ctx, opts.toConds())
227230
}
228231

232+
// GetFilesByBuildNumber retrieves all files for a package version with build numbers <= maxBuildNumber.
233+
func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) {
234+
if maxBuildNumber < 0 {
235+
return nil, errors.New("maxBuildNumber must be a non-negative integer")
236+
}
237+
238+
files, err := GetFilesByVersionID(ctx, versionID)
239+
if err != nil {
240+
return nil, fmt.Errorf("failed to retrieve files: %w", err)
241+
}
242+
243+
var filteredFiles []*PackageFile
244+
for _, file := range files {
245+
buildNumber, err := extractBuildNumberFromFileName(file.Name)
246+
if err != nil {
247+
if err.Error() == "metadata file" {
248+
continue
249+
}
250+
log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err)
251+
continue
252+
}
253+
254+
if buildNumber <= maxBuildNumber {
255+
filteredFiles = append(filteredFiles, file)
256+
}
257+
}
258+
259+
log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber)
260+
return filteredFiles, nil
261+
}
262+
263+
// errMetadataFile is returned by extractBuildNumberFromFileName for
// maven-metadata.xml files (and their checksums), which carry no build number.
// The message text is deliberately "metadata file": callers may still match on it.
var errMetadataFile = errors.New("metadata file")

// extractBuildNumberFromFileName extracts the snapshot build number from a
// Maven artifact file name.
//
// Uniquely-versioned snapshot files are named
//
//	<artifactId>-<baseVersion>-<yyyyMMdd.HHmmss>-<buildNumber>[-<classifier>].<ext>
//
// so the build number is the segment that directly follows the timestamp.
// Looking for the timestamp (instead of only at the last "-" segment) makes
// files with a classifier such as "-sources" or "-javadoc" resolvable too.
// If no timestamp segment is present, the function falls back to interpreting
// the last "-" segment up to its first "." as the build number.
//
// It returns errMetadataFile for names containing "maven-metadata.xml" and a
// descriptive error for names that have fewer than three "-" separated
// segments or no parsable build number.
func extractBuildNumberFromFileName(filename string) (int, error) {
	// Skip metadata files
	if strings.Contains(filename, "maven-metadata.xml") {
		return 0, errMetadataFile
	}

	parts := strings.Split(filename, "-")
	if len(parts) < 3 {
		return 0, fmt.Errorf("invalid file name format: '%s'", filename)
	}

	// Preferred: the segment right after the rightmost snapshot timestamp.
	for i := len(parts) - 2; i >= 0; i-- {
		if !isMavenSnapshotTimestamp(parts[i]) {
			continue
		}
		if n, ok := leadingBuildNumber(parts[i+1]); ok {
			return n, nil
		}
	}

	// Fallback: last segment before its first extension separator.
	buildNumberStr, _, _ := strings.Cut(parts[len(parts)-1], ".")
	buildNumber, err := strconv.Atoi(buildNumberStr)
	if err != nil {
		return 0, fmt.Errorf("failed to convert build number to integer: '%s'", buildNumberStr)
	}

	return buildNumber, nil
}

// isMavenSnapshotTimestamp reports whether s has the form yyyyMMdd.HHmmss,
// i.e. eight digits, a dot and six digits.
func isMavenSnapshotTimestamp(s string) bool {
	if len(s) != 15 || s[8] != '.' {
		return false
	}
	for i := 0; i < len(s); i++ {
		if i == 8 {
			continue
		}
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}

// leadingBuildNumber parses the decimal digits at the start of segment. The
// digits must make up the whole segment or be followed by a "." (the start of
// the file extension); otherwise ok is false.
func leadingBuildNumber(segment string) (n int, ok bool) {
	end := 0
	for end < len(segment) && segment[end] >= '0' && segment[end] <= '9' {
		end++
	}
	if end == 0 || (end < len(segment) && segment[end] != '.') {
		return 0, false
	}
	n, err := strconv.Atoi(segment[:end])
	if err != nil {
		return 0, false
	}
	return n, true
}
287+
229288
// CalculateFileSize sums up all blob sizes matching the search options.
230289
// It does NOT respect the deduplication of blobs.
231290
func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) {

models/packages/package_version.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType
120120

121121
// GetVersionsByPackageType gets all versions of a specific type
122122
func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) {
123-
pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{
124-
OwnerID: ownerID,
123+
opts := &PackageSearchOptions{
125124
Type: packageType,
126125
IsInternal: optional.Some(false),
127-
})
126+
}
127+
128+
if ownerID != 0 {
129+
opts.OwnerID = ownerID
130+
}
131+
132+
pvs, _, err := SearchVersions(ctx, opts)
128133
return pvs, err
129134
}
130135

modules/packages/maven/metadata.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package maven
55

66
import (
77
"encoding/xml"
8+
"errors"
89
"io"
910

1011
"code.gitea.io/gitea/modules/util"
@@ -61,6 +62,27 @@ type pomStruct struct {
6162
} `xml:"dependencies>dependency"`
6263
}
6364

65+
// MavenMetadata mirrors the parts of a version-level maven-metadata.xml
// document that are needed to inspect snapshot builds.
//
// Versioning.Snapshot holds the timestamp and build number of the latest
// snapshot build. Versioning.SnapshotVersions has one entry per
// <snapshotVersions>/<snapshotVersion> element; the "a>b" tag on the slice
// already selects those elements, so the entry fields are mapped directly to
// the element's children (an extra nested snapshotVersion level would never
// match and leave every entry empty).
type MavenMetadata struct {
	XMLName    xml.Name `xml:"metadata"`
	GroupID    string   `xml:"groupId"`
	ArtifactID string   `xml:"artifactId"`
	Version    string   `xml:"version"`
	Versioning struct {
		LastUpdated string `xml:"lastUpdated"`
		Snapshot    struct {
			Timestamp   string `xml:"timestamp"`
			BuildNumber string `xml:"buildNumber"`
		} `xml:"snapshot"`
		SnapshotVersions []struct {
			// Classifier is empty for the main artifact.
			Classifier string `xml:"classifier"`
			Extension  string `xml:"extension"`
			Value      string `xml:"value"`
			Updated    string `xml:"updated"`
		} `xml:"snapshotVersions>snapshotVersion"`
	} `xml:"versioning"`
}
85+
6486
// ParsePackageMetaData parses the metadata of a pom file
6587
func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
6688
var pom pomStruct
@@ -109,3 +131,20 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
109131
Dependencies: dependencies,
110132
}, nil
111133
}
134+
135+
// ParseMavenMetadata parses the Maven metadata XML to extract the build number.
136+
func ParseMavenMetaData(r io.Reader) (string, error) {
137+
var metadata MavenMetadata
138+
139+
dec := xml.NewDecoder(r)
140+
dec.CharsetReader = charset.NewReaderLabel // Assuming charset.NewReaderLabel is a function you've set up to handle character encoding.
141+
if err := dec.Decode(&metadata); err != nil {
142+
return "", err
143+
}
144+
145+
if metadata.Versioning.Snapshot.BuildNumber == "" {
146+
return "", errors.New("no build number in snapshot metadata found")
147+
}
148+
149+
return metadata.Versioning.Snapshot.BuildNumber, nil
150+
}

modules/setting/packages.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ var (
4444
LimitSizeSwift int64
4545
LimitSizeVagrant int64
4646

47-
DefaultRPMSignEnabled bool
47+
DefaultRPMSignEnabled bool
48+
RetainMavenSnapshotBuilds int
4849
}{
49-
Enabled: true,
50-
LimitTotalOwnerCount: -1,
50+
Enabled: true,
51+
LimitTotalOwnerCount: -1,
52+
RetainMavenSnapshotBuilds: -1,
5153
}
5254
)
5355

@@ -101,7 +103,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
101103
Packages.LimitSizeRubyGems = mustBytes(sec, "LIMIT_SIZE_RUBYGEMS")
102104
Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT")
103105
Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
104-
Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
106+
Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
105107
return nil
106108
}
107109

services/packages/cleanup/cleanup.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright 2022 The Gitea Authors. All rights reserved.
22
// SPDX-License-Identifier: MIT
33

4-
package container
4+
package cleanup
55

66
import (
77
"context"
@@ -20,6 +20,7 @@ import (
2020
cargo_service "code.gitea.io/gitea/services/packages/cargo"
2121
container_service "code.gitea.io/gitea/services/packages/container"
2222
debian_service "code.gitea.io/gitea/services/packages/debian"
23+
maven_service "code.gitea.io/gitea/services/packages/maven"
2324
rpm_service "code.gitea.io/gitea/services/packages/rpm"
2425
)
2526

@@ -166,6 +167,10 @@ func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error
166167
return err
167168
}
168169

170+
if err := maven_service.CleanupSnapshotVersions(ctx); err != nil {
171+
return err
172+
}
173+
169174
ps, err := packages_model.FindUnreferencedPackages(ctx)
170175
if err != nil {
171176
return err

services/packages/maven/cleanup.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
package maven
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strconv"
7+
"strings"
8+
9+
"code.gitea.io/gitea/models/packages"
10+
"code.gitea.io/gitea/modules/log"
11+
"code.gitea.io/gitea/modules/packages/maven"
12+
"code.gitea.io/gitea/modules/setting"
13+
packages_service "code.gitea.io/gitea/services/packages"
14+
)
15+
16+
// CleanupSnapshotVersion removes outdated files for SNAPHOT versions for all Maven packages.
17+
func CleanupSnapshotVersions(ctx context.Context) error {
18+
retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
19+
log.Info("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds)
20+
21+
if retainBuilds == -1 {
22+
log.Info("CleanupSnapshotVersion skipped because retainBuilds is set to -1")
23+
return nil
24+
}
25+
26+
if retainBuilds < 1 {
27+
return fmt.Errorf("forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
28+
}
29+
30+
versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
31+
if err != nil {
32+
return fmt.Errorf("failed to retrieve Maven package versions: %w", err)
33+
}
34+
35+
for _, version := range versions {
36+
log.Info("Processing version: %s (ID: %d)", version.Version, version.ID)
37+
38+
if !isSnapshotVersion(version.Version) {
39+
log.Info("Skipping non-SNAPSHOT version: %s (ID: %d)", version.Version, version.ID)
40+
continue
41+
}
42+
43+
if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil {
44+
log.Error("Failed to clean up snapshot files for version '%s' (ID: %d): %v", version.Version, version.ID, err)
45+
return err
46+
}
47+
}
48+
49+
log.Info("Completed CleanupSnapshotVersion")
50+
return nil
51+
}
52+
53+
// isSnapshotVersion reports whether version is a Maven snapshot version,
// i.e. whether it ends in "-SNAPSHOT". Versions that merely contain
// "-SNAPSHOT" somewhere in the middle are not snapshot versions.
func isSnapshotVersion(version string) bool {
	return strings.HasSuffix(version, "-SNAPSHOT")
}
56+
57+
func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error {
58+
log.Info("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds)
59+
60+
metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
61+
if err != nil {
62+
return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
63+
}
64+
65+
maxBuildNumber, err := extractMaxBuildNumberFromMetadata(ctx, metadataFile)
66+
if err != nil {
67+
return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
68+
}
69+
70+
log.Info("Max build number for versionID %d: %d", versionID, maxBuildNumber)
71+
72+
thresholdBuildNumber := maxBuildNumber - retainBuilds
73+
if thresholdBuildNumber <= 0 {
74+
log.Info("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
75+
return nil
76+
}
77+
78+
filesToRemove, err := packages.GetFilesByBuildNumber(ctx, versionID, thresholdBuildNumber)
79+
if err != nil {
80+
return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err)
81+
}
82+
83+
for _, file := range filesToRemove {
84+
log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber)
85+
if err := packages_service.DeletePackageFile(ctx, file); err != nil {
86+
return fmt.Errorf("failed to delete file '%s': %w", file.Name, err)
87+
}
88+
}
89+
90+
log.Info("Completed cleanSnapshotFiles for versionID: %d", versionID)
91+
return nil
92+
}
93+
94+
func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) {
95+
content, _, _, err := packages_service.GetPackageFileStream(ctx, metadataFile)
96+
if err != nil {
97+
return 0, fmt.Errorf("failed to get package file stream: %w", err)
98+
}
99+
defer content.Close()
100+
101+
buildNumberStr, err := maven.ParseMavenMetaData(content)
102+
if err != nil {
103+
return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err)
104+
}
105+
106+
buildNumber, err := strconv.Atoi(buildNumberStr)
107+
if err != nil {
108+
return 0, fmt.Errorf("invalid build number format: %w", err)
109+
}
110+
111+
return buildNumber, nil
112+
}

0 commit comments

Comments
 (0)