Skip to content

Commit

Permalink
feat(cleanup): keep import source images
Browse files Browse the repository at this point in the history
- Keep all related import sources by checksum.
- Skip retaining import sources for Git history-based policy, as it manages them more effectively.
- Enforce Git policy when an image is kept by multiple policies.

Signed-off-by: Aleksei Igrychev <aleksei.igrychev@palark.com>
  • Loading branch information
alexey-igrychev committed Jan 28, 2025
1 parent 8d700b0 commit 20940ea
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 44 deletions.
111 changes: 80 additions & 31 deletions pkg/cleaning/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type CleanupOptions struct {
LocalGit GitRepo
KubernetesContextClients []*kube.ContextClient
KubernetesNamespaceRestrictionByContext map[string]string
WithoutKube bool // legacy
WithoutKube bool // TODO: remove this legacy logic in v3.
ConfigMetaCleanup config.MetaCleanup
KeepStagesBuiltWithinLastNHours uint64
DryRun bool
Expand Down Expand Up @@ -59,7 +59,8 @@ func newCleanupManager(projectName string, storageManager *manager.StorageManage
type cleanupManager struct {
stageManager stage_manager.Manager

nonexistentImportMetadataIDs []string
checksumSourceStageIDs map[string][]string
sourceStageIDImportIDs map[string][]string

ProjectName string
StorageManager manager.StorageManagerInterface
Expand Down Expand Up @@ -158,12 +159,12 @@ func (m *cleanupManager) run(ctx context.Context) error {
if m.KeepStagesBuiltWithinLastNHours != 0 {
keepImagesBuiltWithinLastNHours = m.KeepStagesBuiltWithinLastNHours
}
stage_manager.ProtectionReasonBuiltWithinLastNHoursPolicy.SetDescription(fmt.Sprintf("built within last %d hours", keepImagesBuiltWithinLastNHours))

if !(m.ConfigMetaCleanup.DisableBuiltWithinLastNHoursPolicy || keepImagesBuiltWithinLastNHours == 0) {
reason := fmt.Sprintf("built within last %d hours", keepImagesBuiltWithinLastNHours)
for stageDescToDelete := range m.stageManager.GetStageDescSet().Iter() {
if (time.Since(stageDescToDelete.Info.GetCreatedAt()).Hours()) <= float64(keepImagesBuiltWithinLastNHours) {
m.stageManager.MarkStageDescAsProtected(stageDescToDelete, reason)
m.stageManager.MarkStageDescAsProtected(stageDescToDelete, stage_manager.ProtectionReasonBuiltWithinLastNHoursPolicy, false)
}
}
}
Expand Down Expand Up @@ -222,7 +223,7 @@ func (m *cleanupManager) skipStageIDsThatAreUsedInKubernetes(ctx context.Context
stageID := stageDesc.StageID.String()

handleTagFunc(tag, stageID, func() {
m.stageManager.MarkStageDescAsProtected(stageDesc, "used in the Kubernetes")
m.stageManager.MarkStageDescAsProtected(stageDesc, stage_manager.ProtectionReasonKubernetesBasedPolicy, false)
})
}

Expand All @@ -232,7 +233,7 @@ func (m *cleanupManager) skipStageIDsThatAreUsedInKubernetes(ctx context.Context
stageDesc := m.stageManager.GetStageDescByStageID(stageID)
if stageDesc != nil {
// keep existent stage and associated custom tags
m.stageManager.MarkStageDescAsProtected(stageDesc, "used in the Kubernetes")
m.stageManager.MarkStageDescAsProtected(stageDesc, stage_manager.ProtectionReasonKubernetesBasedPolicy, false)
} else {
// keep custom tags that do not have associated existent stage
m.stageManager.ForgetCustomTagsByStageID(stageID)
Expand All @@ -254,7 +255,7 @@ Loop:
for _, deployedDockerImage := range deployedDockerImages {
if deployedDockerImage.Name == dockerImageName {
if !handledDeployedFinalStages[stageID] {
m.stageManager.MarkFinalStageDescAsProtected(stageDesc, "used in the Kubernetes")
m.stageManager.MarkFinalStageDescAsProtected(stageDesc, stage_manager.ProtectionReasonKubernetesBasedPolicy, false)

logboek.Context(ctx).Default().LogFDetails(" tag: %s\n", stageID)
logboek.Context(ctx).LogOptionalLn()
Expand Down Expand Up @@ -485,7 +486,7 @@ func (m *cleanupManager) prepareStageIDTableRows(ctx context.Context, stageIDCus
func (m *cleanupManager) handleSavedStageIDs(ctx context.Context, savedStageIDs []string) {
logboek.Context(ctx).Default().LogBlock("Saved tags").Do(func() {
for _, stageID := range savedStageIDs {
m.stageManager.MarkStageDescAsProtectedByStageID(stageID, "found in the git history")
m.stageManager.MarkStageDescAsProtectedByStageID(stageID, stage_manager.ProtectionReasonGitPolicy, true)
logboek.Context(ctx).Default().LogFDetails(" tag: %s\n", stageID)
logboek.Context(ctx).LogOptionalLn()
}
Expand Down Expand Up @@ -713,10 +714,11 @@ func (m *cleanupManager) cleanupUnusedStages(ctx context.Context) error {

// skip kept stages and their relatives.
{
for stageDescToKeep := range m.stageManager.GetProtectedStageDescSet().Iter() {
relativeStageDescSetToKeep := m.getRelativeStageDescSetByStageDesc(stageDescToKeep)
for relativeStageDescToKeep := range relativeStageDescSetToKeep.Iter() {
m.stageManager.MarkStageDescAsProtected(relativeStageDescToKeep, "ancestors")
for protectionReason, stageDescToKeepSet := range m.stageManager.GetProtectedStageDescSetByReason() {
// Git history based policy keeps import sources more effectively, other policies do not keep them.
withImportSources := protectionReason != stage_manager.ProtectionReasonGitPolicy
for stageDescToKeep := range stageDescToKeepSet.Iter() {
m.protectRelativeStageDescSetByStageDesc(stageDescToKeep, withImportSources)
}
}

Expand Down Expand Up @@ -746,9 +748,34 @@ func (m *cleanupManager) cleanupUnusedStages(ctx context.Context) error {
return fmt.Errorf("unable to cleanup custom tags metadata: %w", err)
}

if len(m.nonexistentImportMetadataIDs) != 0 {
if err := logboek.Context(ctx).Default().LogProcess("Cleaning imports metadata (%d)", len(m.nonexistentImportMetadataIDs)).DoError(func() error {
return m.deleteImportsMetadata(ctx, m.nonexistentImportMetadataIDs)
if err := m.deleteUnusedImportsMetadata(ctx); err != nil {
return fmt.Errorf("unable to cleanup imports metadata: %w", err)
}

return nil
}

func (m *cleanupManager) deleteUnusedImportsMetadata(ctx context.Context) error {
if len(m.sourceStageIDImportIDs) == 0 {
return nil
}

var importMetadataIDsToDelete []string
outerLoop:
for sourceStageID, importMetadataIDs := range m.sourceStageIDImportIDs {
for protectedStageDesc := range m.stageManager.GetProtectedStageDescSet().Iter() {
// Skip existent/protected stages.
if sourceStageID == protectedStageDesc.StageID.String() {
continue outerLoop
}
}

importMetadataIDsToDelete = append(importMetadataIDsToDelete, importMetadataIDs...)
}

if len(importMetadataIDsToDelete) != 0 {
if err := logboek.Context(ctx).Default().LogProcess("Cleaning imports metadata (%d)", len(importMetadataIDsToDelete)).DoError(func() error {
return m.deleteImportsMetadata(ctx, importMetadataIDsToDelete)
}); err != nil {
return err
}
Expand All @@ -768,7 +795,7 @@ FilterOutFinalStages:
}
}

m.stageManager.MarkFinalStageDescAsProtected(finalStageDesc, "not exist in the repo")
m.stageManager.MarkFinalStageDescAsProtected(finalStageDesc, stage_manager.ProtectionReasonNotFoundInRepo, false)
}

finalStageDescSetToDelete := m.stageManager.GetFinalStageDescSet().Difference(m.stageManager.GetFinalProtectedStageDescSet())
Expand All @@ -786,6 +813,9 @@ FilterOutFinalStages:
}

func (m *cleanupManager) initImportsMetadata(ctx context.Context) error {
m.checksumSourceStageIDs = map[string][]string{}
m.sourceStageIDImportIDs = map[string][]string{}

importMetadataIDs, err := m.StorageManager.GetStagesStorage().GetImportMetadataIDs(ctx, m.ProjectName, storage.WithCache())
if err != nil {
return err
Expand Down Expand Up @@ -813,17 +843,10 @@ func (m *cleanupManager) initImportsMetadata(ctx context.Context) error {

importSourceID := metadata.ImportSourceID
sourceStageID := metadata.SourceStageID
if !m.stageManager.ContainsStageDescByStageID(sourceStageID) {
m.nonexistentImportMetadataIDs = append(m.nonexistentImportMetadataIDs, importSourceID)
return nil
}
checksum := metadata.Checksum

// TODO: remove this legacy logic in v3.
sourceImageID := metadata.SourceImageID
stage := findStageDescByImageID(m.stageManager.GetStageDescSet(), sourceImageID)
if stage == nil {
m.nonexistentImportMetadataIDs = append(m.nonexistentImportMetadataIDs, importSourceID)
}
m.checksumSourceStageIDs[checksum] = append(m.checksumSourceStageIDs[checksum], sourceStageID)
m.sourceStageIDImportIDs[sourceStageID] = append(m.sourceStageIDImportIDs[sourceStageID], importSourceID)

return nil
})
Expand Down Expand Up @@ -868,7 +891,7 @@ func findStageDescByImageID(stageDescSet image.StageDescSet, imageID string) *im
return nil
}

func (m *cleanupManager) getRelativeStageDescSetByStageDesc(targetStageDesc *image.StageDesc) image.StageDescSet {
func (m *cleanupManager) protectRelativeStageDescSetByStageDesc(targetStageDesc *image.StageDesc, withImportSource bool) {
targetStageDescSet := image.NewStageDescSet()
if targetStageDesc.Info.IsIndex {
for _, platformImageInfo := range targetStageDesc.Info.Index {
Expand All @@ -881,33 +904,59 @@ func (m *cleanupManager) getRelativeStageDescSetByStageDesc(targetStageDesc *ima
targetStageDescSet.Add(targetStageDesc)
}

handledStageDescSet := image.NewStageDescSet()
stageDescSet := m.stageManager.GetStageDescSet()
relativeStageDescSet := image.NewStageDescSet()
currentStageDescSet := targetStageDescSet
for !currentStageDescSet.IsEmpty() {
for _, currentStageDesc := range currentStageDescSet.ToSlice() {
relativeStageDescSet.Add(currentStageDesc)
currentStageDescSet.Remove(currentStageDesc)

// Avoid potential cyclical dependency.
if handledStageDescSet.Contains(currentStageDesc) {
continue
} else {
handledStageDescSet.Add(currentStageDesc)
}

// Import source checking.
if withImportSource {
for label, checksum := range currentStageDesc.Info.Labels {
if strings.HasPrefix(label, image.WerfImportChecksumLabelPrefix) {
sourceStageIDs, ok := m.checksumSourceStageIDs[checksum]
if !ok {
continue
}

for _, sourceStageID := range sourceStageIDs {
sourceStageDesc := m.stageManager.GetStageDescByStageID(sourceStageID)
if sourceStageDesc != nil {
currentStageDescSet.Add(sourceStageDesc)
m.stageManager.MarkStageDescAsProtected(sourceStageDesc, stage_manager.ProtectionReasonImportSource, false)
}
}
}
}
}

// Parent stage checking.
{
// TODO: remove this legacy check in v3.
for stageDesc := range stageDescSet.Iter() {
if currentStageDesc.Info.ParentID == stageDesc.Info.ID {
currentStageDescSet.Add(stageDesc)
m.stageManager.MarkStageDescAsProtected(stageDesc, stage_manager.ProtectionReasonAncestor, false)
break
}
}

parentStageDesc := m.stageManager.GetStageDescByStageID(currentStageDesc.Info.Labels[image.WerfParentStageID])
if parentStageDesc != nil {
m.stageManager.MarkStageDescAsProtected(parentStageDesc, stage_manager.ProtectionReasonAncestor, false)
currentStageDescSet.Add(parentStageDesc)
}
}
}
}

return relativeStageDescSet
}

func (m *cleanupManager) deleteUnusedCustomTags(ctx context.Context) error {
Expand Down
14 changes: 7 additions & 7 deletions pkg/cleaning/stage_manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,21 @@ func GetCustomTagsMetadata(ctx context.Context, storageManager manager.StorageMa
return stageIDCustomTagList, nil
}

func (m *Manager) MarkStageDescAsProtectedByStageID(stageID, reason string) {
func (m *Manager) MarkStageDescAsProtectedByStageID(stageID string, reason *protectionReason, forceReason bool) {
stageDesc := m.GetStageDescByStageID(stageID)
if stageDesc == nil {
panic(fmt.Sprintf("stage description %s not found", stageID))
}

m.stageDescSet.MarkStageDescAsProtected(m.GetStageDescByStageID(stageID), reason)
m.stageDescSet.MarkStageDescAsProtected(m.GetStageDescByStageID(stageID), reason, forceReason)
}

func (m *Manager) MarkStageDescAsProtected(stageDesc *image.StageDesc, reason string) {
m.stageDescSet.MarkStageDescAsProtected(stageDesc, reason)
func (m *Manager) MarkStageDescAsProtected(stageDesc *image.StageDesc, reason *protectionReason, forceReason bool) {
m.stageDescSet.MarkStageDescAsProtected(stageDesc, reason, forceReason)
}

func (m *Manager) MarkFinalStageDescAsProtected(stageDesc *image.StageDesc, reason string) {
m.finalStageDescSet.MarkStageDescAsProtected(stageDesc, reason)
func (m *Manager) MarkFinalStageDescAsProtected(stageDesc *image.StageDesc, reason *protectionReason, forceReason bool) {
m.finalStageDescSet.MarkStageDescAsProtected(stageDesc, reason, forceReason)
}

// GetImageStageIDCommitListToCleanup method returns existing stage IDs and related existing commits (for each managed image)
Expand Down Expand Up @@ -284,7 +284,7 @@ func (m *Manager) GetFinalProtectedStageDescSet() image.StageDescSet {
return m.finalStageDescSet.GetProtectedStageDescSet()
}

func (m *Manager) GetProtectedStageDescSetByReason() map[string]image.StageDescSet {
func (m *Manager) GetProtectedStageDescSetByReason() map[*protectionReason]image.StageDescSet {
return m.stageDescSet.GetProtectedStageDescSetByReason()
}

Expand Down
37 changes: 31 additions & 6 deletions pkg/cleaning/stage_manager/stages.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,34 @@ type managedStageDescSet struct {

type stageMeta struct {
isProtected bool
protectionReason string
protectionReason *protectionReason
}

type protectionReason struct {
description string
}

func (r *protectionReason) String() string {
return r.description
}

func newProtectionReason(desc string) *protectionReason {
return &protectionReason{description: desc}
}

func (r *protectionReason) SetDescription(desc string) {
r.description = desc
}

var (
ProtectionReasonKubernetesBasedPolicy = newProtectionReason("used in Kubernetes")
ProtectionReasonGitPolicy = newProtectionReason("git policy")
ProtectionReasonBuiltWithinLastNHoursPolicy = newProtectionReason("built within last N hours")
ProtectionReasonImportSource = newProtectionReason("import source")
ProtectionReasonAncestor = newProtectionReason("ancestor")
ProtectionReasonNotFoundInRepo = newProtectionReason("not found in repo")
)

func newManagedStageDescSet(set image.StageDescSet) managedStageDescSet {
return managedStageDescSet{
stageDescSet: set,
Expand All @@ -29,19 +54,19 @@ func (s *managedStageDescSet) DifferenceInPlace(stageDescSet image.StageDescSet)
s.stageDescSet = s.stageDescSet.Difference(stageDescSet)
}

func (s *managedStageDescSet) MarkStageDescAsProtected(stageDesc *image.StageDesc, protectionReason string) {
func (s *managedStageDescSet) MarkStageDescAsProtected(stageDesc *image.StageDesc, reason *protectionReason, forceReason bool) {
_, ok := s.stageDescMetaMap[stageDesc]
if !ok {
s.stageDescMetaMap[stageDesc] = &stageMeta{}
}

// If the stage is already protected, do not change the protection reason.
if s.stageDescMetaMap[stageDesc].isProtected {
if s.stageDescMetaMap[stageDesc].isProtected && !forceReason {
return
}

s.stageDescMetaMap[stageDesc].isProtected = true
s.stageDescMetaMap[stageDesc].protectionReason = protectionReason
s.stageDescMetaMap[stageDesc].protectionReason = reason
}

func (s *managedStageDescSet) GetProtectedStageDescSet() image.StageDescSet {
Expand All @@ -53,8 +78,8 @@ func (s *managedStageDescSet) GetProtectedStageDescSet() image.StageDescSet {
return stageDescSet
}

func (s *managedStageDescSet) GetProtectedStageDescSetByReason() map[string]image.StageDescSet {
stageDescSetByReason := make(map[string]image.StageDescSet)
func (s *managedStageDescSet) GetProtectedStageDescSetByReason() map[*protectionReason]image.StageDescSet {
stageDescSetByReason := make(map[*protectionReason]image.StageDescSet)
for stageDesc, meta := range s.stageDescMetaMap {
if !meta.isProtected {
continue
Expand Down

0 comments on commit 20940ea

Please sign in to comment.