From 734d1e320a9bff9ba13a9288e91ce9ef4a80d44b Mon Sep 17 00:00:00 2001 From: Anis Eleuch Date: Wed, 4 Dec 2024 00:12:04 +0100 Subject: [PATCH 1/2] heal: Single object heal to look for older versions as well (#203) (#20723) `mc admin heal ALIAS/bucket/object` does not have any flag to heal object noncurrent versions, this commit will make healing of the object noncurrent versions implicitly asked. This also fixes the 'mc admin heal ALIAS/bucket/object' that does not work correctly when the bucket is versioned. This has been broken since Apr 2023. --- cmd/admin-heal-ops.go | 11 +---------- cmd/erasure-healing.go | 12 +++++++++--- cmd/erasure-healing_test.go | 6 +++--- cmd/erasure-server-pool.go | 3 ++- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/cmd/admin-heal-ops.go b/cmd/admin-heal-ops.go index 9b86539235a73..cfe8a814fb912 100644 --- a/cmd/admin-heal-ops.go +++ b/cmd/admin-heal-ops.go @@ -841,6 +841,7 @@ func (h *healSequence) healMinioSysMeta(objAPI ObjectLayer, metaPrefix string) f // NOTE: Healing on meta is run regardless // of any bucket being selected, this is to ensure that // meta are always upto date and correct. + h.settings.Recursive = true return objAPI.HealObjects(h.ctx, minioMetaBucket, metaPrefix, h.settings, func(bucket, object, versionID string, scanMode madmin.HealScanMode) error { if h.isQuitting() { return errHealStopSignalled @@ -896,16 +897,6 @@ func (h *healSequence) healBucket(objAPI ObjectLayer, bucket string, bucketsOnly return nil } - if !h.settings.Recursive { - if h.object != "" { - if err := h.healObject(bucket, h.object, "", h.settings.ScanMode); err != nil { - return err - } - } - - return nil - } - if err := objAPI.HealObjects(h.ctx, bucket, h.object, h.settings, h.healObject); err != nil { return errFnHealFromAPIErr(h.ctx, err) } diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index dda4e6c6dd02b..25b8de031a051 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -44,7 +44,8 @@ const ( healingMetricCheckAbandonedParts ) -func (er erasureObjects) listAndHeal(ctx context.Context, bucket, prefix string, scanMode madmin.HealScanMode, healEntry func(string, metaCacheEntry, madmin.HealScanMode) error) error { +// List a prefix or a single object versions and heal +func (er erasureObjects) listAndHeal(ctx context.Context, bucket, prefix string, recursive bool, scanMode madmin.HealScanMode, healEntry func(string, metaCacheEntry, madmin.HealScanMode) error) error { ctx, cancel := context.WithCancel(ctx) defer cancel() @@ -77,11 +78,14 @@ func (er erasureObjects) listAndHeal(ctx context.Context, bucket, prefix string, bucket: bucket, path: path, filterPrefix: filterPrefix, - recursive: true, + recursive: recursive, forwardTo: "", minDisks: 1, reportNotFound: false, agreed: func(entry metaCacheEntry) { + if !recursive && prefix != entry.name { + return + } if err := healEntry(bucket, entry, scanMode); err != nil { cancel() } @@ -93,7 +97,9 @@ func (er erasureObjects) listAndHeal(ctx context.Context, bucket, prefix string, // proceed to heal nonetheless. entry, _ = entries.firstFound() } - + if !recursive && prefix != entry.name { + return + } if err := healEntry(bucket, *entry, scanMode); err != nil { cancel() return diff --git a/cmd/erasure-healing_test.go b/cmd/erasure-healing_test.go index 5a95e84a770a0..7a4f183c4eb25 100644 --- a/cmd/erasure-healing_test.go +++ b/cmd/erasure-healing_test.go @@ -733,7 +733,7 @@ func TestHealingDanglingObject(t *testing.T) { t.Fatalf("Expected versions 1, got %d", fileInfoPreHeal.NumVersions) } - if err = objLayer.HealObjects(ctx, bucket, "", madmin.HealOpts{Remove: true}, + if err = objLayer.HealObjects(ctx, bucket, "", madmin.HealOpts{Recursive: true, Remove: true}, func(bucket, object, vid string, scanMode madmin.HealScanMode) error { _, err := objLayer.HealObject(ctx, bucket, object, vid, madmin.HealOpts{ScanMode: scanMode, Remove: true}) return err @@ -780,7 +780,7 @@ func TestHealingDanglingObject(t *testing.T) { t.Fatalf("Expected versions 1, got %d", fileInfoPreHeal.NumVersions) } - if err = objLayer.HealObjects(ctx, bucket, "", madmin.HealOpts{Remove: true}, + if err = objLayer.HealObjects(ctx, bucket, "", madmin.HealOpts{Recursive: true, Remove: true}, func(bucket, object, vid string, scanMode madmin.HealScanMode) error { _, err := objLayer.HealObject(ctx, bucket, object, vid, madmin.HealOpts{ScanMode: scanMode, Remove: true}) return err @@ -829,7 +829,7 @@ func TestHealingDanglingObject(t *testing.T) { t.Fatalf("Expected versions 3, got %d", fileInfoPreHeal.NumVersions) } - if err = objLayer.HealObjects(ctx, bucket, "", madmin.HealOpts{Remove: true}, + if err = objLayer.HealObjects(ctx, bucket, "", madmin.HealOpts{Recursive: true, Remove: true}, func(bucket, object, vid string, scanMode madmin.HealScanMode) error { _, err := objLayer.HealObject(ctx, bucket, object, vid, madmin.HealOpts{ScanMode: scanMode, Remove: true}) return err diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index d99fe03224a25..6fa593aa11019 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -2478,6 +2478,7 @@ func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, re // HealObjectFn closure function heals the object. type HealObjectFn func(bucket, object, versionID string, scanMode madmin.HealScanMode) error +// List a prefix or a single object versions and heal func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix string, opts madmin.HealOpts, healObjectFn HealObjectFn) error { healEntry := func(bucket string, entry metaCacheEntry, scanMode madmin.HealScanMode) error { if entry.isDir() { @@ -2541,7 +2542,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str go func(idx int, set *erasureObjects) { defer wg.Done() - errs[idx] = set.listAndHeal(ctx, bucket, prefix, opts.ScanMode, healEntry) + errs[idx] = set.listAndHeal(ctx, bucket, prefix, opts.Recursive, opts.ScanMode, healEntry) }(idx, set) } wg.Wait() From eddbe6bca22621041346192a72c2e0b347d296a5 Mon Sep 17 00:00:00 2001 From: Anis Eleuch Date: Wed, 4 Dec 2024 00:12:25 +0100 Subject: [PATCH 2/2] heal: Report bucket healing result correctly (#20721) --- cmd/peer-rest-server.go | 9 +++++---- cmd/peer-s3-client.go | 25 +++++++++++++++---------- cmd/peer-s3-server.go | 4 ++-- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index e2700f2808353..d990abfff0a78 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -56,6 +56,7 @@ var ( aoBucketInfo = grid.NewArrayOf[*BucketInfo](func() *BucketInfo { return &BucketInfo{} }) aoMetricsGroup = grid.NewArrayOf[*MetricV2](func() *MetricV2 { return &MetricV2{} }) madminBgHealState = grid.NewJSONPool[madmin.BgHealState]() + madminHealResultItem = grid.NewJSONPool[madmin.HealResultItem]() madminCPUs = grid.NewJSONPool[madmin.CPUs]() madminMemInfo = grid.NewJSONPool[madmin.MemInfo]() madminNetInfo = grid.NewJSONPool[madmin.NetInfo]() @@ -97,7 +98,7 @@ var ( getSysErrorsRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.SysErrors]](grid.HandlerGetSysErrors, grid.NewMSS, madminSysErrors.NewJSON) getSysServicesRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.SysServices]](grid.HandlerGetSysServices, grid.NewMSS, madminSysServices.NewJSON) headBucketRPC = grid.NewSingleHandler[*grid.MSS, *VolInfo](grid.HandlerHeadBucket, grid.NewMSS, func() *VolInfo { return &VolInfo{} }) - healBucketRPC = grid.NewSingleHandler[*grid.MSS, grid.NoPayload](grid.HandlerHealBucket, grid.NewMSS, grid.NewNoPayload) + healBucketRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.HealResultItem]](grid.HandlerHealBucket, grid.NewMSS, madminHealResultItem.NewJSON) listBucketsRPC = grid.NewSingleHandler[*BucketOptions, *grid.Array[*BucketInfo]](grid.HandlerListBuckets, func() *BucketOptions { return &BucketOptions{} }, aoBucketInfo.New) loadBucketMetadataRPC = grid.NewSingleHandler[*grid.MSS, grid.NoPayload](grid.HandlerLoadBucketMetadata, grid.NewMSS, grid.NewNoPayload).IgnoreNilConn() loadGroupRPC = grid.NewSingleHandler[*grid.MSS, grid.NoPayload](grid.HandlerLoadGroup, grid.NewMSS, grid.NewNoPayload) @@ -1258,21 +1259,21 @@ func (s *peerRESTServer) NetSpeedTestHandler(w http.ResponseWriter, r *http.Requ peersLogIf(r.Context(), gob.NewEncoder(w).Encode(result)) } -func (s *peerRESTServer) HealBucketHandler(mss *grid.MSS) (np grid.NoPayload, nerr *grid.RemoteErr) { +func (s *peerRESTServer) HealBucketHandler(mss *grid.MSS) (np *grid.JSON[madmin.HealResultItem], nerr *grid.RemoteErr) { bucket := mss.Get(peerS3Bucket) if isMinioMetaBucket(bucket) { return np, grid.NewRemoteErr(errInvalidArgument) } bucketDeleted := mss.Get(peerS3BucketDeleted) == "true" - _, err := healBucketLocal(context.Background(), bucket, madmin.HealOpts{ + res, err := healBucketLocal(context.Background(), bucket, madmin.HealOpts{ Remove: bucketDeleted, }) if err != nil { return np, grid.NewRemoteErr(err) } - return np, nil + return madminHealResultItem.NewJSONWith(&res), nil } func (s *peerRESTServer) ListBucketsHandler(opts *BucketOptions) (*grid.Array[*BucketInfo], *grid.RemoteErr) { diff --git a/cmd/peer-s3-client.go b/cmd/peer-s3-client.go index a28dd1303ae49..ebfd872024bc7 100644 --- a/cmd/peer-s3-client.go +++ b/cmd/peer-s3-client.go @@ -178,13 +178,24 @@ func (sys *S3PeerSys) HealBucket(ctx context.Context, bucket string, opts madmin } } + if healBucketErr := reduceWriteQuorumErrs(ctx, errs, bucketOpIgnoredErrs, len(errs)/2+1); healBucketErr != nil { + return madmin.HealResultItem{}, toObjectErr(healBucketErr, bucket) + } + + res := madmin.HealResultItem{ + Type: madmin.HealItemBucket, + Bucket: bucket, + SetCount: -1, // explicitly set an invalid value -1, for bucket heal scenario + } + for i, err := range errs { if err == nil { - return healBucketResults[i], nil + res.Before.Drives = append(res.Before.Drives, healBucketResults[i].Before.Drives...) + res.After.Drives = append(res.After.Drives, healBucketResults[i].After.Drives...) } } - return madmin.HealResultItem{}, toObjectErr(errVolumeNotFound, bucket) + return res, nil } // ListBuckets lists buckets across all nodes and returns a consistent view: @@ -355,14 +366,8 @@ func (client *remotePeerS3Client) HealBucket(ctx context.Context, bucket string, ctx, cancel := context.WithTimeout(ctx, globalDriveConfig.GetMaxTimeout()) defer cancel() - _, err := healBucketRPC.Call(ctx, conn, mss) - - // Initialize heal result info - return madmin.HealResultItem{ - Type: madmin.HealItemBucket, - Bucket: bucket, - SetCount: -1, // explicitly set an invalid value -1, for bucket heal scenario - }, toStorageErr(err) + resp, err := healBucketRPC.Call(ctx, conn, mss) + return resp.ValueOrZero(), toStorageErr(err) } // GetBucketInfo returns bucket stat info from a peer diff --git a/cmd/peer-s3-server.go b/cmd/peer-s3-server.go index 03e1c297ad737..18a84e7c13899 100644 --- a/cmd/peer-s3-server.go +++ b/cmd/peer-s3-server.go @@ -101,7 +101,7 @@ func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) ( for i := range beforeState { res.Before.Drives = append(res.Before.Drives, madmin.HealDriveInfo{ UUID: "", - Endpoint: localDrives[i].String(), + Endpoint: localDrives[i].Endpoint().String(), State: beforeState[i], }) } @@ -149,7 +149,7 @@ func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) ( for i := range afterState { res.After.Drives = append(res.After.Drives, madmin.HealDriveInfo{ UUID: "", - Endpoint: localDrives[i].String(), + Endpoint: localDrives[i].Endpoint().String(), State: afterState[i], }) }