Skip to content

Commit b3accca

Browse files
Revert "fix(ingester): Keep read only when any number of compactions in progress" (grafana#12056)
Reverts grafana#11890, `cortex_ingester_tsdb_forced_compactions_in_progress` is included in mixin alerts. Will merge an updated PR with this metric re-added.
1 parent 47f7604 commit b3accca

File tree

5 files changed

+25
-51
lines changed

5 files changed

+25
-51
lines changed

CHANGELOG.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@
171171
* [BUGFIX] Ruler: Fix rare panic when the ruler is shutting down. #11781
172172
* [BUGFIX] Block-builder-scheduler: Fix data loss bug in job assignment. #11785
173173
* [BUGFIX] Compactor: start tracking `-compactor.max-compaction-time` after the initial compaction planning phase, to avoid rare cases where planning takes longer than `-compactor.max-compaction-time` and so actual compaction never runs for a tenant. #11834
174-
* [BUGFIX] Ingester: Fix issue where ingesters can exit read-only mode during idle compactions, resulting in write errors. #11890
175174

176175
### Mixin
177176

pkg/ingester/downscale.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ func (i *Ingester) PrepareInstanceRingDownscaleHandler(w http.ResponseWriter, r
5252
i.circuitBreaker.push.deactivate()
5353

5454
case http.MethodDelete:
55-
// Don't leave read-only mode if there is any compaction pending or in progress.
56-
if i.numCompactionsInProgress.Load() != 0 {
57-
msg := "cannot clear read-only mode while compaction is in progress"
55+
// Don't leave read-only mode if there's a forced compaction pending or in progress.
56+
if len(i.forceCompactTrigger) > 0 || i.forcedCompactionInProgress.Load() {
57+
msg := "cannot clear read-only mode while forced compaction is in progress"
5858
level.Warn(i.logger).Log("msg", msg)
5959
http.Error(w, msg, http.StatusConflict)
6060
return

pkg/ingester/downscale_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ func TestIngester_PrepareInstanceRingDownscaleHandler(t *testing.T) {
155155
require.Equal(t, http.StatusMethodNotAllowed, res.Code)
156156
})
157157

158-
t.Run("should return Conflict when any compaction is in progress", func(t *testing.T) {
158+
t.Run("should return Conflict when forced compaction is in progress", func(t *testing.T) {
159159
t.Parallel()
160160

161161
ingester, r := setup(true)
@@ -170,9 +170,7 @@ func TestIngester_PrepareInstanceRingDownscaleHandler(t *testing.T) {
170170
return inst.ReadOnly
171171
})
172172

173-
// Simulate a compation in progress.
174-
cleanup := ingester.prepareCompaction()
175-
defer cleanup()
173+
ingester.forcedCompactionInProgress.Store(true)
176174

177175
// Try to switch back to read-write while compaction is in progress
178176
res = httptest.NewRecorder()

pkg/ingester/ingester.go

Lines changed: 11 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ type Ingester struct {
340340
seriesCount atomic.Int64
341341

342342
// Tracks if a forced compaction is in progress
343-
numCompactionsInProgress atomic.Uint32
343+
forcedCompactionInProgress atomic.Bool
344344

345345
// For storing metadata ingested.
346346
usersMetadataMtx sync.RWMutex
@@ -3252,10 +3252,6 @@ func (i *Ingester) compactionServiceRunning(ctx context.Context) error {
32523252
for ctx.Err() == nil {
32533253
select {
32543254
case <-tickerChan:
3255-
// Prepare compaction before the periodic head compaction is triggered.
3256-
cleanup := i.prepareCompaction()
3257-
defer cleanup()
3258-
32593255
// The forcedCompactionMaxTime has no meaning because force=false.
32603256
i.compactBlocks(ctx, false, 0, nil)
32613257

@@ -3273,12 +3269,6 @@ func (i *Ingester) compactionServiceRunning(ctx context.Context) error {
32733269
}
32743270

32753271
case req := <-i.forceCompactTrigger:
3276-
// Note:
3277-
// Prepare compaction is not done here but before the force compaction is triggered.
3278-
// This is because we want to track the number of compactions accurately before the
3279-
// downscale handler is called. This ensures that the ingester will never leave the
3280-
// read-only state. (See [Ingester.FlushHandler])
3281-
32823272
// Always pass math.MaxInt64 as forcedCompactionMaxTime because we want to compact the whole TSDB head.
32833273
i.compactBlocks(ctx, true, math.MaxInt64, req.users)
32843274
close(req.callback) // Notify back.
@@ -3311,28 +3301,19 @@ func (i *Ingester) compactionServiceInterval() (firstInterval, standardInterval
33113301
return
33123302
}
33133303

3314-
// prepareCompaction is incrementing the atomic counter of the number of compactions in progress.
3315-
// It also exposes a metric tracking the number of compactions in progress.
3316-
// It returns a callback that should be called when the compaction is finished, e.g. in defer statement.
3317-
// This callback is decrementing the atomic counter of the number of compactions in progress.
3318-
func (i *Ingester) prepareCompaction() func() {
3319-
// Increment the number of compactions in progress.
3320-
// This is used to ensure that the ingester will never leave the read-only state.
3321-
// (See [Ingester.PrepareInstanceRingDownscaleHandler])
3322-
i.numCompactionsInProgress.Inc()
3323-
3324-
// Expose a metric tracking whether there's a TSDB head compaction in progress.
3304+
// Compacts all compactable blocks. Force flag will force compaction even if head is not compactable yet.
3305+
func (i *Ingester) compactBlocks(ctx context.Context, force bool, forcedCompactionMaxTime int64, allowed *util.AllowList) {
3306+
// Expose a metric tracking whether there's a forced head compaction in progress.
33253307
// This metric can be used in alerts and when troubleshooting.
3326-
i.metrics.numCompactionsInProgress.Inc()
3327-
3328-
return func() {
3329-
i.numCompactionsInProgress.Dec()
3330-
i.metrics.numCompactionsInProgress.Dec()
3308+
if force {
3309+
i.metrics.forcedCompactionInProgress.Set(1)
3310+
i.forcedCompactionInProgress.Store(true)
3311+
defer func() {
3312+
i.metrics.forcedCompactionInProgress.Set(0)
3313+
i.forcedCompactionInProgress.Store(false)
3314+
}()
33313315
}
3332-
}
33333316

3334-
// Compacts all compactable blocks. Force flag will force compaction even if head is not compactable yet.
3335-
func (i *Ingester) compactBlocks(ctx context.Context, force bool, forcedCompactionMaxTime int64, allowed *util.AllowList) {
33363317
_ = concurrency.ForEachUser(ctx, i.getTSDBUsers(), i.cfg.BlocksStorageConfig.TSDB.HeadCompactionConcurrency, func(_ context.Context, userID string) error {
33373318
if !allowed.IsAllowed(userID) {
33383319
return nil
@@ -3660,10 +3641,6 @@ func (i *Ingester) FlushHandler(w http.ResponseWriter, r *http.Request) {
36603641
return
36613642
}
36623643

3663-
// Prepare compaction before the force compaction is triggered.
3664-
cleanup := i.prepareCompaction()
3665-
defer cleanup()
3666-
36673644
compactionCallbackCh := make(chan struct{})
36683645

36693646
level.Info(i.logger).Log("msg", "flushing TSDB blocks: triggering compaction")

pkg/ingester/metrics.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ type ingesterMetrics struct {
6565
maxLocalSeriesPerUser *prometheus.GaugeVec
6666

6767
// Head compactions metrics.
68-
compactionsTriggered prometheus.Counter
69-
compactionsFailed prometheus.Counter
70-
numCompactionsInProgress prometheus.Gauge
71-
appenderAddDuration prometheus.Histogram
72-
appenderCommitDuration prometheus.Histogram
73-
idleTsdbChecks *prometheus.CounterVec
68+
compactionsTriggered prometheus.Counter
69+
compactionsFailed prometheus.Counter
70+
forcedCompactionInProgress prometheus.Gauge
71+
appenderAddDuration prometheus.Histogram
72+
appenderCommitDuration prometheus.Histogram
73+
idleTsdbChecks *prometheus.CounterVec
7474

7575
// Open all existing TSDBs metrics
7676
openExistingTSDB prometheus.Counter
@@ -350,9 +350,9 @@ func newIngesterMetrics(
350350
Name: "cortex_ingester_tsdb_compactions_failed_total",
351351
Help: "Total number of compactions that failed.",
352352
}),
353-
numCompactionsInProgress: promauto.With(r).NewGauge(prometheus.GaugeOpts{
354-
Name: "cortex_ingester_tsdb_num_compactions_in_progress",
355-
Help: "Reports the number of TSDB head compactions in progress.",
353+
forcedCompactionInProgress: promauto.With(r).NewGauge(prometheus.GaugeOpts{
354+
Name: "cortex_ingester_tsdb_forced_compactions_in_progress",
355+
Help: "Reports 1 if there's a forced TSDB head compaction in progress, 0 otherwise.",
356356
}),
357357

358358
appenderAddDuration: promauto.With(r).NewHistogram(prometheus.HistogramOpts{

0 commit comments

Comments
 (0)