diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a9d4521a171..7ad9c69dc968 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## 2.47.1 / 2023-10-04
+
+* [BUGFIX] Fix duplicate sample detection at chunk size limit #12874
+
 ## 2.47.0 / 2023-09-06
 
 This release adds an experimental OpenTelemetry (OTLP) Ingestion feature,
diff --git a/VERSION b/VERSION
index bb13a7e35459..24c6ede7ad05 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.47.0
+2.47.1
diff --git a/tsdb/head_append.go b/tsdb/head_append.go
index 9016943756a4..d1f4d3035e11 100644
--- a/tsdb/head_append.go
+++ b/tsdb/head_append.go
@@ -1282,9 +1282,6 @@ func (s *memSeries) appendPreprocessor(t int64, e chunkenc.Encoding, o chunkOpts
 		// There is no head chunk in this series yet, create the first chunk for the sample.
 		c = s.cutNewHeadChunk(t, e, o.chunkRange)
 		chunkCreated = true
-	} else if len(c.chunk.Bytes()) > maxBytesPerXORChunk {
-		c = s.cutNewHeadChunk(t, e, o.chunkRange)
-		chunkCreated = true
 	}
 
 	// Out of order sample.
@@ -1292,6 +1289,12 @@ func (s *memSeries) appendPreprocessor(t int64, e chunkenc.Encoding, o chunkOpts
 		return c, false, chunkCreated
 	}
 
+	// Check the chunk size, unless we just created it. If the chunk is too large, cut a new one.
+	if !chunkCreated && len(c.chunk.Bytes()) > maxBytesPerXORChunk {
+		c = s.cutNewHeadChunk(t, e, o.chunkRange)
+		chunkCreated = true
+	}
+
 	if c.chunk.Encoding() != e {
 		// The chunk encoding expected by this append is different than the head chunk's
 		// encoding. So we cut a new chunk with the expected encoding.
diff --git a/tsdb/head_test.go b/tsdb/head_test.go
index b501f5f3f3be..cd9ab651088a 100644
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@@ -5399,3 +5399,49 @@ func TestCuttingNewHeadChunks(t *testing.T) {
 		})
 	}
 }
+
+// TestHeadDetectsDuplicateSampleAtSizeLimit tests a regression where a duplicate sample
+// is appended to the head right when the head chunk is at the size limit.
+// The test appends every sample twice, so it expects the result to contain
+// exactly half of the samples.
+func TestHeadDetectsDuplicateSampleAtSizeLimit(t *testing.T) {
+	numSamples := 1000
+	baseTS := int64(1695209650)
+
+	h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false)
+	defer func() {
+		require.NoError(t, h.Close())
+	}()
+
+	a := h.Appender(context.Background())
+	var err error
+	vals := []float64{math.MaxFloat64, 0x00} // Use the worst case scenario for the XOR encoding. Otherwise we hit the sample limit before the size limit.
+	for i := 0; i < numSamples; i++ {
+		ts := baseTS + int64(i/2)*10000
+		a.Append(0, labels.FromStrings("foo", "bar"), ts, vals[(i/2)%len(vals)])
+		err = a.Commit()
+		require.NoError(t, err)
+		a = h.Appender(context.Background())
+	}
+
+	indexReader, err := h.Index()
+	require.NoError(t, err)
+
+	var (
+		chunks  []chunks.Meta
+		builder labels.ScratchBuilder
+	)
+	require.NoError(t, indexReader.Series(1, &builder, &chunks))
+
+	chunkReader, err := h.Chunks()
+	require.NoError(t, err)
+
+	storedSampleCount := 0
+	for _, chunkMeta := range chunks {
+		chunk, err := chunkReader.Chunk(chunkMeta)
+		require.NoError(t, err)
+		storedSampleCount += chunk.NumSamples()
+	}
+
+	require.Equal(t, numSamples/2, storedSampleCount)
+}
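
Note on the head_append.go reordering: before this patch, appendPreprocessor checked the head chunk's byte size (and possibly cut a fresh chunk) *before* the duplicate/out-of-order guard `c.maxTime >= t`. Cutting a chunk resets its maxTime, so a duplicate sample arriving exactly when the chunk hit maxBytesPerXORChunk was compared against the fresh, empty chunk and appended anyway. The sketch below is a simplified, self-contained model of that ordering, not Prometheus code; all names (chunk, appendPreFix, appendPostFix, sizeLimit) are hypothetical stand-ins, and the size limit is modeled as a sample count for brevity.

```go
package main

import (
	"fmt"
	"math"
)

// sizeLimit stands in for maxBytesPerXORChunk; here it is a sample count.
const sizeLimit = 3

// chunk is a toy head chunk: only its max timestamp and samples matter here.
type chunk struct {
	maxTime int64
	samples []int64
}

func newChunk() *chunk { return &chunk{maxTime: math.MinInt64} }

// appendPreFix mirrors the old order: the size check runs before the
// duplicate check, so cutting a new chunk resets maxTime and a duplicate
// arriving exactly at the size limit slips through.
func appendPreFix(head *chunk, t int64) (*chunk, bool) {
	if len(head.samples) >= sizeLimit {
		head = newChunk() // maxTime is reset: the duplicate guard below is blind now
	}
	if head.maxTime >= t {
		return head, false // duplicate or out-of-order: rejected
	}
	head.samples = append(head.samples, t)
	head.maxTime = t
	return head, true
}

// appendPostFix mirrors the fixed order: duplicates are rejected against the
// current head chunk first; only then may the size limit cut a new one.
func appendPostFix(head *chunk, t int64) (*chunk, bool) {
	if head.maxTime >= t {
		return head, false
	}
	if len(head.samples) >= sizeLimit {
		head = newChunk()
	}
	head.samples = append(head.samples, t)
	head.maxTime = t
	return head, true
}

func main() {
	for _, tc := range []struct {
		name string
		fn   func(*chunk, int64) (*chunk, bool)
	}{
		{"pre-fix", appendPreFix},
		{"post-fix", appendPostFix},
	} {
		head, stored := newChunk(), 0
		// The second "3" is a duplicate arriving exactly at the size limit.
		for _, ts := range []int64{1, 2, 3, 3} {
			var ok bool
			head, ok = tc.fn(head, ts)
			if ok {
				stored++
			}
		}
		fmt.Printf("%s: stored %d of 4 appends\n", tc.name, stored)
		// pre-fix: stored 4 of 4 (the duplicate landed in a fresh chunk)
		// post-fix: stored 3 of 4 (the duplicate was rejected)
	}
}
```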
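On the test's choice of `vals := []float64{math.MaxFloat64, 0x00}`: Prometheus XOR chunks (Gorilla-style encoding) store, per sample, essentially the XOR of the bit patterns of consecutive values, compressing runs of shared leading and trailing zero bits. Alternating between MaxFloat64 and 0 leaves almost every bit differing, so each sample costs close to a full 64 bits and the chunk reaches the byte-size limit well before the per-chunk sample limit. A small sanity check of that claim (my own illustration, not part of the patch):

```go
package main

import (
	"fmt"
	"math"
	"math/bits"
)

func main() {
	// The per-sample cost of XOR encoding tracks the XOR of the bit
	// patterns of consecutive values: fewer shared leading/trailing
	// zero bits means less compression.
	x := math.Float64bits(math.MaxFloat64) ^ math.Float64bits(0)
	fmt.Printf("differing bits: %d of 64\n", bits.OnesCount64(x)) // 62 of 64
	fmt.Printf("leading zeros: %d, trailing zeros: %d\n",
		bits.LeadingZeros64(x), bits.TrailingZeros64(x)) // 1 and 0: almost nothing to elide
}
```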