
Add variable-sized data blob erasure coding.
greywolve committed Dec 30, 2024
1 parent f6ef8e1 commit b7e00cb
Showing 5 changed files with 366 additions and 20 deletions.
1 change: 1 addition & 0 deletions internal/common/constants.go
@@ -15,6 +15,7 @@ const (
     MaxHistoricalTimeslotsForPreimageMeta = 3             // () Maximum number of historical timeslots for preimage metadata
     SizeOfSegment                         = 4104          // WG = WP*WE = 4104: The size of a segment in octets.
     MaxWorkPackageSize                    = 12 * 1 << 20  // WB = 12 MB: The maximum size of an encoded work-package in octets (including extrinsic data and import implications).
+    ErasureCodingChunkSize                = 684           // WE = 684: The basic size of erasure-coded pieces in octets.
     MaxAllocatedGasAccumulation           = 100_000       // GA = 100,000: The gas allocated to invoke a work-report’s Accumulation logic.
     WorkReportMaxSumOfDependencies        = 8             // (J) The maximum sum of dependency items in a work-report.
 )
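For reference, the new constant completes the relation already noted for SizeOfSegment: WG = WP * WE = 6 * 684 = 4104, taking WP = 6 erasure-coded pieces per segment from the graypaper (WP itself does not appear in this diff).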
3 changes: 2 additions & 1 deletion internal/common/constants_integration.go
@@ -14,7 +14,8 @@ const (
     MaxTicketExtrinsicSize                = 16
     MaxHistoricalTimeslotsForPreimageMeta = 3
     SizeOfSegment                         = 4104
-    MaxWorkPackageSize                    = 12 * 1 << 20 // 12 MB: The maximum size of an encoded work-package in octets (including extrinsic data and import implications).
+    MaxWorkPackageSize                    = 12 * 1 << 20
+    ErasureCodingChunkSize                = 684
     MaxAllocatedGasAccumulation           = 100_000
     WorkReportMaxSumOfDependencies        = 8
 )
167 changes: 167 additions & 0 deletions internal/erasurecoding/erasurecoding.go
@@ -1 +1,168 @@
package erasurecoding

import (
    "bytes"
    "errors"
    "fmt"

    "github.com/eigerco/strawberry/internal/common"
    "github.com/eigerco/strawberry/internal/erasurecoding/reedsolomon"
)

// These parameters allow data to be retrieved even when only 1/3 of the
// validators are available.
const (
    OriginalShards = 342                                             // Number of original data shards.
    RecoveryShards = common.NumberOfValidators - OriginalShards      // Number of recovery data shards.
    ChunkShardSize = common.ErasureCodingChunkSize / OriginalShards  // In bytes.
)
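// Worked example of the arithmetic, assuming the production value
// common.NumberOfValidators = 1023 (defined elsewhere in the repo, not shown
// in this diff): RecoveryShards = 1023 - 342 = 681, so any 342 of the 1023
// shards (roughly 1/3) suffice to reconstruct, and ChunkShardSize =
// 684 / 342 = 2 bytes.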

// Encode transforms input data into striped erasure-coded shards using
// Reed-Solomon encoding. It splits the data into chunks of
// common.ErasureCodingChunkSize bytes, then generates OriginalShards +
// RecoveryShards total shards. If the data cannot be split evenly, the final
// chunk is padded with zeros. The resulting shards can tolerate the loss of
// up to RecoveryShards shards while still being able to reconstruct the
// original data.
// Implements equation H.6 from the graypaper (v0.5.3).
func Encode(data []byte) ([][]byte, error) {
    if len(data) == 0 {
        return nil, errors.New("data has length 0")
    }
    if len(data) > common.MaxWorkPackageSize {
        return nil, errors.New("data length too long")
    }

    encoder, err := reedsolomon.New(OriginalShards, RecoveryShards)
    if err != nil {
        return nil, fmt.Errorf("creating encoder: %w", err)
    }

    chunks := unzip(common.ErasureCodingChunkSize, data)
    if len(chunks) == 0 {
        return nil, errors.New("couldn't unzip data")
    }

    finalShards := make([][]byte, OriginalShards+RecoveryShards)
    for i := range finalShards {
        finalShards[i] = make([]byte, len(chunks)*ChunkShardSize)
    }
    for i, c := range chunks {
        shards, err := encoder.Chunk(c)
        if err != nil {
            return nil, err
        }
        err = encoder.Encode(shards)
        if err != nil {
            return nil, err
        }

        // Transpose and copy to the result in one go: shard j receives the
        // ChunkShardSize bytes that chunk i contributes to it.
        start := i * ChunkShardSize
        for j := range finalShards {
            copy(finalShards[j][start:start+ChunkShardSize], shards[j])
        }
    }

    return finalShards, nil
}
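// For illustration: 1368 input bytes unzip into two 684-byte chunks, so each
// of the OriginalShards+RecoveryShards output shards is 2*ChunkShardSize = 4
// bytes long: bytes [0:2] come from chunk 0 and bytes [2:4] from chunk 1.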

// Decode reconstructs the original data from striped erasure-coded shards
// using Reed-Solomon decoding. It requires at least OriginalShards intact
// shards to successfully recover the data. Missing or corrupted shards should
// be passed as nil or empty byte slices. The outSize parameter specifies the
// expected length of the original data.
// Implements equation H.7 from the graypaper (v0.5.3).
func Decode(shards [][]byte, outSize int) ([]byte, error) {
    if reedsolomon.ShardCount(shards) < OriginalShards {
        return nil, errors.New("too few shards")
    }

    shardSize := reedsolomon.ShardSize(shards)
    if shardSize%ChunkShardSize != 0 {
        return nil, errors.New("invalid shard size")
    }

    if outSize > shardSize*OriginalShards {
        return nil, errors.New("out size too long")
    }

    encoder, err := reedsolomon.New(OriginalShards, RecoveryShards)
    if err != nil {
        return nil, fmt.Errorf("creating encoder: %w", err)
    }

    // Undo the striping: regroup each shard's j-th ChunkShardSize slice into
    // the shard set for chunk j.
    noChunks := shardSize / ChunkShardSize
    chunkShards := make([][][]byte, noChunks)
    for i := range chunkShards {
        chunkShards[i] = make([][]byte, len(shards))
    }

    for i, s := range shards {
        if len(s) == 0 {
            continue
        }
        for j := range chunkShards {
            start := j * ChunkShardSize
            chunkShards[j][i] = s[start : start+ChunkShardSize]
        }
    }

    unzippedChunks := make([][]byte, noChunks)
    for i, c := range chunkShards {
        err := encoder.Decode(c)
        if err != nil {
            return nil, err
        }
        unzippedChunks[i] = bytes.Join(c[0:OriginalShards], nil)
    }

    return lace(unzippedChunks, outSize), nil
}

// unzip stripes data into n-sized chunks by distributing bytes column-wise.
// For input data [1,2,3,4,5,6] and n=3, it produces chunks: [[1,3,5], [2,4,6]].
// If the input data length is not perfectly divisible by n, the unfilled
// positions are left zero-padded. For example, data [1,2,3,4,5] and n=3
// produces: [[1,3,5], [2,4,0]]. This transformation prepares data for erasure
// coding by ensuring even distribution across shards. Returns nil if n <= 0.
// Implements equation H.4 from the graypaper (v0.5.3).
func unzip(n int, data []byte) [][]byte {
    if n <= 0 {
        return nil
    }

    // k is the number of n-sized chunks needed to hold the data, rounding up.
    k := (len(data) + n - 1) / n

    result := make([][]byte, k)
    for i := range result {
        result[i] = make([]byte, n)
    }

    // Byte i of the input lands in chunk i%k at offset i/k, i.e. the data is
    // written column-wise across the chunks.
    for i, b := range data {
        result[i%k][i/k] = b
    }

    return result
}

// lace combines multiple chunks back into a single byte slice by reading
// column-wise. It performs the inverse operation of unzip. For input chunks
// [[1,3,5], [2,4,6]] and outSize=6, it produces: [1,2,3,4,5,6]. The outSize
// parameter determines the length of the final output slice, which should
// match the length of the original data (this discards any padding that was
// added by unzip).
// Implements equation H.5 from the graypaper (v0.5.3).
func lace(chunks [][]byte, outSize int) []byte {
    if len(chunks) == 0 {
        return nil
    }

    k := len(chunks)
    result := make([]byte, outSize)
    for i := range result {
        result[i] = chunks[i%k][i/k]
    }

    return result
}
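
Encode and Decode round-trip as in the minimal sketch below. This is an illustrative sketch only: it assumes it is compiled from within the repository (the erasurecoding package is internal and not importable from outside), and the 1368-byte input is an arbitrary choice of two full chunks.

package main

import (
    "bytes"
    "fmt"

    "github.com/eigerco/strawberry/internal/erasurecoding"
)

func main() {
    // Two full 684-byte chunks' worth of arbitrary data.
    data := bytes.Repeat([]byte{0xAB}, 1368)

    shards, err := erasurecoding.Encode(data)
    if err != nil {
        panic(err)
    }

    // Lose the maximum tolerable number of shards; any OriginalShards
    // survivors are enough to reconstruct.
    for i := 0; i < erasurecoding.RecoveryShards; i++ {
        shards[i] = nil
    }

    out, err := erasurecoding.Decode(shards, len(data))
    if err != nil {
        panic(err)
    }
    fmt.Println(bytes.Equal(out, data)) // true
}
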
180 changes: 180 additions & 0 deletions internal/erasurecoding/erasurecoding_test.go
@@ -0,0 +1,180 @@
package erasurecoding

import (
    "encoding/json"
    "fmt"
    "os"
    "testing"
    "time"

    "github.com/eigerco/strawberry/internal/common"
    "github.com/eigerco/strawberry/internal/testutils"
    "github.com/stretchr/testify/require"
    "golang.org/x/exp/rand"
)

func TestUnzipLace(t *testing.T) {
    testCases := []struct {
        name string
        data []byte
        n    int
        want []byte
    }{
        {
            name: "perfectlyDivisible",
            data: []byte{1, 2, 3, 4, 5, 6},
            n:    2,
            want: []byte{1, 2, 3, 4, 5, 6},
        },
        {
            name: "withPadding",
            data: []byte{1, 2, 3, 4, 5},
            n:    2,
            want: []byte{1, 2, 3, 4, 5},
        },
        {
            name: "singles",
            data: []byte{1, 2, 3},
            n:    3,
            want: []byte{1, 2, 3},
        },
    }

    for _, tc := range testCases {
        t.Run(tc.name, func(t *testing.T) {
            chunks := unzip(tc.n, tc.data)
            data := lace(chunks, len(tc.data))

            require.Equal(t, tc.want, data)
        })
    }
}

func TestEncodeDecodeRoundTripRandom(t *testing.T) {
    dataSizes := []int{
        1,
        684,
        1368,
        2052,
        4096,
        4104,
    }

    seed := uint64(time.Now().UnixNano())
    t.Logf("using random seed: %d", seed)
    rng := rand.New(rand.NewSource(seed))

    for _, size := range dataSizes {
        t.Run(fmt.Sprintf("size%d", size), func(t *testing.T) {
            // Generate some random data.
            data := make([]byte, size)
            _, err := rng.Read(data)
            require.NoError(t, err)

            // Encode the data.
            shards, err := Encode(data)
            require.NoError(t, err)
            require.Equal(t, OriginalShards+RecoveryShards, len(shards))

            // Randomly remove RecoveryShards shards.
            for _, idx := range rng.Perm(len(shards))[:RecoveryShards] {
                shards[idx] = nil
            }

            // Decode the data with the remaining shards.
            dataOut, err := Decode(shards, size)
            require.NoError(t, err)

            require.Equal(t, data, dataOut)
        })
    }
}

func TestEncodeDecodeInitialTestVector(t *testing.T) {
    jsonData, err := os.ReadFile("reedsolomon/vectors/initial.json")
    require.NoError(t, err)

    tv := TestVector{}
    err = json.Unmarshal(jsonData, &tv)
    require.NoError(t, err)
    data := testutils.MustFromHex(t, tv.Data)

    result, err := Encode(data)
    require.NoError(t, err)

    decodedData, err := Decode(result, len(data))
    require.NoError(t, err)

    require.Equal(t, data, decodedData)
}

func TestEncodeFailureCases(t *testing.T) {
    tests := []struct {
        name  string
        input []byte
    }{
        {
            name:  "emptyData",
            input: []byte{},
        },
        {
            name:  "dataTooLarge",
            input: make([]byte, common.MaxWorkPackageSize+1),
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            _, err := Encode(tt.input)
            require.Error(t, err)
        })
    }
}

func TestDecodeFailureCases(t *testing.T) {
    tests := []struct {
        name   string
        shards [][]byte
    }{
        {
            name:   "tooFewShards",
            shards: testShards(OriginalShards-1, ChunkShardSize),
        },
        {
            name:   "invalidShardSize",
            shards: testShards(OriginalShards+RecoveryShards, ChunkShardSize+1),
        },
        {
            name:   "nilShards",
            shards: nil,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            _, err := Decode(tt.shards, 100)
            require.Error(t, err)
        })
    }
}

// testShards creates count test shards, each of the given size in bytes.
func testShards(count, size int) [][]byte {
    shards := make([][]byte, count)
    for i := range shards {
        shards[i] = make([]byte, size)
    }
    return shards
}

type TestVector struct {
    Data    string `json:"data"`
    Segment struct {
        Segments []struct {
            SegmentEC []string `json:"segment_ec"`
        } `json:"segments"`
    } `json:"segment"`
}
