-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataloader.go
281 lines (246 loc) · 7.47 KB
/
dataloader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
package dataloader
import (
"context"
"fmt"
"os"
"runtime"
"sync"
"time"
"github.com/hashicorp/golang-lru/v2/expirable"
"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"
)
// Interface defines a public API for loading data from a particular data source.
// K is the type of the keys used to request values and V is the type of the loaded values.
type Interface[K comparable, V any] interface {
// Load loads a single key and returns its Result.
Load(context.Context, K) Result[V]
// LoadMany loads multiple keys and returns one Result per key, in the same order as the keys.
LoadMany(context.Context, []K) []Result[V]
// LoadMap loads multiple keys and returns a map of results indexed by the requested key.
LoadMap(context.Context, []K) map[K]Result[V]
// Clear removes an item from the cache and returns the loader so calls can be chained.
Clear(K) Interface[K, V]
// ClearAll clears the entire cache and returns the loader so calls can be chained.
ClearAll() Interface[K, V]
// Prime primes the cache with a key and value and returns the loader so calls can be chained.
Prime(ctx context.Context, key K, value V) Interface[K, V]
}
// Loader is the function type for loading data.
// It is called with the keys collected for one batch and returns the results for them.
// Results are matched to keys by position: the result at index i is delivered to the
// callers waiting on keys[i], so the returned slice needs an entry for every key.
type Loader[K comparable, V any] func(context.Context, []K) []Result[V]
// config holds the configuration for DataLoader.
// New starts from the defaults noted on each field and then applies the caller's options.
type config struct {
// BatchSize is the number of keys to batch together, Default is 100.
// A batch is dispatched as soon as it holds this many keys.
BatchSize int
// Wait is the duration to wait before processing a batch, Default is 16ms.
// The wait starts when the first key is added to an empty batch.
Wait time.Duration
// CacheSize is the size of the cache, Default is 1024.
// New only creates a cache when this value is greater than 0.
CacheSize int
// CacheExpire is the duration to expire cache items, Default is 1 minute
CacheExpire time.Duration
// TracerProvider is the tracer provider to use for tracing.
// When it is nil no spans are started.
TracerProvider trace.TracerProvider
}
// dataLoader is the main struct for the dataloader.
// It collects requested keys into a pending batch and hands the batch to the loader
// function either when the batch is full or when the wait timer expires.
type dataLoader[K comparable, V any] struct {
// loader is the user-supplied function that loads a batch of keys.
loader Loader[K, V]
// cache holds successfully loaded values; it is nil when caching is disabled.
cache *expirable.LRU[K, V]
// config is the effective configuration after options have been applied.
config config
// mu guards batch, batchCtx, chs and stopSchedule.
mu sync.Mutex
// batch is the list of keys collected for the pending batch.
batch []K
// batchCtx is the set of caller contexts that contributed to the pending batch.
// It is only populated when a TracerProvider is configured and is used to link spans.
batchCtx map[context.Context]struct{}
// chs maps each key of the pending batch to the channels of the callers waiting on it.
chs map[K][]chan Result[V]
// stopSchedule is closed to cancel the pending batch timer when a full batch is dispatched.
stopSchedule chan struct{}
}
// New builds a DataLoader around the given loader function.
//
// The configuration starts from the defaults (batch size 100, wait 16ms,
// cache size 1024, cache expiry 1 minute) and each option is applied on top
// in the order given. A cache is only created when the resulting cache size
// is greater than 0; otherwise the loader runs without caching.
func New[K comparable, V any](loader Loader[K, V], options ...Option) Interface[K, V] {
	cfg := config{
		BatchSize:   100,
		Wait:        16 * time.Millisecond,
		CacheSize:   1024,
		CacheExpire: time.Minute,
	}
	for i := range options {
		options[i](&cfg)
	}

	// Leave the cache nil when caching is disabled; every cache access checks for nil.
	var cache *expirable.LRU[K, V]
	if cfg.CacheSize > 0 {
		cache = expirable.NewLRU[K, V](cfg.CacheSize, nil, cfg.CacheExpire)
	}

	dl := &dataLoader[K, V]{
		loader:       loader,
		cache:        cache,
		config:       cfg,
		stopSchedule: make(chan struct{}),
	}
	// Allocate the empty batch state sized for the configured batch size.
	dl.reset()
	return dl
}
// Load requests a single key and blocks until its result is available.
// The request is wrapped in a "dataLoader.Load" span when tracing is active.
func (d *dataLoader[K, V]) Load(ctx context.Context, key K) Result[V] {
	tracedCtx, span := d.startTrace(ctx, "dataLoader.Load")
	defer span.End()

	result := <-d.goLoad(tracedCtx, key)
	return result
}
// LoadMany requests several keys and returns their results in the same order
// as the keys. The request is wrapped in a "dataLoader.LoadMany" span when
// tracing is active.
func (d *dataLoader[K, V]) LoadMany(ctx context.Context, keys []K) []Result[V] {
	ctx, span := d.startTrace(ctx, "dataLoader.LoadMany")
	defer span.End()

	// Enqueue every key before waiting on any of them so they can share a batch.
	pending := make([]<-chan Result[V], 0, len(keys))
	for idx := range keys {
		pending = append(pending, d.goLoad(ctx, keys[idx]))
	}

	// Collect the results positionally.
	out := make([]Result[V], len(pending))
	for idx := range pending {
		out[idx] = <-pending[idx]
	}
	return out
}
// LoadMap requests several keys and returns their results in a map indexed by
// key. The request is wrapped in a "dataLoader.LoadMap" span when tracing is
// active.
func (d *dataLoader[K, V]) LoadMap(ctx context.Context, keys []K) map[K]Result[V] {
	ctx, span := d.startTrace(ctx, "dataLoader.LoadMap")
	defer span.End()

	// Enqueue every key before waiting on any of them so they can share a batch.
	pending := make([]<-chan Result[V], 0, len(keys))
	for _, k := range keys {
		pending = append(pending, d.goLoad(ctx, k))
	}

	// Drain the channels in request order and file each result under its key.
	byKey := make(map[K]Result[V], len(keys))
	for idx, k := range keys {
		byKey[k] = <-pending[idx]
	}
	return byKey
}
// Clear drops the cached value for key, if caching is enabled, and returns
// the loader so calls can be chained.
func (d *dataLoader[K, V]) Clear(key K) Interface[K, V] {
	if d.cache == nil {
		// Caching is disabled; nothing to remove.
		return d
	}
	d.cache.Remove(key)
	return d
}
// ClearAll drops every cached value, if caching is enabled, and returns the
// loader so calls can be chained.
func (d *dataLoader[K, V]) ClearAll() Interface[K, V] {
	if d.cache == nil {
		// Caching is disabled; nothing to purge.
		return d
	}
	d.cache.Purge()
	return d
}
// Prime seeds the cache with value for key so that a later load of key can be
// served without calling the loader function.
//
// An entry that is already cached is left untouched; call Clear first to
// replace it. Prime does nothing when caching is disabled. The ctx parameter
// is currently unused. The loader is returned so calls can be chained.
func (d *dataLoader[K, V]) Prime(ctx context.Context, key K, value V) Interface[K, V] {
	if d.cache == nil {
		return d
	}
	// Only add when the key is absent. The previous condition was inverted:
	// it wrote the value only when the key was already cached, so priming an
	// empty cache had no effect and existing entries were overwritten.
	if _, cached := d.cache.Get(key); !cached {
		d.cache.Add(key, value)
	}
	return d
}
// goLoad registers key for loading and returns a channel on which exactly one
// result will be delivered before the channel is closed.
//
// A cached value is delivered immediately. Otherwise the key joins the pending
// batch: a key that is already part of the pending batch just gets another
// waiter, a new key is appended. The first key of a batch starts the wait
// timer, and a batch that reaches the configured size is dispatched right away.
func (d *dataLoader[K, V]) goLoad(ctx context.Context, key K) <-chan Result[V] {
	// Buffered so the sender never blocks on a caller that is slow to receive.
	out := make(chan Result[V], 1)

	// Fast path: answer from the cache without touching the batch state.
	if d.cache != nil {
		if cached, hit := d.cache.Get(key); hit {
			out <- Result[V]{data: cached}
			close(out)
			return out
		}
	}

	d.mu.Lock()

	// Remember the caller's context so the batch span can link back to it.
	if d.config.TracerProvider != nil {
		d.batchCtx[ctx] = struct{}{}
	}

	if len(d.batch) > 0 {
		// The key is already in flight in this batch: share its result.
		if waiters, inFlight := d.chs[key]; inFlight {
			d.chs[key] = append(waiters, out)
			d.mu.Unlock()
			return out
		}
	} else {
		// First key of a new batch: start the timer that will flush it.
		d.stopSchedule = make(chan struct{})
		go d.scheduleBatch(ctx, d.stopSchedule)
	}

	// Record the key and its first waiter in the pending batch.
	d.batch = append(d.batch, key)
	d.chs[key] = []chan Result[V]{out}

	// A full batch is handed off immediately and its timer is cancelled.
	if len(d.batch) >= d.config.BatchSize {
		go d.processBatch(ctx, d.batch, d.batchCtx, d.chs)
		close(d.stopSchedule)
		d.reset()
	}

	d.mu.Unlock()
	return out
}
// scheduleBatch waits for the configured Wait duration and then dispatches the
// pending batch, unless stopSchedule is closed first.
//
// ctx is the context handed to processBatch for the dispatched batch.
// stopSchedule is the stop channel created for the batch this timer belongs
// to; goLoad closes it, while holding the mutex, when that batch is dispatched
// early because it became full.
func (d *dataLoader[K, V]) scheduleBatch(ctx context.Context, stopSchedule <-chan struct{}) {
	// A stoppable timer lets the runtime release it as soon as we return.
	timer := time.NewTimer(d.config.Wait)
	defer timer.Stop()

	select {
	case <-stopSchedule:
		return
	case <-timer.C:
	}

	d.mu.Lock()
	defer d.mu.Unlock()

	// The timer may have fired at the same moment the batch was dispatched for
	// being full; select picks among ready cases at random, so re-check the
	// stop signal now that we hold the lock. Without this, a stale timer could
	// flush a newer batch early and with the wrong context.
	select {
	case <-stopSchedule:
		return
	default:
	}

	if len(d.batch) > 0 {
		go d.processBatch(ctx, d.batch, d.batchCtx, d.chs)
		d.reset()
	}
}
// processBatch runs the loader function for one batch and delivers the
// results to the waiting callers.
//
// keys are the batched keys, batchCtx the caller contexts that contributed to
// the batch (used for span links), and chs the waiters per key. The chs map is
// consumed: an entry is removed as soon as its waiters have been answered, so
// what remains in the map is always the set of callers still waiting.
//
// Successful results are stored in the cache when caching is enabled. If the
// loader panics, or returns fewer results than keys, every caller that has not
// been answered yet receives an error.
func (d *dataLoader[K, V]) processBatch(ctx context.Context, keys []K, batchCtx map[context.Context]struct{}, chs map[K][]chan Result[V]) {
	// failPending delivers err to every caller that has not been answered yet.
	// Entries are removed before sending so no channel is ever sent to twice;
	// a second send would hit a closed channel and panic.
	failPending := func(err error) {
		for key, waiters := range chs {
			delete(chs, key)
			d.sendResult(waiters, Result[V]{err: err})
		}
	}

	defer func() {
		if r := recover(); r != nil {
			buf := make([]byte, 64<<10)
			buf = buf[:runtime.Stack(buf, false)]
			err := fmt.Errorf("dataloader: panic received in loader function: %v", r)
			fmt.Fprintf(os.Stderr, "%v\n%s", err, buf)
			failPending(err)
		}
	}()

	if d.config.TracerProvider != nil {
		// Create a span with links to the batch contexts, which enables trace propagation.
		// batchCtx is a set, so identical contexts produce a single link.
		links := make([]trace.Link, 0, len(batchCtx))
		for bCtx := range batchCtx {
			links = append(links, trace.Link{SpanContext: trace.SpanContextFromContext(bCtx)})
		}
		var span trace.Span
		ctx, span = d.startTrace(ctx, "dataLoader.Batch", trace.WithLinks(links...))
		defer span.End()
	}

	results := d.loader(ctx, keys)

	// Results are matched to keys by position, so a short slice cannot be
	// delivered reliably; report it instead of indexing out of range.
	if len(results) < len(keys) {
		failPending(fmt.Errorf("dataloader: loader returned %d results for %d keys", len(results), len(keys)))
		return
	}

	for i, key := range keys {
		if results[i].err == nil && d.cache != nil {
			d.cache.Add(key, results[i].data)
		}
		waiters := chs[key]
		delete(chs, key)
		d.sendResult(waiters, results[i])
	}
}
// reset replaces the pending batch state (keys, caller contexts and waiters)
// with fresh, empty containers sized for the configured batch size. The
// previous containers are left untouched for whoever still holds them.
func (d *dataLoader[K, V]) reset() {
	size := d.config.BatchSize

	d.batch = make([]K, 0, size)
	d.batchCtx = make(map[context.Context]struct{}, size)
	d.chs = make(map[K][]chan Result[V], size)
}
// sendResult delivers result to each of the given channels and closes the
// channel afterwards, so every waiter receives exactly one value.
func (d *dataLoader[K, V]) sendResult(chs []chan Result[V], result Result[V]) {
	for i := range chs {
		chs[i] <- result
		close(chs[i])
	}
}
// noopSpan is the shared span that startTrace returns when it does not start a real span.
var noopSpan = noop.Span{}
// startTrace starts a span called name on the "dataLoader" tracer.
//
// A real span is only started when a TracerProvider is configured and ctx
// already carries a valid span context; in every other case ctx is returned
// unchanged together with the shared no-op span, so callers can always call
// End on the result.
func (d *dataLoader[K, V]) startTrace(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
	provider := d.config.TracerProvider
	if provider == nil {
		return ctx, noopSpan
	}
	if !trace.SpanFromContext(ctx).SpanContext().IsValid() {
		return ctx, noopSpan
	}
	return provider.Tracer("dataLoader").Start(ctx, name, opts...)
}