forked from asg017/sqlite-vec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reference.yaml
396 lines (366 loc) · 13.6 KB
/
reference.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
sections:
constructors:
title: Constructors
desc: |
SQL functions that "construct" vectors with different element types.
Currently, only `float32`, `int8`, and `bit` vectors are supported.
op:
title: Operations
desc: |
Different operations and utilities for working with vectors.
distance:
title: Distance functions
desc: Various algorithms to calculate distance between two vectors.
quantization:
title: Quantization
desc: Various techniques to "compress" a vector by reducing precision and accuracy.
numpy:
title: "NumPy Utilities"
desc: Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
meta:
title: Meta
desc: Helper functions to debug `sqlite-vec` installations.
entrypoints:
title: Entrypoints
desc: All the named entrypoints that load in different `sqlite-vec` functions and options.
# vec0:
# title: "vec0 Virtual Table"
# desc: TODO
meta:
vec_version:
params: []
desc: Returns a version string of the current `sqlite-vec` installation.
example: select vec_version();
vec_debug:
params: []
desc: Returns debugging information of the current `sqlite-vec` installation.
example: select vec_debug();
constructors:
vec_f32:
params: [vector]
desc: |
Creates a float vector from a BLOB or JSON text. If a BLOB is provided,
the length must be divisible by 4, as each float element takes up 4 bytes of space.
The returned value is a BLOB with 4 bytes per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
of `223`.
example:
- select vec_f32('[.1, .2, .3, 4]');
- select subtype(vec_f32('[.1, .2, .3, 4]'));
- select vec_f32(X'AABBCCDD');
- select vec_to_json(vec_f32(X'AABBCCDD'));
- select vec_f32(X'AA');
vec_int8:
params: [vector]
desc: |
Creates an 8-bit integer vector from a BLOB or JSON text. If a BLOB is provided,
each byte is interpreted as one element, as an 8-bit integer takes up 1 byte of space.
If JSON text is provided, each element must be an integer between -128 and 127 inclusive.
The returned value is a BLOB with 1 byte per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
of `225`.
example:
- select vec_int8('[1, 2, 3, 4]');
- select subtype(vec_int8('[1, 2, 3, 4]'));
- select vec_int8(X'AABBCCDD');
- select vec_to_json(vec_int8(X'AABBCCDD'));
- select vec_int8('[999]');
vec_bit:
params: [vector]
desc: |
Creates a binary vector from a BLOB.
The returned value is a BLOB with 1 byte per 8 elements, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
of `224`.
example:
- select vec_bit(X'F0');
- select subtype(vec_bit(X'F0'));
- select vec_to_json(vec_bit(X'F0'));
op:
vec_length:
params: [vector]
desc: |
Returns the number of elements in the given vector.
The vector can be `JSON`, `BLOB`, or the result of a [constructor function](#constructors).
This function will return an error if `vector` is invalid.
example:
- select vec_length('[.1, .2]');
- select vec_length(X'AABBCCDD');
- select vec_length(vec_int8(X'AABBCCDD'));
- select vec_length(vec_bit(X'AABBCCDD'));
- select vec_length(X'CCDD');
vec_type:
params: [vector]
desc: |
Returns the name of the type of `vector` as text. One of `'float32'`, `'int8'`, or `'bit'`.
This function will return an error if `vector` is invalid.
example:
- select vec_type('[.1, .2]');
- select vec_type(X'AABBCCDD');
- select vec_type(vec_int8(X'AABBCCDD'));
- select vec_type(vec_bit(X'AABBCCDD'));
- select vec_type(X'CCDD');
vec_add:
params: [a, b]
desc: |
Adds each element of vector `a` to the corresponding element of vector `b`, returning a new vector `c`. Both vectors
must be of the same type and same length. Only `float32` and `int8` vectors are supported.
An error is raised if either `a` or `b` is invalid, or if they are not the same type or same length.
See also [`vec_sub()`](#vec_sub).
example:
- |
select vec_add(
'[.1, .2, .3]',
'[.4, .5, .6]'
);
- |
select vec_to_json(
vec_add(
'[.1, .2, .3]',
'[.4, .5, .6]'
)
);
- |
select vec_to_json(
vec_add(
vec_int8('[1, 2, 3]'),
vec_int8('[4, 5, 6]')
)
);
- select vec_add('[.1]', vec_int8('[1]'));
- select vec_add(vec_bit(X'AA'), vec_bit(X'BB'));
vec_sub:
params: [a, b]
desc: |
Subtracts each element of vector `b` from the corresponding element of vector `a`, returning a new vector `c`. Both vectors
must be of the same type and same length. Only `float32` and `int8` vectors are supported.
An error is raised if either `a` or `b` is invalid, or if they are not the same type or same length.
See also [`vec_add()`](#vec_add).
example:
- |
select vec_sub(
'[.1, .2, .3]',
'[.4, .5, .6]'
);
- |
select vec_to_json(
vec_sub(
'[.1, .2, .3]',
'[.4, .5, .6]'
)
);
- |
select vec_to_json(
vec_sub(
vec_int8('[1, 2, 3]'),
vec_int8('[4, 5, 6]')
)
);
- select vec_sub('[.1]', vec_int8('[1]'));
- select vec_sub(vec_bit(X'AA'), vec_bit(X'BB'));
vec_normalize:
params: [vector]
desc: |
Performs L2 normalization on the given vector. Only float32 vectors are currently supported.
Returns an error if the input is an invalid vector or not a float32 vector.
example:
- select vec_normalize('[2, 3, 1, -4]');
- |
select vec_to_json(
vec_normalize('[2, 3, 1, -4]')
);
- |
-- for matryoshka embeddings - slice then normalize
select vec_to_json(
vec_normalize(
vec_slice('[2, 3, 1, -4]', 0, 2)
)
);
vec_slice:
params: [vector, start, end]
desc: |
Extract a subset of `vector` from the `start` element (inclusive) to the `end` element (exclusive). TODO check
This is especially useful for [Matryoshka embeddings](#TODO), also known as "adaptive length" embeddings.
Use with [`vec_normalize()`](#vec_normalize) to get proper results.
Returns an error in the following conditions:
- If `vector` is not a valid vector
- If `start` is less than zero or greater than or equal to `end`
- If `end` is greater than the length of `vector`, or less than or equal to `start`.
- If `vector` is a bitvector and `start` or `end` is not divisible by 8.
example:
- select vec_slice('[1, 2,3, 4]', 0, 2);
- |
select vec_to_json(
vec_slice('[1, 2,3, 4]', 0, 2)
);
- |
select vec_to_json(
vec_slice('[1, 2,3, 4]', 2, 4)
);
- |
select vec_to_json(
vec_slice('[1, 2,3, 4]', -1, 4)
);
- |
select vec_to_json(
vec_slice('[1, 2,3, 4]', 0, 5)
);
- |
select vec_to_json(
vec_slice('[1, 2,3, 4]', 0, 0)
);
vec_to_json:
params: [vector]
desc: |
Represents a vector as JSON text. The input vector can be a vector BLOB or JSON text.
Returns an error if `vector` is an invalid vector, or when memory cannot be allocated.
example:
- select vec_to_json(X'AABBCCDD');
- select vec_to_json(vec_int8(X'AABBCCDD'));
- select vec_to_json(vec_bit(X'AABBCCDD'));
- select vec_to_json('[1,2,3,4]');
- select vec_to_json('invalid');
vec_each:
params: [vector]
desc: |
A table function to iterate through every element in a vector. One row is returned per element in the vector.
```sql
CREATE TABLE vec_each(
rowid int, -- The position of the element within the vector
vector HIDDEN -- input parameter: A well-formed vector value
)
```
Returns an error if `vector` is not a valid vector.
example:
- select rowid, value from vec_each('[1,2,3,4]');
- select rowid, value from vec_each(X'AABBCCDD00112233');
- select rowid, value from vec_each(vec_int8(X'AABBCCDD'));
- select rowid, value from vec_each(vec_bit(X'F0'));
distance:
vec_distance_L2:
params: [a, b]
desc: |
Calculates the L2 (Euclidean) distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
Returns an error under the following conditions:
- `a` or `b` are invalid vectors
- `a` and `b` do not share the same vector element type (e.g. float32 or int8)
- `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
- `a` and `b` do not have the same length.
example:
- select vec_distance_L2('[1, 1]', '[2, 2]');
- select vec_distance_L2('[1, 1]', '[-2, -2]');
- select vec_distance_L2('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
- select vec_distance_L2(X'AABBCCDD', X'00112233');
- select vec_distance_L2('[1, 1]', vec_int8('[2, 2]'));
- select vec_distance_L2(vec_bit(X'AA'), vec_bit(X'BB'));
vec_distance_cosine:
params: [a, b]
desc: |
Calculates the cosine distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
Returns an error under the following conditions:
- `a` or `b` are invalid vectors
- `a` and `b` do not share the same vector element type (e.g. float32 or int8)
- `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
- `a` and `b` do not have the same length.
example:
- select vec_distance_cosine('[1, 1]', '[2, 2]');
- select vec_distance_cosine('[1, 1]', '[-2, -2]');
- select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
- select vec_distance_cosine(X'AABBCCDD', X'00112233');
- select vec_distance_cosine('[1, 1]', vec_int8('[2, 2]'));
- select vec_distance_cosine(vec_bit(X'AA'), vec_bit(X'BB'));
vec_distance_hamming:
params: [a, b]
desc: |
Calculates the Hamming distance between two bitvectors `a` and `b`. Only valid for bitvectors.
Returns an error under the following conditions:
- `a` or `b` are not bitvectors
- `a` and `b` do not share the same length
- Memory cannot be allocated
example:
- select vec_distance_hamming(vec_bit(X'00'), vec_bit(X'FF'));
- select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
- select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
- select vec_distance_hamming('[1, 1]', '[0, 0]');
quantization:
vec_quantize_binary:
params: [vector]
desc: |
Quantize a float32 or int8 vector into a bitvector.
For every element in the vector, a `1` is assigned to positive numbers and a `0` is assigned to negative numbers.
These values are then packed into a bit vector.
Returns an error if `vector` is invalid, or if `vector` is not a float32 or int8 vector.
example:
- select vec_quantize_binary('[1, 2, 3, 4, 5, 6, 7, 8]');
- select vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
- select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
- select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
- select vec_quantize_binary(vec_int8(X'11223344'));
- select vec_quantize_binary(vec_bit(X'FF'));
vec_quantize_i8:
params: [vector, "[start]", "[end]"]
desc: x
example: select 'todo';
numpy:
vec_npy_each:
params: [npy_array]
desc: |
xxx
example:
- |
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
select
rowid,
vector,
vec_type(vector),
vec_to_json(vector)
from vec_npy_each(
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
)
- |
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
select
rowid,
vector,
vec_type(vector),
vec_to_json(vector)
from vec_npy_each(
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
)
vec0:
vec0:
params: []
desc: TODO
example:
- |
create virtual table vec_items using vec0(
contents_embedding float[4]
);
- |
insert into vec_items(rowid, contents_embedding)
values (1, '[1, 1, 1, 1]'),
(2, '[2, 2, 2, 2]'),
(3, '[3, 3, 3, 3]');
entrypoints:
{}
#sqlite3_vec_init:
# desc: |
# asdf
#sqlite3_vec_fs_read_init:
# desc: |
# asdf
#table_functions:
# vec_each:
# columns: [rowid, value]
# inputs: ["vector"]
# desc:
# example:
#virtual_tables:
# vec0:
# desc:
# example:
#entrypoints:
# sqlite3_vec_init: {}
# sqlite3_vec_fs_read_init: {}
#compile_options:
# - SQLITE_VEC_ENABLE_AVX
# - SQLITE_VEC_ENABLE_NEON
# - SQLITE_VEC_OMIT_FS
#