Skip to content

Commit

Permalink
Add support for reading from Uint32 List into Roaring Bitmap
Browse files Browse the repository at this point in the history
  • Loading branch information
jsjant committed Dec 27, 2024
1 parent fa346a5 commit b333ddc
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 1 deletion.
53 changes: 53 additions & 0 deletions ydb/library/yql/udfs/common/roaring/roaring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <contrib/libs/croaring/include/roaring/memory.h>
#include <contrib/libs/croaring/include/roaring/roaring.h>

#include <util/generic/array_ref.h>
#include <util/generic/vector.h>
#include <util/string/builder.h>
#include <util/system/yassert.h>
Expand All @@ -30,6 +31,11 @@ namespace {
{
}

TRoaringWrapper(roaring_bitmap_t* bitmap)
: Roaring(bitmap)
{
}

// Releases the bitmap held in the Roaring member.
~TRoaringWrapper()
{
    roaring_bitmap_free(this->Roaring);
}
Expand Down Expand Up @@ -223,6 +229,46 @@ namespace {
TSourcePosition Pos_;
};

// UDF implementation registered under the name "FromUint32List".
// Takes a single list argument, adds every element (read as ui32) to a newly
// created roaring bitmap, and returns that bitmap wrapped in a TRoaringWrapper.
class TRoaringFromUint32List: public TBoxedValue {
public:
    // `pos` is kept only to prefix error messages passed to UdfTerminate().
    TRoaringFromUint32List(TSourcePosition pos)
        : Pos_(pos)
    {
    }

    // Name under which the function is exposed by the module.
    static TStringRef Name() {
        return TStringRef::Of("FromUint32List");
    }

private:
    // args[0] is the input list. Any std::exception raised while building the
    // bitmap is reported via UdfTerminate() together with the source position.
    TUnboxedValue Run(const IValueBuilder* valueBuilder,
                      const TUnboxedValuePod* args) const override {
        Y_UNUSED(valueBuilder);
        try {
            auto* bitmap = roaring_bitmap_create();
            if (!bitmap) {
                // roaring_bitmap_create() reports allocation failure by returning
                // NULL; adding to a null bitmap would dereference it.
                UdfTerminate((TStringBuilder() << Pos_ << " failed to allocate roaring bitmap").data());
            }

            const auto list = args[0];
            const auto* elements = list.GetElements();
            if (elements) {
                // Direct element access is available: walk the array in place.
                // The array holds unboxed values, not raw ui32, so the view must
                // be typed accordingly before each value is unpacked with Get<ui32>().
                const TArrayRef<const TUnboxedValue> items(elements, list.GetListLength());
                for (const auto& item : items) {
                    roaring_bitmap_add(bitmap, item.Get<ui32>());
                }
            } else {
                // No direct element access: fall back to the list iterator.
                TUnboxedValue item;
                const auto it = list.GetListIterator();
                while (it.Next(item)) {
                    roaring_bitmap_add(bitmap, item.Get<ui32>());
                }
            }

            // The wrapper takes ownership of the bitmap and frees it in its destructor.
            return TUnboxedValuePod(new TRoaringWrapper(bitmap));
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }
    TSourcePosition Pos_;
};

class TRoaringSerialize: public TBoxedValue {
public:
TRoaringSerialize() {
Expand Down Expand Up @@ -282,6 +328,7 @@ namespace {
void GetAllFunctions(IFunctionsSink& sink) const final {
sink.Add(TRoaringSerialize::Name());
sink.Add(TRoaringDeserialize::Name());
sink.Add(TRoaringFromUint32List::Name());

sink.Add(TRoaringCardinality::Name());

Expand Down Expand Up @@ -312,6 +359,12 @@ namespace {
if (!typesOnly) {
builder.Implementation(new TRoaringDeserialize(builder.GetSourcePosition()));
}
} else if (TRoaringFromUint32List::Name() == name) {
builder.Returns<TResource<RoaringResourceName>>().Args()->Add<TListType<ui32>>();

if (!typesOnly) {
builder.Implementation(new TRoaringFromUint32List(builder.GetSourcePosition()));
}
} else if (TRoaringSerialize::Name() == name) {
builder.Returns(builder.SimpleType<char*>())
.Args()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,5 +172,42 @@
]
}
]
}
};
{
"Write" = [
{
"Type" = [
"ListType";
[
"StructType";
[
[
"DeserializedList";
[
"OptionalType";
[
"ListType";
[
"DataType";
"Uint32"
]
]
]
]
]
]
];
"Data" = [
[
[
[
"10";
"567"
]
]
]
]
}
]
};
]
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ FROM Input;

-- Skip the first 10 elements of the list read from the deserialized bitmap and take the next one.
SELECT ListTake(ListSkip(Roaring::Uint32List(Roaring::Deserialize(binaryString)), 10), 1) AS EmptyList
FROM Input;

-- Build a bitmap from an in-query list literal and read it back as a list.
SELECT Roaring::Uint32List(Roaring::FromUint32List(AsList(10, 567))) AS DeserializedList
FROM Input;

0 comments on commit b333ddc

Please sign in to comment.