Skip to content

Commit

Permalink
- Fix FlatVector copy memory issue (#11483)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11483

Pull Request resolved: #11482

When copying a FlatVector, the source vector's memory pool is used for the copied buffers. This is a problem when the copy needs to live beyond the lifetime of the source pool.

For strings this requires recomputing new StringViews over newly copied string buffers. This is useful in cases where you need to maintain result rows beyond the lifetime of a given presto task / query.

Reviewed By: xiaoxmeng, arhimondr

Differential Revision: D65306907

fbshipit-source-id: 2ca69a808131d97f78483166fa5736ca0f4a4034
  • Loading branch information
Joe Giardino authored and facebook-github-bot committed Nov 15, 2024
1 parent 0357500 commit b40cec3
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 5 deletions.
4 changes: 2 additions & 2 deletions velox/common/memory/MemoryPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1269,8 +1269,8 @@ void MemoryPoolImpl::leakCheckDbg() {
}
std::stringbuf buf;
std::ostream oss(&buf);
oss << "Detected total of " << debugAllocRecords_.size()
<< " leaked allocations:\n";
oss << "[MemoryPool] : " << name_ << " - Detected total of "
<< debugAllocRecords_.size() << " leaked allocations:\n";
struct AllocationStats {
uint64_t size{0};
uint64_t numAllocations{0};
Expand Down
74 changes: 74 additions & 0 deletions velox/vector/FlatVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,80 @@ void FlatVector<StringView>::copy(
}
}

// Copies this string vector while keeping the flat encoding.
//
// @param pool Memory pool to allocate the copy from. When null, the vector's
// own pool (BaseVector::pool_) is used.
// @return A new FlatVector<StringView> holding the same rows.
//
// If the target pool is the vector's own pool, the string buffers are shared
// with the source (shallow copy of stringBuffers_). Otherwise a deep copy is
// performed: all non-inline strings are packed into one newly allocated
// buffer from the target pool and the StringViews are rebuilt to point into
// it, so the copy does not reference memory owned by the source pool.
template <>
VectorPtr FlatVector<StringView>::copyPreserveEncodings(
    velox::memory::MemoryPool* pool) const {
  const auto allocPool = pool ? pool : BaseVector::pool_;

  // Compare the resolved pool, not the raw argument: a null 'pool' means "use
  // the vector's own pool" and therefore qualifies for the shallow copy too.
  if (allocPool == BaseVector::pool_) {
    return std::make_shared<FlatVector<StringView>>(
        allocPool,
        BaseVector::type_,
        AlignedBuffer::copy(allocPool, BaseVector::nulls_),
        BaseVector::length_,
        AlignedBuffer::copy(allocPool, values_),
        std::vector<BufferPtr>(stringBuffers_),
        SimpleVector<StringView>::stats_,
        BaseVector::distinctValueCount_,
        BaseVector::nullCount_,
        SimpleVector<StringView>::isSorted_,
        BaseVector::representedByteCount_,
        BaseVector::storageByteCount_);
  }

  const auto numRows = BaseVector::size();
  auto newValuesBuffer =
      AlignedBuffer::allocate<StringView>(numRows, allocPool);
  auto* rawCopyValues = newValuesBuffer->asMutable<StringView>();

  // First pass: copy inline StringViews as-is (they carry their own bytes),
  // sum up the bytes needed for the non-inline ones, and give null rows a
  // well-defined empty value so no slot of the new buffer is left
  // uninitialized.
  size_t totalBytes = 0;
  for (vector_size_t i = 0; i < numRows; ++i) {
    if (BaseVector::isNullAt(i)) {
      rawCopyValues[i] = StringView();
      continue;
    }
    const auto v = valueAt(i);
    if (v.isInline()) {
      rawCopyValues[i] = v;
    } else {
      totalBytes += v.size();
    }
  }

  // Second pass: copy the non-inline string bytes into a single buffer owned
  // by the target pool and point the new StringViews at the copied bytes.
  std::vector<BufferPtr> newStringBuffers;
  if (totalBytes > 0) {
    newStringBuffers.emplace_back(
        AlignedBuffer::allocate<char>(totalBytes, allocPool));
    char* rawBuffer = newStringBuffers.back()->asMutable<char>();

    for (vector_size_t i = 0; i < numRows; ++i) {
      if (BaseVector::isNullAt(i)) {
        continue;
      }
      const auto v = valueAt(i);
      if (!v.isInline()) {
        memcpy(rawBuffer, v.data(), v.size());
        rawCopyValues[i] = StringView(rawBuffer, v.size());
        rawBuffer += v.size();
      }
    }
  }

  return std::make_shared<FlatVector<StringView>>(
      allocPool,
      BaseVector::type_,
      AlignedBuffer::copy(allocPool, BaseVector::nulls_),
      BaseVector::length_,
      std::move(newValuesBuffer),
      std::move(newStringBuffers),
      SimpleVector<StringView>::stats_,
      BaseVector::distinctValueCount_,
      BaseVector::nullCount_,
      SimpleVector<StringView>::isSorted_,
      BaseVector::representedByteCount_,
      BaseVector::storageByteCount_);
}

// For strings, we also verify if they point to valid memory locations inside
// the string buffers.
template <>
Expand Down
11 changes: 8 additions & 3 deletions velox/vector/FlatVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,12 +274,13 @@ class FlatVector final : public SimpleVector<T> {

VectorPtr copyPreserveEncodings(
velox::memory::MemoryPool* pool = nullptr) const override {
const auto allocPool = pool ? pool : BaseVector::pool_;
return std::make_shared<FlatVector<T>>(
pool ? pool : BaseVector::pool_,
allocPool,
BaseVector::type_,
AlignedBuffer::copy(BaseVector::pool_, BaseVector::nulls_),
AlignedBuffer::copy(allocPool, BaseVector::nulls_),
BaseVector::length_,
AlignedBuffer::copy(BaseVector::pool_, values_),
AlignedBuffer::copy(allocPool, values_),
std::vector<BufferPtr>(stringBuffers_),
SimpleVector<T>::stats_,
BaseVector::distinctValueCount_,
Expand Down Expand Up @@ -638,6 +639,10 @@ char* FlatVector<StringView>::getRawStringBufferWithSpace(
template <>
void FlatVector<StringView>::prepareForReuse();

// Declaration of the FlatVector<StringView> specialization of
// copyPreserveEncodings(); the definition is in velox/vector/FlatVector.cpp.
// Unlike the generic version, which reuses stringBuffers_ as-is, the
// specialization copies the string data and rebuilds the StringViews when the
// copy is allocated from a pool other than the vector's own.
template <>
VectorPtr FlatVector<StringView>::copyPreserveEncodings(
velox::memory::MemoryPool* pool) const;

template <typename T>
using FlatVectorPtr = std::shared_ptr<FlatVector<T>>;

Expand Down

0 comments on commit b40cec3

Please sign in to comment.