Skip to content

Commit

Permalink
Construct mmap object only once
Browse files Browse the repository at this point in the history
  • Loading branch information
tkrupa-intel committed Sep 20, 2024
1 parent 5c3b493 commit 20eabb9
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 3 deletions.
41 changes: 38 additions & 3 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct data : public primitive_base<data> {
ob << make_data(&data_size, sizeof(size_t));

bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size && !weights_path.empty();

if (is_cache_without_weights) {
ob << true;
ob << bin_offset;
Expand All @@ -75,7 +76,9 @@ struct data : public primitive_base<data> {

// Deserializes only the primitive-base portion of this data primitive.
// Weight payload restoration is deliberately deferred: the caller must invoke
// load_weights() afterwards (see program::load, which passes the shared
// ov::MappedMemory so the weights file is mmap'ed once per program, not per node).
void load(BinaryInputBuffer& ib) override {
primitive_base<data>::load(ib);
}

void load_weights(BinaryInputBuffer& ib, std::shared_ptr<ov::MappedMemory> mapped_weights) {
layout output_layout = layout();
ib >> output_layout;

Expand All @@ -89,19 +92,22 @@ struct data : public primitive_base<data> {

bool is_cache_without_weights;
ib >> is_cache_without_weights;
if (is_cache_without_weights && mapped_weights == nullptr) {
OPENVINO_THROW("mmap object is null");
}

std::shared_ptr<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>> shared_buf;
if (is_cache_without_weights) {
ib >> bin_offset;
ib >> weights_path;
original_size = data_size;

auto mapped_memory = ov::load_mmap_object(weights_path);
shared_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
mapped_memory->data() + bin_offset,
mapped_weights->data() + bin_offset,
data_size,
mapped_memory);
mapped_weights);
}

if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
if (is_cache_without_weights) {
std::memcpy(reinterpret_cast<uint8_t*>(mem->buffer_ptr()), shared_buf->get_ptr<uint8_t>(), data_size);
Expand Down Expand Up @@ -170,5 +176,34 @@ struct data : public primitive_base<data> {
}
}
}

/// @brief Deep equality comparison of two data primitives.
///
/// Two data primitives are considered equal when their recorded sizes,
/// weight-file paths and allocation types match, and the underlying buffers
/// are byte-wise identical over original_size bytes.
///
/// @param rhs primitive to compare against.
/// @return true if metadata and buffer contents match, false otherwise.
///
/// NOTE(review): assumes both mem pointers are non-null and each buffer holds
/// at least original_size bytes — confirm against construction invariants.
bool operator==(const data& rhs) const {
auto _allocation_type = mem->get_allocation_type();

// Cheap metadata checks first; mismatch here makes buffer comparison unnecessary.
if (original_size != rhs.original_size
|| weights_path != rhs.weights_path
|| _allocation_type != rhs.mem->get_allocation_type()) {
return false;
}

if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
// Host-visible memory: compare the mapped buffers directly.
if (!std::equal(reinterpret_cast<uint8_t*>(mem->buffer_ptr()),
reinterpret_cast<uint8_t*>(mem->buffer_ptr()) + original_size,
reinterpret_cast<uint8_t*>(rhs.mem->buffer_ptr()))) {
return false;
}
} else {
// Device memory: stage both buffers on the host via the engines'
// service streams, then compare the staged copies.
std::vector<uint8_t> _buf, _rhs_buf;
_buf.resize(original_size);
_rhs_buf.resize(original_size);
auto& strm = mem->get_engine()->get_service_stream();
auto& rhs_strm = rhs.mem->get_engine()->get_service_stream();
mem->copy_to(strm, _buf.data());
rhs.mem->copy_to(rhs_strm, _rhs_buf.data());
if (!std::equal(_buf.begin(), _buf.end(), _rhs_buf.begin())) {
return false;
}
}

// BUGFIX: the original fell off the end of this value-returning function
// when all checks passed (undefined behavior, [stmt.return]); equal objects
// could compare as anything. All checks passed, so the objects are equal.
return true;
}
};
} // namespace cldnn
9 changes: 9 additions & 0 deletions src/plugins/intel_gpu/src/graph/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,12 @@ void program::save(cldnn::BinaryOutputBuffer& ob) const {
void program::load(cldnn::BinaryInputBuffer& ib) {
init_program();

std::shared_ptr<ov::MappedMemory> mapped_memory = nullptr;
std::string weights_path = _config.get_property(ov::intel_gpu::weights_path);
if (!weights_path.empty()) {
mapped_memory = ov::load_mmap_object(weights_path);
}

size_t num_nodes;
ib >> num_nodes;
bool is_valid_data_node;
Expand All @@ -1882,6 +1888,9 @@ void program::load(cldnn::BinaryInputBuffer& ib) {

std::shared_ptr<cldnn::primitive> prim;
ib >> prim;
if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
data_prim->load_weights(ib, mapped_memory);
}
get_or_create(prim);
}

Expand Down

0 comments on commit 20eabb9

Please sign in to comment.