diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
index 6fc2fd36b3f3ce..d039a06cf8cfaf 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
@@ -55,6 +55,7 @@ struct data : public primitive_base<data> {
         ob << make_data(&data_size, sizeof(size_t));
 
         bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size && !weights_path.empty();
+
         if (is_cache_without_weights) {
             ob << true;
             ob << bin_offset;
@@ -75,7 +76,9 @@ struct data : public primitive_base<data> {
 
     void load(BinaryInputBuffer& ib) override {
         primitive_base<data>::load(ib);
+    }
 
+    void load_weights(BinaryInputBuffer& ib, std::shared_ptr<ov::MappedMemory> mapped_weights) {
         layout output_layout = layout();
         ib >> output_layout;
 
@@ -89,6 +92,9 @@ struct data : public primitive_base<data> {
 
         bool is_cache_without_weights;
         ib >> is_cache_without_weights;
+        if (is_cache_without_weights && mapped_weights == nullptr) {
+            OPENVINO_THROW("mmap object is null");
+        }
 
         std::shared_ptr<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>> shared_buf;
         if (is_cache_without_weights) {
@@ -96,12 +102,12 @@ struct data : public primitive_base<data> {
             ib >> weights_path;
 
             original_size = data_size;
-            auto mapped_memory = ov::load_mmap_object(weights_path);
             shared_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
-                mapped_memory->data() + bin_offset,
+                mapped_weights->data() + bin_offset,
                 data_size,
-                mapped_memory);
+                mapped_weights);
         }
+
         if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
             if (is_cache_without_weights) {
                 std::memcpy(reinterpret_cast<uint8_t*>(mem->buffer_ptr()), shared_buf->get_ptr(), data_size);
@@ -170,5 +176,36 @@ struct data : public primitive_base<data> {
             }
         }
     }
+
+    bool operator==(const data& rhs) const {
+        auto _allocation_type = mem->get_allocation_type();
+
+        if (original_size != rhs.original_size
+            || weights_path != rhs.weights_path
+            || _allocation_type != rhs.mem->get_allocation_type()) {
+            return false;
+        }
+
+        if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
+            if (!std::equal(reinterpret_cast<const uint8_t*>(mem->buffer_ptr()),
+                            reinterpret_cast<const uint8_t*>(mem->buffer_ptr()) + original_size,
+                            reinterpret_cast<const uint8_t*>(rhs.mem->buffer_ptr()))) {
+                return false;
+            }
+        } else {
+            std::vector<uint8_t> _buf, _rhs_buf;
+            _buf.resize(original_size);
+            _rhs_buf.resize(original_size);
+            auto& strm = mem->get_engine()->get_service_stream();
+            auto& rhs_strm = rhs.mem->get_engine()->get_service_stream();
+            mem->copy_to(strm, _buf.data());
+            rhs.mem->copy_to(rhs_strm, _rhs_buf.data());
+            if (!std::equal(_buf.begin(), _buf.end(), _rhs_buf.begin())) {
+                return false;
+            }
+        }
+
+        return true;
+    }
 };
 }  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 6d2c04c7664b7d..82ab6c3b056487 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -1872,6 +1872,12 @@ void program::save(cldnn::BinaryOutputBuffer& ob) const {
 void program::load(cldnn::BinaryInputBuffer& ib) {
     init_program();
 
+    std::shared_ptr<ov::MappedMemory> mapped_memory = nullptr;
+    std::string weights_path = _config.get_property(ov::intel_gpu::weights_path);
+    if (!weights_path.empty()) {
+        mapped_memory = ov::load_mmap_object(weights_path);
+    }
+
     size_t num_nodes;
     ib >> num_nodes;
     bool is_valid_data_node;
@@ -1882,6 +1888,9 @@ void program::load(cldnn::BinaryInputBuffer& ib) {
 
         std::shared_ptr<primitive> prim;
         ib >> prim;
+        if (auto data_prim = dynamic_cast<data*>(prim.get())) {
+            data_prim->load_weights(ib, mapped_memory);
+        }
         get_or_create(prim);
     }
 