Skip to content

Commit

Permalink
enable large initializer offset align for save external data in ORT (m…
Browse files Browse the repository at this point in the history
…icrosoft#21604)

### Description
Addresses issue microsoft#21524.
Enables offset alignment for models saved in the external data format.

The corresponding fix for the Python data converter is here: onnx/onnx#6248

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
  • Loading branch information
frank-dong-ms authored Aug 23, 2024
1 parent 27a6890 commit 4c4ae1e
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 18 deletions.
34 changes: 33 additions & 1 deletion include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1139,16 +1139,48 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
ONNX_NAMESPACE::GraphProto ToGraphProto() const;

// Options controlling the alignment of external initializer offsets.
// ORT saves external initializers into a single data file; each initializer is located by its
// offset (start position of the initializer) and length (byte length of the initializer) in that file.
// For models running on CPU, ORT will try to use mmap to load external initializers, and mmap
// requires each offset to be divisible by the allocation granularity
// (64KB for Windows and 4KB for other OSes).
// With align_offset set to true, ORT aligns the offset of large initializers when it
// saves an ONNX model with an external data file.
struct OffsetAlignmentInfo {
  // When true, offsets of qualifying initializers are page aligned and allocation granularity
  // aligned for mmap support. Alignment is achieved by writing padding in front of the
  // aligned tensor's data; tensor lengths are unchanged.
  bool align_offset{false};
  // Size threshold (in bytes) for alignment.
  // Only tensors whose data size is > align_threshold are aligned;
  // a low threshold wastes file space on small initializers.
  // Defaults to 1MB.
  int64_t align_threshold{1024 * 1024};
  // The allocation granularity required for mmap() support.
  // Typically 64KB for Windows and 4KB for other OSes. Defaults to 64KB.
  int64_t allocation_granularity{64 * 1024};
};

/** Gets the GraphProto representation of this Graph, using an external file for initializers.
@param external_file_path File path of the binary file to use for initializers.
@param model_file_path Path of the model file.
@param initializer_size_threshold Initializers larger than or equal to this threshold (in bytes) are saved
in the external file. Initializers smaller than this threshold are included in the onnx file.
@param align_info Offset alignment options used when writing initializers to the external file.
@returns GraphProto serialization of the graph.
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
                                                                const std::filesystem::path& model_file_path,
                                                                size_t initializer_size_threshold,
                                                                const OffsetAlignmentInfo& align_info) const;

/** Convenience overload that forwards to the overload above with a default-constructed
OffsetAlignmentInfo (align_offset is false by default).
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
                                                                const std::filesystem::path& model_file_path,
                                                                size_t initializer_size_threshold) const {
  OffsetAlignmentInfo default_options;
  return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold,
                                              default_options);
}

/** Gets the ISchemaRegistry instances being used with this Graph. */
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
Expand Down
24 changes: 23 additions & 1 deletion onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4021,7 +4021,8 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const {
size_t initializer_size_threshold,
const OffsetAlignmentInfo& align_info) const {
GraphProto result;
ToGraphProtoInternal(result);
ORT_ENFORCE(external_file_path.is_relative());
Expand Down Expand Up @@ -4059,6 +4060,27 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
continue;
}

// update external_offset for alignment
// need to write padding before the actual tensor data, because offset alignment is applied at the beginning of
// large tensors (the offset needs to be page aligned and allocation granularity aligned) like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
if (align_info.align_offset && static_cast<int64_t>(tensor_bytes_size) > align_info.align_threshold) {
// Align to the larger of the page size or the allocation granularity
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), align_info.allocation_granularity);
// Align to the next page or alloc granularity boundary
int64_t new_external_offset = static_cast<int64_t>(
std::floor((external_offset + alignment_factor - 1) / alignment_factor)) *
alignment_factor;

// padding tensor with zeros for alignment
for (int64_t index = external_offset; index != new_external_offset; ++index) {
external_stream << '0';
}

external_offset = new_external_offset;
}

for (size_t index = 0; index != tensor_bytes_size; ++index) {
external_stream << raw_data[index];
}
Expand Down
24 changes: 16 additions & 8 deletions onnxruntime/core/graph/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,14 @@ ModelProto Model::ToProto() const {

// Returns a copy of this model's ModelProto whose graph is produced by
// Graph::ToGraphProtoWithExternalInitializers. The external file name, model file path,
// size threshold and offset alignment options are forwarded to the graph unchanged.
ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
                                                       const std::filesystem::path& file_path,
                                                       size_t initializer_size_threshold,
                                                       const Graph::OffsetAlignmentInfo& align_info) const {
  // Start from the stored model proto and replace only its graph.
  ModelProto result(model_proto_);
  const auto& graph = *graph_;
  *(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
                                                                         file_path,
                                                                         initializer_size_threshold,
                                                                         align_info);
  return result;
}

Expand Down Expand Up @@ -605,14 +607,16 @@ template <typename T>
static Status SaveModelWithExternalInitializers(Model& model,
const T& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
int fd = 0;
Status status = Env::Default().FileOpenWr(file_path, fd);
ORT_RETURN_IF_ERROR(status);

ORT_TRY {
status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
initializer_size_threshold);
initializer_size_threshold,
align_info);
}
ORT_CATCH(const std::exception& ex) {
ORT_HANDLE_EXCEPTION([&]() {
Expand Down Expand Up @@ -642,8 +646,10 @@ Status Model::Load(const PathString& file_path, std::shared_ptr<Model>& p_model,

// Path-based entry point: delegates to the file-local SaveModelWithExternalInitializers helper,
// passing the size threshold and offset alignment options through unchanged.
Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path,
                                           const std::filesystem::path& external_file_name,
                                           size_t initializer_size_threshold,
                                           const Graph::OffsetAlignmentInfo& align_info) {
  return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold,
                                           align_info);
}

Status Model::LoadFromBytes(int count, const void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) {
Expand Down Expand Up @@ -759,15 +765,17 @@ Status Model::SaveWithExternalInitializers(Model& model,
int fd,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
if (fd < 0) {
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "<fd> is less than 0.");
}

ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());

auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path,
initializer_size_threshold);
initializer_size_threshold,
align_info);
google::protobuf::io::FileOutputStream output(fd);
const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
if (result) {
Expand Down
35 changes: 32 additions & 3 deletions onnxruntime/core/graph/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,54 @@ class Model {
// Get model's serialization proto data.
// Save initializers larger than the given threshold (in bytes) into an external binary file
// with the given name. This function is useful to avoid hitting the size limit of protobuf files.
// Initializer offsets can be page aligned and allocation granularity aligned for mmap support,
// as controlled by align_info.
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
                                                                const std::filesystem::path& file_path,
                                                                size_t initializer_size_threshold,
                                                                const Graph::OffsetAlignmentInfo& align_info) const;

// Convenience overload that forwards to the overload above with a default-constructed
// Graph::OffsetAlignmentInfo.
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
                                                                const std::filesystem::path& file_path,
                                                                size_t initializer_size_threshold) const {
  Graph::OffsetAlignmentInfo default_align_info;
  return ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold,
                                              default_align_info);
}

static common::Status Save(Model& model, const PathString& file_path);

static common::Status Save(Model& model, int fd);

// Writes the model to file_path, placing initializers larger than the given threshold (in bytes)
// into an external file.
// Initializer offsets can be page aligned and allocation granularity aligned for mmap support,
// as controlled by align_info.
static common::Status SaveWithExternalInitializers(Model& model,
                                                   const std::filesystem::path& file_path,
                                                   const std::filesystem::path& external_file_path,
                                                   size_t initializer_size_threshold,
                                                   const Graph::OffsetAlignmentInfo& align_info);

// Same as the overload above, using a default-constructed Graph::OffsetAlignmentInfo.
static common::Status SaveWithExternalInitializers(Model& model,
                                                   const std::filesystem::path& file_path,
                                                   const std::filesystem::path& external_file_path,
                                                   size_t initializer_size_threshold) {
  return SaveWithExternalInitializers(model, file_path, external_file_path, initializer_size_threshold,
                                      Graph::OffsetAlignmentInfo{});
}

// Variant of SaveWithExternalInitializers that takes a file descriptor (fd) for the model file
// in addition to the model file path; align_info carries the offset alignment options.
static common::Status SaveWithExternalInitializers(Model& model,
                                                   int fd,
                                                   const std::filesystem::path& file_path,
                                                   const std::filesystem::path& external_file_path,
                                                   size_t initializer_size_threshold,
                                                   const Graph::OffsetAlignmentInfo& align_info);

// Convenience overload that forwards to the overload above with a default-constructed
// Graph::OffsetAlignmentInfo.
static common::Status SaveWithExternalInitializers(Model& model,
                                                   int fd,
                                                   const std::filesystem::path& file_path,
                                                   const std::filesystem::path& external_file_path,
                                                   size_t initializer_size_threshold) {
  Graph::OffsetAlignmentInfo default_align_info;
  return SaveWithExternalInitializers(model, fd, file_path, external_file_path, initializer_size_threshold,
                                      default_align_info);
}

static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto);

Expand Down
5 changes: 4 additions & 1 deletion onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2054,10 +2054,13 @@ common::Status InferenceSession::Initialize() {
const size_t optimized_model_external_initializers_min_size_in_bytes =
ParseStringWithClassicLocale<size_t>(session_options_.config_options.GetConfigOrDefault(
kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, "1024"));
Graph::OffsetAlignmentInfo align_info;
align_info.align_offset = true;
ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(*model_,
session_options_.optimized_model_filepath,
optimized_model_external_initializers_file_name,
optimized_model_external_initializers_min_size_in_bytes));
optimized_model_external_initializers_min_size_in_bytes,
align_info));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
const std::filesystem::path& input_external_init_file,
const std::filesystem::path& output_onnx,
const std::filesystem::path& output_external_init_file,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel");
std::shared_ptr<Model> model;
ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger));
std::filesystem::remove(output_onnx);
std::filesystem::remove(output_external_init_file);
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold));
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold,
align_info));

std::shared_ptr<Model> model_from_external;
ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger));
Expand Down Expand Up @@ -75,6 +77,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");
ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch");

if (align_info.align_offset) {
for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) {
if (entry.has_key() && entry.has_value() && entry.key() == "offset") {
size_t tensor_offset;
std::stringstream stream(entry.value());
stream >> tensor_offset;
ORT_RETURN_IF_NOT(tensor_offset % align_info.allocation_granularity == 0, "tensor offset not align");
}
}
}
}
// Cleanup.
ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed");
Expand All @@ -84,12 +97,22 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

// Original model does not have external initializers
TEST(SaveWithExternalInitializers, Mnist) {
  // Default-constructed options: offset alignment is not requested.
  Graph::OffsetAlignmentInfo align_info;
  ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"),
                                           ORT_TSTR(""),
                                           ORT_TSTR("testdata/mnist_with_external_initializers.onnx"),
                                           ORT_TSTR("mnist_external_initializers.bin"),
                                           100,
                                           align_info));
}

// Original model has external initializers
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) {
  // Default-constructed options: offset alignment is not requested.
  Graph::OffsetAlignmentInfo align_info;
  ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"),
                                           ORT_TSTR("model_with_orig_ext_data.onnx.data"),
                                           ORT_TSTR("testdata/model_with_new_external_initializers.onnx"),
                                           ORT_TSTR("model_with_new_external_initializers.bin"),
                                           0,
                                           align_info));
}

// Original model has external initializers; save with offset alignment enabled
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) {
  // Request alignment and drop the size threshold to 0 so that any non-empty
  // externally saved tensor qualifies.
  Graph::OffsetAlignmentInfo align_info;
  align_info.align_offset = true;
  align_info.align_threshold = 0;

  const std::filesystem::path input_onnx = ORT_TSTR("testdata/model_with_orig_ext_data.onnx");
  const std::filesystem::path input_external_init_file = ORT_TSTR("model_with_orig_ext_data.onnx.data");
  const std::filesystem::path output_onnx = ORT_TSTR("testdata/model_with_new_external_initializers.onnx");
  const std::filesystem::path output_external_init_file = ORT_TSTR("model_with_new_external_initializers.bin");

  ASSERT_STATUS_OK(LoadSaveAndCompareModel(input_onnx, input_external_init_file,
                                           output_onnx, output_external_init_file,
                                           0, align_info));
}

} // namespace test
Expand Down

0 comments on commit 4c4ae1e

Please sign in to comment.