ci: Fix shape and reformat free tensor handling in the input byte size check #7444

Merged · 6 commits · Jul 27, 2024
34 changes: 34 additions & 0 deletions docs/user_guide/model_configuration.md
@@ -598,6 +598,40 @@
input1: [4, 4, 6] <== shape of this tensor [3]
Currently, only TensorRT supports shape tensors. Read [Shape Tensor I/O](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#shape_tensor_io)
to learn more about shape tensors.

## Non-Linear I/O Formats

For models that process input or output data in non-linear formats, the _is_non_linear_format_io_ property
must be set. The following example model configuration shows how to specify that INPUT0 and INPUT1 use non-linear I/O data formats.

```
name: "mytensorrtmodel"
platform: "tensorrt_plan"
max_batch_size: 8
input [
{
name: "INPUT0"
data_type: TYPE_FP16
dims: [ 3,224,224 ]
is_non_linear_format_io: true
},
{
name: "INPUT1"
data_type: TYPE_FP16
dims: [ 3,224,224 ]
is_non_linear_format_io: true
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP16
dims: [ 1,3 ]
}
]
```

Currently, only TensorRT supports this property. To learn more about I/O formats, refer to the [I/O Formats documentation](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#reformat-free-network-tensors).
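As an illustration (not part of this PR or of Triton's tooling), here is a minimal Python sketch of how one might scan a `config.pbtxt` for tensors that set this flag. The scanner is a naive line-based scan, not a real protobuf text-format parser, and the embedded config is a trimmed version of the example above:

```python
import re

CONFIG = """
name: "mytensorrtmodel"
platform: "tensorrt_plan"
input [
  {
    name: "INPUT0"
    data_type: TYPE_FP16
    is_non_linear_format_io: true
  },
  {
    name: "INPUT1"
    data_type: TYPE_FP16
  }
]
"""

def non_linear_tensors(config_text):
    """Return names of tensors whose block sets is_non_linear_format_io: true.

    Walks the text line by line, remembering the most recent `name:` seen,
    and records it when the flag appears. Good enough for flat config.pbtxt
    blocks like the one above; not a full protobuf parser.
    """
    names = []
    current = None
    for line in config_text.splitlines():
        m = re.search(r'name:\s*"([^"]+)"', line)
        if m:
            current = m.group(1)
        if "is_non_linear_format_io: true" in line and current:
            names.append(current)
    return names

print(non_linear_tensors(CONFIG))  # ['INPUT0']
```

A real tool would parse the config with the compiled `ModelConfig` protobuf instead of regexes; the sketch only shows where the property lives in the file.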

## Version Policy

Each model can have one or more
@@ -0,0 +1,26 @@
max_batch_size: 8
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
is_non_linear_format_io: true
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
@@ -0,0 +1 @@
'INPUT0' uses a linear IO format, but 'is_non_linear_format_io' is incorrectly set to true in the model configuration.
@@ -0,0 +1,26 @@
max_batch_size: 8
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
is_non_linear_format_io: true
}
]
@@ -0,0 +1 @@
'OUTPUT1' uses a linear IO format, but 'is_non_linear_format_io' is incorrectly set to true in the model configuration.
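Taken together, the two expected-error files encode the rule the new check enforces: a tensor the engine reports as linear must not set `is_non_linear_format_io` in the configuration. A hedged Python sketch of that rule (the function name and signature are hypothetical; the actual check lives in Triton's C++ TensorRT backend):

```python
def check_format_flag(name, engine_is_linear, config_non_linear):
    """Mirror the validation implied by the expected-error files:
    flagging a linear-format tensor as non-linear is a config error.
    Returns the error string, or None if the configuration is consistent."""
    if engine_is_linear and config_non_linear:
        return (f"'{name}' uses a linear IO format, but "
                "'is_non_linear_format_io' is incorrectly set to true "
                "in the model configuration.")
    return None

print(check_format_flag("OUTPUT1", True, True))
```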
@@ -0,0 +1,57 @@
name: "no_config_non_linear_format_io"
platform: "tensorrt_plan"
backend: "tensorrt"
version_policy {
latest {
num_versions: 1
}
}
max_batch_size: 8
input {
name: "INPUT0"
data_type: TYPE_FP32
dims: -1
dims: 2
dims: 1
is_non_linear_format_io: true
}
input {
name: "INPUT1"
data_type: TYPE_FP32
dims: -1
dims: 2
dims: 1
is_non_linear_format_io: true
}
output {
name: "OUTPUT0"
data_type: TYPE_FP32
dims: -1
dims: 2
dims: 1
}
output {
name: "OUTPUT1"
data_type: TYPE_FP32
dims: -1
dims: 2
dims: 1
}
optimization {
input_pinned_memory {
enable: true
}
output_pinned_memory {
enable: true
}
}
dynamic_batching {
preferred_batch_size: 8
}
instance_group {
name: "no_config_non_linear_format_io"
kind: KIND_GPU
count: 1
gpus: 0
}
default_model_filename: "model.plan"
13 changes: 12 additions & 1 deletion qa/L0_model_config/test.sh
@@ -56,10 +56,12 @@
for modelpath in \
autofill_noplatform/tensorrt/bad_input_shape/1 \
autofill_noplatform/tensorrt/bad_input_type/1 \
autofill_noplatform/tensorrt/bad_input_shape_tensor/1 \
autofill_noplatform/tensorrt/bad_input_non_linear_format_io/1 \
autofill_noplatform/tensorrt/bad_output_dims/1 \
autofill_noplatform/tensorrt/bad_output_shape/1 \
autofill_noplatform/tensorrt/bad_output_type/1 \
autofill_noplatform/tensorrt/bad_output_shape_tensor/1 \
autofill_noplatform/tensorrt/bad_output_non_linear_format_io/1 \
autofill_noplatform/tensorrt/too_few_inputs/1 \
autofill_noplatform/tensorrt/too_many_inputs/1 \
autofill_noplatform/tensorrt/unknown_input/1 \
@@ -92,6 +94,14 @@
$modelpath/.
done
$modelpath/.
done

# Copy TensorRT plans with non-linear format IO into the test model repositories.
for modelpath in \
autofill_noplatform_success/tensorrt/no_config_non_linear_format_io/1 ; do
mkdir -p $modelpath
cp /data/inferenceserver/${REPO_VERSION}/qa_trt_format_model_repository/plan_CHW32_LINEAR_float32_float32_float32/1/model.plan \
$modelpath/.
done

# Copy variable-sized TensorRT plans into the test model repositories.
for modelpath in \
autofill_noplatform_success/tensorrt/no_name_platform_variable/1 \
@@ -593,7 +603,8 @@
for TARGET_DIR in `ls -d autofill_noplatform_success/*/*`; do
# that the directory is an entire model repository.
rm -fr models && mkdir models
if [ -f ${TARGET_DIR}/config.pbtxt ] || [ "$TARGET" = "no_config" ] \
|| [ "$TARGET" = "no_config_variable" ] || [ "$TARGET" = "no_config_shape_tensor" ] ; then
|| [ "$TARGET" = "no_config_variable" ] || [ "$TARGET" = "no_config_shape_tensor" ] \
|| [ "$TARGET" = "no_config_non_linear_format_io" ] ; then
cp -r ${TARGET_DIR} models/.
else
cp -r ${TARGET_DIR}/* models/.