From 03a52f1a5fae28d90b133478c2ffe1d567a03dba Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 31 Oct 2023 01:54:44 -0700 Subject: [PATCH] Do not use deprecated usePropertiesForAttributes=0 for tf_device ops. PiperOrigin-RevId: 578101119 --- .../compiler/mlir/tensorflow/ir/tf_device.cc | 4 +- .../mlir/tensorflow/ir/tf_device_ops.td | 2 - .../tensorflow/tests/breakup-islands.mlir | 7 +- .../tensorflow/tests/cluster_formation.mlir | 30 +-- .../tensorflow/tests/cluster_outlining.mlir | 8 +- .../tests/device_attribute_to_launch.mlir | 4 +- ...extract_head_tail_outside_compilation.mlir | 46 ++--- .../tests/extract_outside_compilation.mlir | 6 +- ...xtract_tpu_copy_with_dynamic_shape_op.mlir | 8 +- .../host_launch_to_outside_compiled.mlir | 2 +- .../tensorflow/tests/launch_outlining.mlir | 8 +- .../outside_compiled_to_host_launch.mlir | 8 +- .../replicate_invariant_op_hoisting.mlir | 20 +- .../tensorflow/tests/replicate_to_island.mlir | 44 +++-- .../tests/replicate_to_island_legacy.mlir | 12 +- .../tests/tpu-dynamic-layout-pass.mlir | 10 +- .../tpu-merge-variables-with-execute.mlir | 22 ++- .../tpu-variable-runtime-reformatting.mlir | 12 +- .../tpu_colocate_composite_resource_ops.mlir | 8 +- .../mlir/tensorflow/tests/tpu_rewrite.mlir | 177 ++++++------------ .../mlir/tensorflow/tests/xla_rewrite_v2.mlir | 22 +-- .../mlir/tfrt/tests/runtime_lowering_tpu.mlir | 4 +- .../mlir/tests/move_compilation_to_host.mlir | 16 +- .../mlir/tests/multi_device_expansion.mlir | 32 ++-- .../mlir/tests/update_tpu_metadata.mlir | 18 +- 25 files changed, 252 insertions(+), 278 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc index f5a6cf45a4fb17..46cfa429c42573 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc @@ -387,7 +387,7 @@ void ReplicateOp::print(OpAsmPrinter& p) { // packed_input // %b as %block_arg1: type const int32_t n = 
this->getN(); - const int32_t num_replicated_inputs = getOperandSegmentSizes()[0]; + const int32_t num_replicated_inputs = getProperties().operandSegmentSizes[0]; const int32_t num_replicated_block_args = num_replicated_inputs / n; if (getNumOperands()) { @@ -502,7 +502,7 @@ LogicalResult ReplicateOp::verify() { Block& block = op.getBody().front(); - auto operandSegmentSizes = op.getOperandSegmentSizes(); + auto operandSegmentSizes = op.getProperties().operandSegmentSizes; const int32_t num_replicated_inputs = operandSegmentSizes[0]; const int32_t num_packed_inputs = operandSegmentSizes[1]; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td index c0147386ab56d3..343127301d4057 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td @@ -39,7 +39,6 @@ def TfDevice_Dialect : Dialect { }]; let cppNamespace = "::mlir::tf_device"; - let usePropertiesForAttributes = 0; } //===----------------------------------------------------------------------===// @@ -263,7 +262,6 @@ For example: Variadic:$replicated_inputs, Variadic:$packed_inputs, - DenseI32ArrayAttr:$operandSegmentSizes, ConfinedAttr]>:$n, OptionalAttr:$devices ); diff --git a/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir b/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir index e94cb5f859ec34..2704fc38b4d569 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir @@ -544,16 +544,17 @@ func.func @island_not_direct_parent_of_user() -> () { tf_executor.yield %0 : tensor } // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK: "tf.OpC"(%[[VAL_0]]) : (tensor) -> () // CHECK: "tf.OpD"() : () -> () // CHECK: tf_device.return - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> 
() + // CHECK: }) : () -> () %island2 = tf_executor.island { - "tf_device.launch"() ({ + "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> ({ "tf.OpC"(%island1#0) : (tensor) -> () "tf.OpD"() : () -> () tf_device.return - }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> () + }) : () -> () tf_executor.yield } // CHECK: tf_executor.fetch diff --git a/tensorflow/compiler/mlir/tensorflow/tests/cluster_formation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/cluster_formation.mlir index 16acdcec5eda10..5f045823323f21 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/cluster_formation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/cluster_formation.mlir @@ -10,6 +10,7 @@ module { %2 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK-SAME: <{device = "tpu0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[A_OUTPUT]]) : (tensor) -> tensor %3 = "tf.B"(%2) {device = "tpu0"} : (tensor) -> tensor @@ -17,7 +18,7 @@ module { %4 = "tf.C"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[C_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor + // CHECK: : () -> tensor // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[TPU0_OUTPUT]]) %5 = "tf.D"(%4) : (tensor) -> tensor @@ -40,6 +41,7 @@ module { %2 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK-SAME: <{device = "tpu0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[A_OUTPUT]]) : (tensor) -> tensor %3 = "tf.B"(%2) {device = "tpu0"} : (tensor) -> tensor @@ -47,7 +49,7 @@ module { %4 = "tf.C"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[C_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor + // CHECK: : () -> tensor // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[TPU0_OUTPUT]]) %5 = "tf.D"(%4) : (tensor) -> tensor @@ -71,6 +73,7 @@ module { %1:2 = tf_executor.island { // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = 
"tf_device.launch" + // CHECK-SAME: <{device = "tpu0"}> // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) : (tensor) -> tensor %3 = "tf.A"(%arg0) {device = "tpu0"} : (tensor) -> tensor @@ -78,7 +81,7 @@ module { %4 = "tf.B"(%3, %arg0) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[B_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor + // CHECK: : () -> tensor // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[TPU0_OUTPUT]]) %5 = "tf.C"(%4) : (tensor) -> tensor @@ -104,6 +107,7 @@ module { %2:2 = tf_executor.island { // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) : (tensor) -> tensor %3 = "tf.A"(%arg0) {device = "tpu0"} : (tensor) -> tensor @@ -111,7 +115,7 @@ module { %4 = "tf.B"(%3, %1#0) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[B_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor + // CHECK: : () -> tensor // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[TPU0_OUTPUT]]) %5 = "tf.C"(%4) : (tensor) -> tensor @@ -135,11 +139,12 @@ module { %1:2 = tf_executor.island { // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"() : () -> tensor %3 = "tf.A"() {device = "tpu0"} : () -> tensor // CHECK: tf_device.return %[[A_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor + // CHECK: : () -> tensor // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[TPU0_OUTPUT]]) %4 = "tf.B"(%3) : (tensor) -> tensor @@ -166,6 +171,7 @@ module { %2 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[A_OUTPUT]]) : (tensor) -> tensor %3 = "tf.B"(%2) {device = "tpu0"} : (tensor) -> tensor @@ -173,7 +179,7 @@ module { %4 = "tf.C"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[C_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor 
+ // CHECK: : () -> tensor // CHECK: %[[GPU0_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[TPU0_OUTPUT]]) : (tensor) -> tensor @@ -204,6 +210,7 @@ module { %2 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[A_OUTPUT]]) : (tensor) -> tensor %3 = "tf.B"(%2) {device = "tpu0"} : (tensor) -> tensor @@ -211,7 +218,7 @@ module { %4 = "tf.C"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[C_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor + // CHECK: : () -> tensor // CHECK: %[[GPU0_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[A_OUTPUT]]) : (tensor) -> tensor @@ -248,6 +255,7 @@ module { // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[ARG_0]]) // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[A_OUTPUT]]) : (tensor) -> tensor %3 = "tf.B"(%2) {device = "tpu0"} : (tensor) -> tensor @@ -257,7 +265,6 @@ module { %5 = "tf.D"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[D_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor // CHECK: %[[E_OUTPUT:[0-9]*]] = "tf.E"(%[[C_OUTPUT]], %[[TPU0_OUTPUT]]) : (tensor, tensor) -> tensor %6 = "tf.E"(%4, %5) : (tensor, tensor) -> tensor @@ -296,12 +303,11 @@ module { %4 = "tf.C"(%arg0) : (tensor) -> tensor // CHECK: %[[TPU0_OUTPUT1:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[A_OUTPUT]], %[[TPU0_OUTPUT0]]) : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[D_OUTPUT]] %5 = "tf.D"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor - // CHECK: {device = "tpu0"} : () -> tensor - // CHECK: %[[E_OUTPUT:[0-9]*]] = "tf.E"(%[[C_OUTPUT]], %[[TPU0_OUTPUT1]]) : (tensor, tensor) -> tensor %6 = "tf.E"(%4, %5) : (tensor, tensor) -> tensor @@ -358,11 
+364,12 @@ module { %2 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[GPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "gpu0"}> // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[ARG_0]]) // CHECK: tf_device.return %[[C_OUTPUT]] - // CHECK: {device = "gpu0"} : () -> tensor // CHECK: %[[TPU0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: <{device = "tpu0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[A_OUTPUT]]) : (tensor) -> tensor %3 = "tf.B"(%2) {device = "tpu0"} : (tensor) -> tensor @@ -372,7 +379,6 @@ module { %5 = "tf.D"(%2, %3) {device = "tpu0"} : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[D_OUTPUT]] - // CHECK: {device = "tpu0"} : () -> tensor // CHECK: %[[E_OUTPUT:[0-9]*]] = "tf.E"(%[[GPU0_OUTPUT]], %[[TPU0_OUTPUT]]) : (tensor, tensor) -> tensor %6 = "tf.E"(%4, %5) : (tensor, tensor) -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir index a77e449b03de90..90f1cfc2fd5027 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir @@ -10,7 +10,7 @@ func.func @single_cluster(%arg0: tensor) -> tensor { // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) %2 = "tf.A"(%arg0) : (tensor) -> tensor - // CHECK: %[[CLUSTER_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[A_OUTPUT]]) {func = @[[CLUSTER:.*]]} + // CHECK: %[[CLUSTER_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[A_OUTPUT]]) <{func = @[[CLUSTER:.*]]}> %3 = "tf_device.cluster"() ({ %4 = "tf.B"(%2) : (tensor) -> tensor tf_device.return %4 : tensor @@ -42,7 +42,7 @@ func.func @multiple_clusters(%arg0: tensor) -> tensor { // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) %2 = "tf.A"(%arg0) : (tensor) -> tensor - // CHECK: %[[CLUSTER_0_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[A_OUTPUT]]) {func = @[[CLUSTER_0:.*]]} + // CHECK: %[[CLUSTER_0_OUTPUT:[0-9]*]] = 
"tf_device.cluster_func"(%[[A_OUTPUT]]) <{func = @[[CLUSTER_0:.*]]}> %3 = "tf_device.cluster"() ({ %6 = "tf.B"(%2) : (tensor) -> tensor tf_device.return %6 : tensor @@ -51,7 +51,7 @@ func.func @multiple_clusters(%arg0: tensor) -> tensor { // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[CLUSTER_0_OUTPUT]]) %4 = "tf.D"(%3) : (tensor) -> tensor - // CHECK: %[[CLUSTER_1_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[CLUSTER_0_OUTPUT]], %[[D_OUTPUT]]) {func = @[[CLUSTER_1:.*]]} + // CHECK: %[[CLUSTER_1_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[CLUSTER_0_OUTPUT]], %[[D_OUTPUT]]) <{func = @[[CLUSTER_1:.*]]}> %5 = "tf_device.cluster"() ({ %6 = "tf.E"(%3) : (tensor) -> tensor %7 = "tf.F"(%4, %6) : (tensor, tensor) -> tensor @@ -86,7 +86,7 @@ func.func @multiple_clusters(%arg0: tensor) -> tensor { func.func @cluster_operands(%arg0: tensor) -> tensor { %0 = tf_executor.graph { %1:2 = tf_executor.island wraps - // CHECK: %[[CLUSTER_OUTPUT:[a-z0-9]*]], %{{.*}} = {{.*}} "tf_device.cluster_func"() {func = @[[CLUSTER:.*]]} + // CHECK: %[[CLUSTER_OUTPUT:[a-z0-9]*]], %{{.*}} = {{.*}} "tf_device.cluster_func"() <{func = @[[CLUSTER:.*]]}> "tf_device.cluster"() ({ %3 = "tf.A"() : () -> tensor tf_device.return %3 : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/device_attribute_to_launch.mlir b/tensorflow/compiler/mlir/tensorflow/tests/device_attribute_to_launch.mlir index 3384c6529067db..499688411f55ac 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/device_attribute_to_launch.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/device_attribute_to_launch.mlir @@ -4,10 +4,10 @@ // CHECK-LABEL: func @single_op_launch func.func @single_op_launch() { // CHECK: "tf_device.launch" + // CHECK: device = "CPU:0" // CHECK: "tf.opA" // CHECK-NOT device // CHECK: tf_device.return - // CHECK: device = "CPU:0" "tf.opA"() {device = "CPU:0"} : () -> tensor func.return } @@ -16,10 +16,10 @@ func.func @single_op_launch() { // CHECK-LABEL: func @launch_return func.func 
@launch_return() -> tensor { // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK: device = "CPU:0" // CHECK: %[[A_OUT:.*]] = "tf.opA" // CHECK-NOT device // CHECK: tf_device.return %[[A_OUT]] - // CHECK: device = "CPU:0" // CHECK: return %[[LAUNCH_OUT]] %a = "tf.opA"() {device = "CPU:0"} : () -> tensor func.return %a : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/extract_head_tail_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/extract_head_tail_outside_compilation.mlir index 5f0821a0271092..5f48061760a51f 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/extract_head_tail_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/extract_head_tail_outside_compilation.mlir @@ -6,10 +6,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @head_single_outside_compiled_op func.func @head_single_outside_compiled_op(%arg0: tensor) { // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.A" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.B" @@ -27,10 +27,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @head_single_outside_compiled_op_no_operands func.func @head_single_outside_compiled_op_no_operands() { // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[A_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.B"(%[[LAUNCH_OUT]]) @@ -50,10 +50,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: 
%[[A_OUT:.*]] = "tf.A" %a = "tf.A"() : () -> tensor // CHECK-NEXT: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[B_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.C"(%[[LAUNCH_OUT]]) @@ -71,10 +71,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @head_aliased_output func.func @head_aliased_output() -> (tensor, tensor, tensor) { // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[A_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: %[[CLUSTER_OUT:.*]]:2 = "tf_device.cluster" // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[LAUNCH_OUT]]) @@ -98,6 +98,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @head_all_cluster_op func.func @head_all_cluster_op(%arg0: tensor) -> tensor { // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[A_OUT]]) @@ -105,7 +106,6 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-NEXT: %[[C_OUT:.*]] = "tf.C"(%[[B_OUT]], %arg0) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[C_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: tf_device.return @@ -122,6 +122,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // 
CHECK-LABEL: func @head_multiple_outside_compiled_ops func.func @head_multiple_outside_compiled_ops(%arg0: tensor) { // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[A_OUT]]) @@ -129,7 +130,6 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-NEXT: "tf.C" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[B_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.D"(%[[LAUNCH_OUT]]) @@ -149,10 +149,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: tf_device.replicate([%arg0, %arg1] as %[[RI:.*]]: tensor) // // CHECK-NEXT: %[[LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK-SAME: device = "TPU_REPLICATED_HOST_0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A"(%[[RI]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[A_OUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_HOST_0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.B"(%[[LAUNCH_OUT]]) @@ -215,10 +215,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B"(%[[CLUSTER_OUT]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" "tf_device.cluster"() ({ %a = "tf.A"() : () -> tensor "tf.B"(%a) {_xla_outside_compilation = "cluster1"} : (tensor) -> () @@ -241,10 +241,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: 
device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[CLUSTER_OUT]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[B_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" %cluster = "tf_device.cluster"() ({ %a = "tf.A"() : () -> tensor %b = "tf.B"(%a) {_xla_outside_compilation = "cluster1"} : (tensor) -> tensor @@ -268,12 +268,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[C_OUT:.*]] = "tf.C"(%arg0, %[[CLUSTER_OUT]]#1) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: "tf.D"(%[[C_OUT]], %arg0, %[[CLUSTER_OUT]]#0) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" "tf_device.cluster"() ({ %a = "tf.A"() : () -> tensor %b = "tf.B"(%arg0) : (tensor) -> tensor @@ -299,13 +299,13 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[C_OUT:.*]] = "tf.C"(%arg0, %[[CLUSTER_OUT]]#2) // CHECK-NOT: _xla_outside_compilation // CHECK "tf.IfRegion" // CHECK: "tf.D"(%[[C_OUT]], %arg0, %[[CLUSTER_OUT]]#0) // CHECK-NOT: _xla_outside_compilation // CHECK: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" "tf_device.cluster"() ({ %0 = "tf.Const"() {value = dense : tensor} : () -> tensor %a = "tf.A"() : () -> tensor @@ -339,10 +339,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: 
%[[D_OUT:.*]] = "tf.D"(%[[CLUSTER_OUT]]#0, %[[A_OUT]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" %cluster:5 = "tf_device.cluster"() ({ %c = "tf.C"() : () -> tensor %d = "tf.D"(%c, %a) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> tensor @@ -367,10 +367,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK-NEXT: "tf_device.launch"() + // CHECK-SAME: device = "TPU_REPLICATED_HOST_0" // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[CLUSTER_OUT]], %[[RI]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_HOST_0" tf_device.replicate([%arg0, %arg1] as %ri : tensor) {n = 2 : i32} { "tf_device.cluster"() ({ %a = "tf.A"(%ri) : (tensor) -> tensor @@ -402,10 +402,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @head_tail_simple_extraction func.func @head_tail_simple_extraction(%arg0: tensor) -> tensor { // CHECK: %[[HEAD_LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A"(%arg0) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[A_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // // CHECK: %[[CLUSTER_OUT:.*]] = "tf_device.cluster" // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[HEAD_LAUNCH_OUT]]) @@ -417,10 +417,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK: %[[TAIL_LAUNCH_OUT:.*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: %[[C_OUT:.*]] = "tf.C"(%[[CLUSTER_OUT]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[C_OUT]] - // CHECK-NEXT: device = 
"/job:worker/replica:0/task:0/device:CPU:0" %cluster = "tf_device.cluster"() ({ %a = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> tensor %b = "tf.B"(%a) : (tensor) -> tensor @@ -436,10 +436,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: tf_device.replicate([%arg0, %arg1] as %[[RI:.*]]: tensor) // // CHECK-NEXT: %[[HEAD_LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK-SAME: device = "TPU_REPLICATED_HOST_0" // CHECK-NEXT: %[[A_OUT:.*]] = "tf.A"(%[[RI]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return %[[A_OUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_HOST_0" // // CHECK: %[[CLUSTER_OUT:.*]] = "tf_device.cluster" // CHECK-NEXT: %[[B_OUT:.*]] = "tf.B" @@ -453,10 +453,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-DAG: device_assignment = [] // // CHECK-NEXT: "tf_device.launch"() + // CHECK-SAME: device = "TPU_REPLICATED_HOST_0" // CHECK-NEXT: "tf.D"(%[[HEAD_LAUNCH_OUT]], %[[CLUSTER_OUT]], %[[RI]]) // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_HOST_0" tf_device.replicate([%arg0, %arg1] as %ri : tensor) {n = 2 : i32} { "tf_device.cluster"() ({ %a = "tf.A"(%ri) {_xla_outside_compilation = "cluster1"} : (tensor) -> tensor @@ -490,10 +490,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @side_effect_head_no_operand func.func @side_effect_head_no_operand() { // CHECK: %[[HEAD_LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B" // CHECK-NEXT: %[[C_OUT:.*]] = "tf.C" // CHECK-NEXT: tf_device.return %[[C_OUT]] - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.Const" @@ -518,10 +518,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = 
["/job:wor // CHECK-NEXT: tf_device.return %[[A_OUT]] // CHECK: "tf_device.launch"() + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B"(%[[CLUSTER_OUT]]) // CHECK-NEXT: "tf.C" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" "tf_device.cluster"() ({ %a = "tf.A"() : () -> tensor "tf.B"(%a) {_xla_outside_compilation = "cluster1"} : (tensor) -> () @@ -538,10 +538,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @embedding_head_extraction func.func @embedding_head_extraction(%arg0: tensor) { // CHECK: "tf_device.launch"() + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.EnqueueTPUEmbeddingRaggedTensorBatch" // CHECK-NEXT: "tf.EnqueueTPUEmbeddingArbitraryTensorBatch" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.UnknownOp" @@ -560,9 +560,9 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @op_after_embedding_head_extraction func.func @op_after_embedding_head_extraction() { // CHECK: "tf_device.launch"() + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.A" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.RecvTPUEmbeddingActivations" @@ -588,9 +588,9 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-NEXT: tf_device.return // CHECK: "tf_device.launch"() + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.A" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" "tf_device.cluster"() ({ "tf.UnknownOp"() : () -> () "tf.A"() {_xla_outside_compilation = "cluster1"} : 
() -> () @@ -607,10 +607,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-LABEL: func @head_single_outside_compiled_op_in_generic_pipeline func.func @head_single_outside_compiled_op_in_generic_pipeline(%arg0: tensor) { // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.A" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" // // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.B" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/extract_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/extract_outside_compilation.mlir index 87acd459ed1350..cbd9942a0f8a73 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/extract_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/extract_outside_compilation.mlir @@ -32,10 +32,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @nodep_single_outside_compilation() -> () { // CHECK: "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.A" // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" @@ -102,9 +102,9 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_HOST_0" // CHECK: "tf.B" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_HOST_0" // CHECK: 
%[[TPU_CLUSTER_OUTPUT:[0-9]*]] = "tf_device.cluster" // CHECK: tf_device.return // CHECK: tf_device.return %[[TPU_CLUSTER_OUTPUT]] @@ -1839,10 +1839,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @outside_compilation_model_parallelism() -> () { // CHECK: "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.A" // CHECK: num_cores_per_replica = 2 : i64 diff --git a/tensorflow/compiler/mlir/tensorflow/tests/extract_tpu_copy_with_dynamic_shape_op.mlir b/tensorflow/compiler/mlir/tensorflow/tests/extract_tpu_copy_with_dynamic_shape_op.mlir index 2c2b36c59121d3..ec3fedf987ab37 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/extract_tpu_copy_with_dynamic_shape_op.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/extract_tpu_copy_with_dynamic_shape_op.mlir @@ -6,10 +6,10 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:COMPOSI // CHECK-LABEL: func @valid_copy_op_in_replicated_host // CHECK: "tf_device.launch" - // CHECK: "TPU_REPLICATED_HOST_0" + // CHECK-SAME: "TPU_REPLICATED_HOST_0" // CHECK: "tf_device.launch" + // CHECK-SAME: "TPU_REPLICATED_CORE_0" // CHECK: "tf.TPUCopyWithDynamicShape" - // CHECK: "TPU_REPLICATED_CORE_0" func.func @valid_copy_op_in_replicated_host( %arg0: tensor<2048xi64> {tf.device = "/job:localhost/replica:0/task:0/device:CPU:0"}, %arg1: tensor<2048xi64> {tf.device = "/job:localhost/replica:0/task:0/device:CPU:0"}) -> (tensor<2048xi32>, tensor<2048xi32>) { @@ -26,10 +26,10 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:COMPOSI // CHECK-LABEL: func @valid_copy_op_in_non_replicated_host // CHECK: "tf_device.launch" - // CHECK: 
"/job:localhost/replica:0/task:0/device:CPU:0" + // CHECK-SAME: "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.launch" + // CHECK-SAME: "/job:localhost/replica:0/task:0/device:TPU:0" // CHECK: "tf.TPUCopyWithDynamicShape" - // CHECK: "/job:localhost/replica:0/task:0/device:TPU:0" func.func @valid_copy_op_in_non_replicated_host( %arg0: tensor<2048xi64> {tf.device = "/job:localhost/replica:0/task:0/device:CPU:0"}, %arg1: tensor<2048xi64> {tf.device = "/job:localhost/replica:0/task:0/device:CPU:0"}) -> (tensor<2048xi32>, tensor<2048xi32>) { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/host_launch_to_outside_compiled.mlir b/tensorflow/compiler/mlir/tensorflow/tests/host_launch_to_outside_compiled.mlir index b6a0a2b48567d3..d7867332a4812c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/host_launch_to_outside_compiled.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/host_launch_to_outside_compiled.mlir @@ -28,9 +28,9 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @single_op_launch_not_host() -> () { // CHECK: "tf.A" // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:TPU:0" // CHECK: "tf.B" // CHECK-NOT: _xla_outside_compilation - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:0" // CHECK: "tf.C" // CHECK-NEXT: tf_device.return "tf_device.cluster"() ({ diff --git a/tensorflow/compiler/mlir/tensorflow/tests/launch_outlining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/launch_outlining.mlir index 84825bae4aaea4..91d58dff11f5b8 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/launch_outlining.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/launch_outlining.mlir @@ -10,7 +10,7 @@ func.func @single_launch(%arg0: tensor) -> tensor { // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) %2 = "tf.A"(%arg0) : (tensor) -> tensor - // CHECK: %[[LAUNCH_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[A_OUTPUT]]) {device = 
"/device:test_device:0", func = @[[LAUNCH:.*]]} + // CHECK: %[[LAUNCH_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[A_OUTPUT]]) <{device = "/device:test_device:0", func = @[[LAUNCH:.*]]}> %3 = "tf_device.launch"() ({ %4 = "tf.B"(%2) : (tensor) -> tensor tf_device.return %4 : tensor @@ -42,7 +42,7 @@ func.func @multiple_launches(%arg0: tensor) -> tensor { // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) %2 = "tf.A"(%arg0) : (tensor) -> tensor - // CHECK: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[A_OUTPUT]]) {device = "/device:test_device:0", func = @[[LAUNCH_0:.*]]} + // CHECK: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[A_OUTPUT]]) <{device = "/device:test_device:0", func = @[[LAUNCH_0:.*]]}> %3 = "tf_device.launch"() ({ %6 = "tf.B"(%2) : (tensor) -> tensor tf_device.return %6 : tensor @@ -51,7 +51,7 @@ func.func @multiple_launches(%arg0: tensor) -> tensor { // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[LAUNCH_0_OUTPUT]]) %4 = "tf.D"(%3) : (tensor) -> tensor - // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[LAUNCH_0_OUTPUT]], %[[D_OUTPUT]]) {device = "/device:test_device:0", func = @[[LAUNCH_1:.*]]} + // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[LAUNCH_0_OUTPUT]], %[[D_OUTPUT]]) <{device = "/device:test_device:0", func = @[[LAUNCH_1:.*]]}> %5 = "tf_device.launch"() ({ %6 = "tf.E"(%3) : (tensor) -> tensor %7 = "tf.F"(%4, %6) : (tensor, tensor) -> tensor @@ -86,7 +86,7 @@ func.func @multiple_launches(%arg0: tensor) -> tensor { func.func @launch_operands(%arg0: tensor) -> tensor { %0 = tf_executor.graph { %1:2 = tf_executor.island wraps - // CHECK: %[[LAUNCH_OUTPUT:[a-z0-9]*]], %{{.*}} = {{.*}} "tf_device.launch_func"() {device = "/device:test_device:0", func = @[[LAUNCH:.*]]} + // CHECK: %[[LAUNCH_OUTPUT:[a-z0-9]*]], %{{.*}} = {{.*}} "tf_device.launch_func"() <{device = "/device:test_device:0", func = @[[LAUNCH:.*]]}> "tf_device.launch"() ({ %3 = "tf.A"() : () -> tensor tf_device.return %3 : tensor 
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/outside_compiled_to_host_launch.mlir b/tensorflow/compiler/mlir/tensorflow/tests/outside_compiled_to_host_launch.mlir index 2f744534abd3e9..c0230b43d1db04 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/outside_compiled_to_host_launch.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/outside_compiled_to_host_launch.mlir @@ -22,10 +22,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @nodep_single_outside_compilation() -> () { // CHECK: "tf.A" // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" "tf_device.cluster"() ({ "tf.A"() : () -> () @@ -45,10 +45,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-NEXT: "tf_device.cluster" // CHECK-NEXT: "tf.B" // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_HOST_0" // CHECK-NEXT: "tf.C" // CHECK-NOT: _xla_outside_compilation // CHECK: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_HOST_0" // CHECK: device_assignment = [], num_cores_per_replica = 1 : i64, topology = "" %0 = "tf.A"(%arg0) : (tensor) -> tensor tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { @@ -136,10 +136,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @called_outside_compilation_callee() -> () { // CHECK: "tf.A" // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" "tf.A"() : () -> () "tf.B"() 
{_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() : () -> () @@ -178,10 +178,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @outside_compilation_model_parallelism() -> () { // CHECK: "tf.A" // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: num_cores_per_replica = 2 : i64 %0 = "tf_device.cluster"() ({ "tf.A"() : () -> () diff --git a/tensorflow/compiler/mlir/tensorflow/tests/replicate_invariant_op_hoisting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/replicate_invariant_op_hoisting.mlir index 024caf9297bd3e..ec30a7bdc906d3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/replicate_invariant_op_hoisting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/replicate_invariant_op_hoisting.mlir @@ -156,19 +156,19 @@ func.func @nested_ops(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) { // CHECK: %[[SHAPE:[0-9]*]] = "tf.Shape"(%[[ARG_0]]) // CHECK-NEXT: %[[LAUNCH_A:[0-9]*]] = "tf_device.launch" +// CHECK-SAME: device = "a" // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[SHAPE]]) // CHECK-NEXT: tf_device.return %[[OP_A]] -// CHECK-NEXT: device = "a" -// CHECK-NEXT: %[[LAUNCH_B:[0-9]*]] = "tf_device.launch" +// CHECK: %[[LAUNCH_B:[0-9]*]] = "tf_device.launch" +// CHECK-SAME: device = "b" // CHECK-NEXT: %[[OP_B:[0-9]*]] = "tf.opB"(%[[SHAPE]], %[[LAUNCH_A]]) // CHECK-NEXT: tf_device.return %[[OP_B]] -// CHECK-NEXT: device = "b" -// CHECK-NEXT: tf_device.replicate([{{.*}}] as %[[RI:[a-z0-9]+]]: tensor<*xf32>) +// CHECK: tf_device.replicate([{{.*}}] as %[[RI:[a-z0-9]+]]: tensor<*xf32>) // CHECK-NEXT: %[[LAUNCH_C:[0-9]*]] = "tf_device.launch" +// CHECK-SAME: device = "c" // CHECK-NEXT: %[[OP_C:[0-9]*]] = "tf.opC"(%[[RI]], %[[LAUNCH_B]]) // CHECK-NEXT: tf_device.return %[[OP_C]] -// CHECK-NEXT: 
device = "c" -// CHECK-NEXT: tf_device.return %[[SHAPE]], %[[LAUNCH_A]], %[[LAUNCH_B]], %[[LAUNCH_C]] +// CHECK: tf_device.return %[[SHAPE]], %[[LAUNCH_A]], %[[LAUNCH_B]], %[[LAUNCH_C]] // CHECK-LABEL: func @do_not_hoist_ops_with_virtual_device @@ -193,14 +193,14 @@ func.func @do_not_hoist_ops_with_virtual_device(%arg0: tensor<*xf32>, %arg1: ten // CHECK: [[SHAPE:%.*]] = "tf.Shape"([[VAL_0]]) // CHECK: tf_device.replicate({{\[}}[[VAL_0]], [[VAL_1]]] as [[VAL_4:%.*]]: tensor<*xf32>) {devices = {TPU_REPLICATED_CORE_0 = ["/device:TPU:0", "/device:TPU:1"]}, n = 2 : i32} { // CHECK: [[OP_A:%.*]] = "tf.opA"([[SHAPE]]) {device = "TPU_REPLICATED_CORE_0"} : (tensor) -> tensor<*xi32> -// CHECK: [[LAUNCH_B:%.*]] = "tf_device.launch"() ({ +// CHECK: [[LAUNCH_B:%.*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> ({ // CHECK: [[OP_B:%.*]] = "tf.opB"([[SHAPE]]) : (tensor) -> tensor<*xi32> // CHECK: tf_device.return [[OP_B]] : tensor<*xi32> -// CHECK: }) {device = "TPU_REPLICATED_CORE_0"} : () -> tensor<*xi32> -// CHECK: [[LAUNCH_C:%.*]] = "tf_device.launch"() ({ +// CHECK: }) : () -> tensor<*xi32> +// CHECK: [[LAUNCH_C:%.*]] = "tf_device.launch"() <{device = "c"}> ({ // CHECK: [[OP_C:%.*]] = "tf.opC"([[SHAPE]]) {device = "TPU_REPLICATED_CORE_0"} : (tensor) -> tensor<*xi32> // CHECK: tf_device.return [[OP_C]] : tensor<*xi32> -// CHECK: }) {device = "c"} : () -> tensor<*xi32> +// CHECK: }) : () -> tensor<*xi32> // CHECK: tf_device.return [[SHAPE]], [[OP_A]], [[LAUNCH_B]], [[LAUNCH_C]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir index 8fec2a5bb55223..a27a0ff5785ae2 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir @@ -44,9 +44,11 @@ func.func @no_devices() { } // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:0", device = "CORE_0" +// device = "CORE_0" 
+// CHECK: _parallel_execution_ids = "r0:0" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:1", device = "CORE_0" +// device = "CORE_0" +// CHECK: _parallel_execution_ids = "r0:1" // Tests devices are not remapped if device is not in replicate devices. @@ -69,9 +71,11 @@ func.func @no_override_device() { } // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:0", device = "/TPU:2" +// device = "/TPU:2" +// CHECK: _parallel_execution_ids = "r0:0" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:1", device = "/TPU:2" +// device = "/TPU:2" +// CHECK: _parallel_execution_ids = "r0:1" // Tests devices are remapped if device is in replicate devices. @@ -94,9 +98,11 @@ func.func @remap_device() { } // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:0", device = "/CPU:0" +// device = "/CPU:0" +// CHECK: _parallel_execution_ids = "r0:0" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:1", device = "/GPU:1" +// device = "/GPU:1" +// CHECK: _parallel_execution_ids = "r0:1" // Tests replicate with control dependency output has each expanded replica @@ -305,20 +311,20 @@ func.func @nested_parallel_execute(%arg0: tensor, %arg1: tensor) { // CHECK: tf_executor.island // CHECK: tf_device.parallel_execute // CHECK: tf_device.launch +// CHECK: <{device = "/TPU:1"}> // CHECK: tf.OpA -// CHECK: {device = "/TPU:1"} // CHECK: tf_device.launch +// CHECK: <{device = "/TPU:2"}> // CHECK: tf.OpB -// CHECK: {device = "/TPU:2"} // CHECK: _parallel_execution_ids = "r0:0" // CHECK: tf_executor.island // CHECK: tf_device.parallel_execute // CHECK: tf_device.launch +// CHECK: <{device = "/TPU:1"}> // CHECK: tf.OpA -// CHECK: {device = "/TPU:1"} // CHECK: tf_device.launch +// CHECK: <{device = "/TPU:2"}> // CHECK: tf.OpB -// CHECK: {device = "/TPU:2"} // CHECK: _parallel_execution_ids = "r0:1" // CHECK: tf_executor.fetch @@ -343,9 +349,11 @@ func.func @merge_of_parallel_group_attr() { } // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = 
"r4:5,r0:0", device = "/CPU:0" +// device = "/CPU:0" +// CHECK: _parallel_execution_ids = "r4:5,r0:0" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r4:5,r0:1", device = "/GPU:1" +// device = "/GPU:1" +// CHECK: _parallel_execution_ids = "r4:5,r0:1" // ----- @@ -418,10 +426,14 @@ func.func @no_override_device_new() { func.return } // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:0", device = "/TPU:0" +// device = "/TPU:0" +// CHECK: _parallel_execution_ids = "r0:0" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r0:1", device = "/TPU:0" +// device = "/TPU:0" +// CHECK: _parallel_execution_ids = "r0:1" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r1:0", device = "/TPU:1" +// device = "/TPU:1" +// CHECK: _parallel_execution_ids = "r1:0" // CHECK: "tf.opA" -// CHECK: _parallel_execution_ids = "r1:1", device = "/TPU:1" +// device = "/TPU:1" +// CHECK: _parallel_execution_ids = "r1:1" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island_legacy.mlir b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island_legacy.mlir index 24d498ebe88601..2c47b0835d7459 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island_legacy.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island_legacy.mlir @@ -43,10 +43,12 @@ func.func @no_devices() { func.return } -// CHECK: "tf.opA" +// CHECK: "tf_device.launch" // CHECK: device = "CORE_0" // CHECK: "tf.opA" +// CHECK: "tf_device.launch" // CHECK: device = "CORE_0" +// CHECK: "tf.opA" // Tests devices are not remapped if device is not in replicate devices. @@ -68,10 +70,12 @@ func.func @no_override_device() { func.return } -// CHECK: "tf.opA" +// CHECK: "tf_device.launch" // CHECK: device = "/TPU:2" // CHECK: "tf.opA" +// CHECK: "tf_device.launch" // CHECK: device = "/TPU:2" +// CHECK: "tf.opA" // Tests devices are remapped if device is in replicate devices. 
@@ -93,10 +97,12 @@ func.func @remap_device() { func.return } -// CHECK: "tf.opA" +// CHECK: "tf_device.launch" // CHECK: device = "/CPU:0" // CHECK: "tf.opA" +// CHECK: "tf_device.launch" // CHECK: device = "/GPU:1" +// CHECK: "tf.opA" // Tests replicate with control dependency output has each expanded replica diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-dynamic-layout-pass.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-dynamic-layout-pass.mlir index 553bfa0955106b..75bf23d23519b2 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-dynamic-layout-pass.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-dynamic-layout-pass.mlir @@ -429,9 +429,9 @@ func.func @parallel_execute(%arg0: tensor<*x!tf_type.resource> {tf.device = "/de // CHECK-NEXT: %[[COPY0:.*]] = "tf.TPUCopyWithLayout"(%[[ITER]]#0, %[[LAYOUT0]]) // CHECK-SAME: device = "/device:TPU:0" // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: device = "/device:TPU:0" // CHECK-NEXT: "tf.TPUExecute"(%[[COPY0]], %[[COMPILE]]#1) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/device:TPU:0" "tf_device.launch"() ({ "tf.TPUExecute"(%2#0, %compile#1) : (tensor<128xf32>, tensor<2x!tf_type.string>) -> () tf_device.return @@ -442,9 +442,9 @@ func.func @parallel_execute(%arg0: tensor<*x!tf_type.resource> {tf.device = "/de // CHECK: %[[COPY1:.*]] = "tf.TPUCopyWithLayout"(%[[ITER]]#1, %[[LAYOUT1]]) // CHECK-SAME: device = "/device:TPU:1" // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: device = "/device:TPU:1" // CHECK-NEXT: "tf.TPUExecute"(%[[COPY1]], %[[COMPILE]]#2) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/device:TPU:1" "tf_device.launch"() ({ "tf.TPUExecute"(%2#1, %compile#2) : (tensor<128xf32>, tensor<2x!tf_type.string>) -> () tf_device.return @@ -501,9 +501,10 @@ func.func @replicated_parallel_execute(%arg0: tensor<*x!tf_type.resource> {tf.de tf_device.replicate([%2#0, %3#0] as %r0: tensor<128xf32>, [%2#1, %3#1] as %r1: tensor<128xf32>) {n = 2 : i32, 
devices = {TPU_REPLICATED_CORE_0 = ["/device:TPU:0", "/device:TPU:1"], TPU_REPLICATED_CORE_1 = ["/device:TPU:2", "/device:TPU:3"]}} { // CHECK: "tf_device.parallel_execute" "tf_device.parallel_execute"() ({ + // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK: "tf.TPUExecute"(%[[R0]], %[[COMPILE]]#1) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" "tf_device.launch"() ({ "tf.TPUExecute"(%r0, %compile#1) : (tensor<128xf32>, tensor<2x!tf_type.string>) -> () tf_device.return @@ -511,9 +512,10 @@ func.func @replicated_parallel_execute(%arg0: tensor<*x!tf_type.resource> {tf.de tf_device.return }, { + // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_1" // CHECK: "tf.TPUExecute"(%[[R1]], %[[COMPILE]]#2) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_1" "tf_device.launch"() ({ "tf.TPUExecute"(%r1, %compile#2) : (tensor<128xf32>, tensor<2x!tf_type.string>) -> () tf_device.return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir index e3191b5d9b6d6e..880703afd0fe5b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-merge-variables-with-execute.mlir @@ -27,6 +27,7 @@ func.func @merge_same_device_variables( tf_device.return %0#0, %0#1 : tensor, tensor<2x!tf_type.string> }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<2x!tf_type.string>) // CHECK: %[[EXE:.*]] = "tf_device.launch" + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[ID_0]], %[[ARG_1]], %[[READ_2]], %[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [0, 1], // CHECK-SAME: device_var_updates_indices = [0, -1] @@ -38,7 +39,7 @@ func.func 
@merge_same_device_variables( tf_device.return %0#0, %0#1 : tensor<32xf32>, tensor<16xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> (tensor<32xf32>, tensor<16xf32>) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} + // CHECK-NEXT: }) "tf.AssignVariableOp"(%id0, %execute#0) : (tensor<*x!tf_type.resource>>, tensor<32xf32>) -> () // CHECK-NEXT: "tf.AssignVariableOp"(%[[ARG_2]], %[[EXE]]) "tf.AssignVariableOp"(%arg2, %execute#1) : (tensor<*x!tf_type.resource>>, tensor<16xf32>) -> () @@ -71,6 +72,7 @@ func.func @merge_replicated_variables( // CHECK: tf_device.replicate([%[[ARG_1]], %[[ARG_2]]] as %[[R_ARG:.*]]: tensor<*x!tf_type.resource>>) tf_device.replicate([%arg1, %arg2] as %r: tensor<*x!tf_type.resource>>) {n = 2 : i32} { // CHECK-NEXT: "tf_device.launch" + // CHECK-SAME: <{device = ""}> // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[READ_0]], %[[R_ARG]], %[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [1], // CHECK-SAME: device_var_updates_indices = [0] @@ -81,7 +83,7 @@ func.func @merge_replicated_variables( tf_device.return %0 : tensor<32xf32> }) {device = ""} : () -> tensor<32xf32> // CHECK-NEXT: tf_device.return - // CHECK-NEXT: }) {device = ""} + // CHECK-NEXT: }) "tf.AssignVariableOp"(%r, %execute) : (tensor<*x!tf_type.resource>>, tensor<32xf32>) -> () // CHECK-NEXT: tf_device.return tf_device.return @@ -130,6 +132,7 @@ func.func @interfering_accesses( tf_device.return %0#0, %0#1 : tensor, tensor<2x!tf_type.string> }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<2x!tf_type.string>) // CHECK: %[[EXE:.*]]:2 = "tf_device.launch" + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[READ_0]], %[[ARG_1]], %[[ARG_4]], %[[READ_5]], %[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [1, 2], // CHECK-SAME: device_var_updates_indices = [1, -1] @@ 
-142,7 +145,7 @@ func.func @interfering_accesses( tf_device.return %0#0, %0#1, %0#2 : tensor<32xf32>, tensor<64xf32>, tensor<8xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> (tensor<32xf32>, tensor<64xf32>, tensor<8xf32>) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} + // CHECK-NEXT: }) "tf.AssignVariableOp"(%arg1, %execute#1) : (tensor<*x!tf_type.resource>>, tensor<64xf32>) -> () // CHECK-NEXT: "tf.AssignVariableOp"(%[[ARG_0]], %[[EXE]]#0) "tf.AssignVariableOp"(%arg0, %execute#0) : (tensor<*x!tf_type.resource>>, tensor<32xf32>) -> () @@ -197,6 +200,7 @@ func.func @non_interfering_accesses( tf_device.return %0#0, %0#1 : tensor, tensor<2x!tf_type.string> }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> (tensor, tensor<2x!tf_type.string>) // CHECK: %[[EXE:.*]] = "tf_device.launch" + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[ARG_0]], %[[ARG_1]], %[[ARG_3]], %[[ARG_4]], %[[COMPILE]]#1) // CHECK-SAME: device_var_reads_indices = [0, 1, 2, 3], // CHECK-SAME: device_var_updates_indices = [0, 1, -1, -1] @@ -209,7 +213,7 @@ func.func @non_interfering_accesses( tf_device.return %0#0, %0#1, %0#2 : tensor<32xf32>, tensor<64xf32>, tensor<8xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> (tensor<32xf32>, tensor<64xf32>, tensor<8xf32>) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} + // CHECK-NEXT: }) // CHECK-NEXT: %[[READ:.*]] = "tf.ReadVariableOp"(%[[ARG_3]]) %read3 = "tf.ReadVariableOp"(%arg3) : (tensor<*x!tf_type.resource>>) -> tensor<8xf32> // CHECK-NEXT: "tf.AssignVariableOp"(%[[ARG_3]], %[[EXE]]) @@ -236,6 +240,7 @@ func.func @do_not_merge_multi_read( // CHECK-NEXT: %[[READ_1:.*]] = "tf.ReadVariableOp"(%[[ARG_0]]) %read1 = "tf.ReadVariableOp"(%arg0) : (tensor<*x!tf_type.resource>>) -> 
tensor<32xf32> // CHECK-NEXT: %[[EXE:.*]] = "tf_device.launch" + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecute"(%[[READ_0]], %[[READ_1]], %[[ARG_1]]) %execute = "tf_device.launch"() ({ %0 = "tf.TPUExecute"(%read0, %read1, %arg1) { @@ -244,7 +249,7 @@ func.func @do_not_merge_multi_read( tf_device.return %0 : tensor<32xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> tensor<32xf32> // CHECK-NEXT: tf_device.return - // CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} + // CHECK-NEXT: }) // CHECK-NEXT: "tf.AssignVariableOp"(%[[ARG_0]], %[[EXE]]) "tf.AssignVariableOp"(%arg0, %execute) : (tensor<*x!tf_type.resource>>, tensor<32xf32>) -> () // CHECK-NEXT: return @@ -265,6 +270,7 @@ func.func @do_not_merge_multi_assign( // CHECK-NEXT: %[[READ_0:.*]] = "tf.ReadVariableOp"(%[[ARG_0]]) %read0 = "tf.ReadVariableOp"(%arg0) : (tensor<*x!tf_type.resource>>) -> tensor<32xf32> // CHECK-NEXT: %[[EXE:.*]]:2 = "tf_device.launch" + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecute"(%[[READ_0]], %[[ARG_1]]) %execute:2 = "tf_device.launch"() ({ %0:2 = "tf.TPUExecute"(%read0, %arg1) { @@ -273,7 +279,7 @@ func.func @do_not_merge_multi_assign( tf_device.return %0#0, %0#1 : tensor<32xf32>, tensor<32xf32> }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> (tensor<32xf32>, tensor<32xf32>) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} + // CHECK-NEXT: }) // CHECK-NEXT: "tf.AssignVariableOp"(%[[ARG_0]], %[[EXE]]#0) "tf.AssignVariableOp"(%arg0, %execute#0) : (tensor<*x!tf_type.resource>>, tensor<32xf32>) -> () // CHECK-NEXT: "tf.AssignVariableOp"(%[[ARG_0]], %[[EXE]]#1) @@ -301,22 +307,22 @@ func.func @parallel_execute( // CHECK: "tf_device.parallel_execute" %pe:2 = "tf_device.parallel_execute"() ({ // CHECK: "tf_device.launch" + // CHECK-SAME: device = 
"/job:localhost/replica:0/task:0/device:TPU:0" %execute0 = "tf_device.launch"() ({ // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[ARG_0]], %[[ARG_2]]) %0 = "tf.TPUExecute"(%read0, %arg2) : (tensor<32xf32>, tensor) -> tensor<32xf32> // CHECK-NEXT: tf_device.return tf_device.return %0 : tensor<32xf32> - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:TPU:0" }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> tensor<32xf32> tf_device.return %execute0 : tensor<32xf32> }, { // CHECK: "tf_device.launch" + // CHECK-SAME: device = "/job:localhost/replica:0/task:0/device:TPU:1" %execute1 = "tf_device.launch"() ({ // CHECK-NEXT: "tf.TPUExecuteAndUpdateVariables"(%[[ARG_1]], %[[ARG_2]]) %1 = "tf.TPUExecute"(%read1, %arg2) : (tensor<64xf32>, tensor) -> tensor<64xf32> // CHECK-NEXT: tf_device.return tf_device.return %1 : tensor<64xf32> - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:TPU:1" }) {device = "/job:localhost/replica:0/task:0/device:TPU:1"} : () -> tensor<64xf32> tf_device.return %execute1 : tensor<64xf32> }) : () -> (tensor<32xf32>, tensor<64xf32>) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir index 55949182f5d291..9a903d73b76b8b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir @@ -61,9 +61,9 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK: %[[ID:.*]] = "tf.Identity"(%[[R0]]) %id = "tf.Identity"(%arg30) : (tensor<*x!tf_type.resource>>) -> tensor<*x!tf_type.resource>> // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK-NEXT: "tf.TPUReshardVariables"(%[[ID]], %[[R1]], %[[COMPILE]]#1, %[[R_STATE]]) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" 
// CHECK: "tf.TPUExecuteAndUpdateVariables"(%[[ID]], %[[R1]], %[[COMPILE]]#1) "tf_device.launch"() ({ "tf.TPUExecuteAndUpdateVariables"(%id, %arg31, %compile#1) @@ -84,9 +84,9 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-SAME: [%[[STATE0]], %[[STATE1]]] as %[[STATE:.*]]: tensor>> // CHECK-SAME: devices = {TPU_REPLICATED_CORE_0 = ["/device:TPU:0", "/device:TPU:1"] // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK-NEXT: "tf.TPUReshardVariables"(%[[V0]], %[[V1]], %[[DEFAULT]], %[[STATE]]) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" func.return } } @@ -296,9 +296,9 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr %id = "tf.Identity"(%arg30) : (tensor<*x!tf_type.resource>>) -> tensor<*x!tf_type.resource>> // CHECK: "tf_device.parallel_execute" // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK-NEXT: "tf.TPUReshardVariables"(%[[ID]], %[[R1]], %[[COMPILE]]#1, %[[R_STATE]]) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" // CHECK: "tf.TPUExecuteAndUpdateVariables"(%[[ID]], %[[R1]], %[[COMPILE]]#1) "tf_device.parallel_execute"() ({ "tf_device.launch"() ({ @@ -324,9 +324,9 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-SAME: [%[[STATE0]], %[[STATE1]]] as %[[STATE:.*]]: tensor>> // CHECK-SAME: devices = {TPU_REPLICATED_CORE_0 = ["/device:TPU:0", "/device:TPU:1"] // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK-NEXT: "tf.TPUReshardVariables"(%[[V0]], %[[V1]], %[[DEFAULT]], %[[STATE]]) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" func.return } } @@ -391,9 +391,9 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK: %[[ID:.*]] = "tf.Identity"(%[[R0]]) %id = "tf.Identity"(%arg30) : 
(tensor<*x!tf_type.resource>>) -> tensor<*x!tf_type.resource>> // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK-NEXT: "tf.TPUReshardVariables"(%[[ID]], %[[R1]], %[[COMPILE]]#1, %[[R_STATE]]) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" // CHECK: "tf.TPUExecuteAndUpdateVariables"(%[[ID]], %[[R1]], %[[COMPILE]]#1) "tf_device.launch"() ({ "tf.TPUExecuteAndUpdateVariables"(%id, %arg31, %compile#1) @@ -414,9 +414,9 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-SAME: %[[ARG2]] as %[[V1:.*]]: tensor<*x!tf_type.resource>> // CHECK-SAME: devices = {TPU_REPLICATED_CORE_0 = ["/device:TPU:0", "/device:TPU:1"] // CHECK: "tf_device.launch" + // CHECK-SAME: device = "TPU_REPLICATED_CORE_0" // CHECK-NEXT: "tf.TPUReshardVariables"(%[[V0]], %[[V1]], %[[DEFAULT]], %[[STATE]]) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" func.return } } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_colocate_composite_resource_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_colocate_composite_resource_ops.mlir index b2896fa543f31d..62fe231555b647 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_colocate_composite_resource_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_colocate_composite_resource_ops.mlir @@ -13,9 +13,9 @@ func.func @testReadVariableOpColocated(%arg0: tensor<*x!tf_type.resource>>) -> tensor<4xf32> %1 = "tf.A"() : () -> (tensor<2x!tf_type.string>) "tf_device.launch"() ({ @@ -43,9 +43,9 @@ func.func @testReadVariableOpAfterIdentityColocated(%arg0: tensor<*x!tf_type.res n = 2 : i32} { // CHECK: %[[IDENTITY_OUT:.*]] = "tf.Identity"(%[[RI_0]]) // CHECK: %[[RESOURCE_OUT:.*]] = "tf_device.launch"() + // CHECK-SAME: TPU_REPLICATED_CORE_0 // CHECK-NEXT: %[[READ_OUT:.*]] = "tf.ReadVariableOp"(%[[IDENTITY_OUT]]) // CHECK-NEXT: tf_device.return %[[READ_OUT]] - // CHECK-NEXT: TPU_REPLICATED_CORE_0 
%0 = "tf.Identity"(%arg1) : (tensor<*x!tf_type.resource>>) -> tensor<*x!tf_type.resource>> %1 = "tf.ReadVariableOp"(%0) : (tensor<*x!tf_type.resource>>) -> tensor<4xf32> %2 = "tf.A"() : () -> (tensor<2x!tf_type.string>) @@ -77,9 +77,9 @@ func.func @testAssignVariableOpColocated(%arg0: tensor<*x!tf_type.resource tensor<4xf32> // CHECK: "tf_device.launch"() + // CHECK-SAME: TPU_REPLICATED_CORE_0 // CHECK-NEXT: "tf.AssignVariableOp"(%[[RI_0]], %[[VAL_OUT]]) - // CHECK-NEXT: tf_device.return - // CHECK-NEXT: TPU_REPLICATED_CORE_0 + // CHECK: tf_device.return %1 = "tf.A"() : () -> (tensor<4xf32>) "tf.AssignVariableOp"(%arg1, %1) : (tensor<*x!tf_type.resource>>, tensor<4xf32>) -> () %2 = "tf.B"() : () -> (tensor<2x!tf_type.string>) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir index 8b128e5495a1f1..9796913d50cc15 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir @@ -611,9 +611,9 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: func @no_replication_device func.func @no_replication_device() { "tf_device.cluster_func"() {_xla_compile_device_type = "TPU", _replication_info = "__no_replication_cluster", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device = "/job:worker/replica:0/task:0/device:TPU:1", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = [], use_spmd_for_xla_partitioning = false} : () -> () + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:1"}> // CHECK: tf.TPUExecute // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:TPU:1" func.return } func.func @empty_func() { @@ -629,9 +629,9 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices 
= ["/job:wor // CHECK-LABEL: func @no_replication_device func.func @no_replication_device() { "tf_device.cluster_func"() {_xla_compile_device_type = "TPU", _replication_info = "__no_replication_cluster", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device = "/job:worker/replica:0/task:0/device:CPU:0", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = [], use_spmd_for_xla_partitioning = false} : () -> () + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:0"}> // CHECK: tf.TPUExecute // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:worker/replica:0/task:0/device:TPU:0" func.return } func.func @empty_func() { @@ -709,20 +709,18 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor %1 = "tf_device.cluster_func"(%0) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) - // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) // CHECK-SAME: metadata // CHECK-SAME: mlir_module // CHECK-SAME: func @main // CHECK-SAME: tf.B // CHECK-NOT: func = @tpu0_func - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: 
"tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK-SAME: device = "/job:worker/replica:0/task:0/device:TPU:0" // CHECK-NEXT: "tf.TPUExecute"(%[[A_OUTPUT]], %[[COMPILE_OUTPUT]]#1) - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:0" %2 = "tf.C"(%1) : (tensor) -> tensor // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[EXECUTE_OUTPUT]]) @@ -756,17 +754,15 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-SAME: n = 2 %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[RI_0]]) - // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) // CHECK-SAME: metadata // CHECK-SAME: mlir_module // CHECK-SAME: func @main // CHECK-SAME: tf.B // CHECK-NOT: func = @tpu0_func - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute"(%[[RI_0]], %[[COMPILE_OUTPUT]]#1) %2 = "tf_device.cluster_func"(%ri_0) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor) -> tensor @@ -799,8 +795,8 
@@ module attributes {tf.versions = {producer = 888 : i32}} { %1 = "tf_device.cluster_func"(%0) {device = "gpu0", func = @gpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor) -> tensor // CHECK: tf_device.cluster_func - // CHECK-SAME: device = "gpu0" // CHECK-SAME: func = @gpu0_func + // CHECK-SAME: device = "gpu0" // CHECK-SAME: num_cores_per_replica = 1 // CHECK-SAME: step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP" // CHECK-NOT: metadata @@ -826,7 +822,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor %1 = "tf_device.cluster_func"(%0) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) - // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) // CHECK-SAME: metadata // CHECK-SAME: mlir_module @@ -835,13 +831,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-SAME: func private @nested_func // CHECK-SAME: tf.D // CHECK-NOT: func = @tpu0_func - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: 
"tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) - // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" - // CHECK: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecute"(%[[A_OUTPUT]], %[[COMPILE_OUTPUT]]#1) - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:0" %2 = "tf.C"(%1) : (tensor) -> tensor // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[EXECUTE_OUTPUT]]) @@ -1198,14 +1191,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:3 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"() // CHECK: "tf_device.launch" - // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) + // CHECK: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) // CHECK: [[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecute"(%[[READ_VAR_0]], %[[COMPILE_OUTPUT]]#1) - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:1"}> // CHECK-NEXT: "tf.TPUExecute"(%[[READ_VAR_1]], %[[COMPILE_OUTPUT]]#2) - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:1" %computation = "tf_device.cluster_func"(%partitioned_input) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @computation, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\01\01\02\10\01\18\02\22\08\00\00\00\00\00\00\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1], input_sharding_configuration = [""], output_sharding_configuration = [""], use_spmd_for_xla_partitioning = true} : (tensor) -> tensor // CHECK-NOT: 
tf.TPUPartitionedOutputV2 %partitioned_output:2 = "tf.TPUPartitionedOutputV2"(%computation) {N = 2 : i64, partition_dims = []} : (tensor) -> (tensor, tensor) @@ -1238,12 +1229,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: "tf_device.launch" // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUTPUT]]#0) // CHECK: [[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecute"(%[[READ_VAR_0]], %[[COMPILE_OUTPUT]]#1) - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:worker/replica:0/task:0/device:TPU:1"}> // CHECK-NEXT: "tf.TPUExecute"(%[[READ_VAR_1]], %[[COMPILE_OUTPUT]]#2) - // CHECK: device = "/job:worker/replica:0/task:0/device:TPU:1" %computation = "tf_device.cluster_func"(%partitioned_input) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @computation, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\01\01\02\10\01\18\02\22\08\00\00\00\00\00\00\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1], input_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01"], output_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01"], use_spmd_for_xla_partitioning = true} : (tensor<3x4xf32>) -> tensor<3x4xf32> // CHECK-NOT: tf.TPUPartitionedOutputV2 %partitioned_output:2 = "tf.TPUPartitionedOutputV2"(%computation) {_XlaSharding = "\08\03\1A\02\01\02\22\02\00\01", partition_dims = [1, 2]} : (tensor<3x4xf32>) -> (tensor<3x2xf32>, tensor<3x2xf32>) @@ -1443,10 +1432,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: "tf_device.parallel_execute" // CHECK-NOT:"tf._XlaCompileMlirPlaceholderProgramKey" // CHECK: "tf.D"(%[[COMPILE_OUTPUT]]#1 + 
// CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // CHECK: "tf.TPUExecute" - // CHECK: device = "TPU_REPLICATED_CORE_0" + // CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK: "tf.TPUExecute" - // CHECK: device = "TPU_REPLICATED_CORE_1" // CHECK-NOT: "tf.TPUExecute" %3 = "tf_device.parallel_execute"() ({ %program = "tf._XlaCompileMlirPlaceholderProgramKey"() : () -> tensor<3x!tf_type.string> @@ -1485,10 +1474,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: "tf._TPUCompileMlir" // CHECK: "tf.TPUCompileSucceededAssert" // CHECK: "tf_device.parallel_execute" + // CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // CHECK: "tf.TPUExecute" - // CHECK: device = "TPU_REPLICATED_CORE_0" + // CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK: "tf.TPUExecute" - // CHECK: device = "TPU_REPLICATED_CORE_1" // CHECK-NOT: "tf.TPUExecute" // CHECK-NOT:"tf._XlaCompileMlirPlaceholderProgramKey" // CHECK: "tf.D"(%[[COMPILE_OUTPUT]]#1 @@ -1524,23 +1513,19 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:0/device:TPU_SYSTEM:0"]} { // CHECK-LABEL: func @non_replicated_parallel_execute func.func @non_replicated_parallel_execute(%arg0: tensor<8xi32>) -> tensor<8xi32> { - // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir"() // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = 
"/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.parallel_execute" - // CHECK-NEXT: "tf_device.launch" + // CHECK-NEXT: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: "tf.TPUExecute" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:TPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:1"}> // CHECK-NEXT: "tf.TPUExecute" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:TPU:1" %0 = "tf_device.cluster_func"(%arg0) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\01\01\02\10\01\18\02\22\08\00\00\00\00\00\00\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor<8xi32>) -> tensor<8xi32> func.return %0 : tensor<8xi32> } @@ -1587,23 +1572,19 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: tf_device.replicate // CHECK-SAME: devices = {TPU_REPLICATED_CORE_0 = ["/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:1/device:TPU:1"], TPU_REPLICATED_CORE_1 = ["/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:1/device:TPU:0"], TPU_REPLICATED_HOST_0 = ["/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:1/device:CPU:0"], TPU_REPLICATED_HOST_1 = ["/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:1/device:CPU:0"]} %0:2 
= tf_device.replicate([%arg0, %arg1] as %ri: tensor<8xi32>) {n = 2 : i32} { - // CHECK-NEXT: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK-NEXT: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir"() // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: "tf_device.parallel_execute" - // CHECK-NEXT: "tf_device.launch" + // CHECK-NEXT: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // CHECK-NEXT: "tf.TPUExecute" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: "tf.TPUExecute" // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_1" %1 = "tf_device.cluster_func"(%ri) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor<8xi32>) -> tensor<8xi32> tf_device.return %1 : tensor<8xi32> } @@ -1632,13 +1613,11 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" // CHECK: "tf._TPUCompileMlir" // CHECK: 
%[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" - // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // CHECK-NEXT: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf.TPUExecute"(%[[RI_0]], %[[RI_1]], %[[RI_2]], %[[COMPILE]]#1) // CHECK-NEXT: tf_device.return %[[EXECUTE_OUTPUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: "tf.TPUExecute"(%[[RI_1]], %[[RI_2]], %[[COMPILE]]#2) - // CHECK: device = "TPU_REPLICATED_CORE_1" %1 = "tf_device.cluster_func"(%ri, %ri2, %ri3) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "", ""], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor<8xi32>, tensor<*xi1>, tensor<*xi32>) -> tensor<8xi32> tf_device.return %1 : tensor<8xi32> } @@ -1663,20 +1642,16 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: TPU_REPLICATED_CORE_0 = ["/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:1/device:TPU:1"] // CHECK-SAME: TPU_REPLICATED_CORE_1 = ["/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:1/device:TPU:0"] %0:2 = tf_device.replicate([%arg0, %arg1] as %ri: tensor<8xi32>) {n = 2 : i32} { - // CHECK-NEXT: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK-NEXT: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: 
"tf._TPUCompileMlir"() - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" - // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // CHECK-NEXT: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_OUTPUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: "tf.TPUExecute" - // CHECK: device = "TPU_REPLICATED_CORE_1" %1 = "tf_device.cluster_func"(%ri) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor<8xi32>) -> tensor<8xi32> tf_device.return %1 : tensor<8xi32> } @@ -1700,21 +1675,17 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: TPU_REPLICATED_CORE_0 = ["/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:1/device:TPU:1"] // CHECK-SAME: TPU_REPLICATED_CORE_1 = ["/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:1/device:TPU:0"] %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri: tensor<8xi32>) {n = 2 
: i32} { - // CHECK-NEXT: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK-NEXT: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir"() - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:3 = "tf_device.parallel_execute" - // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // CHECK-NEXT: %[[EXECUTE_0_OUTPUT:[0-9]*]]:2 = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_0_OUTPUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_1_OUTPUT]] - // CHECK: device = "TPU_REPLICATED_CORE_1" %1, %2 = "tf_device.cluster_func"(%ri) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""], use_spmd_for_xla_partitioning = false} : (tensor<8xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } @@ -1763,25 +1734,21 @@ module attributes 
{tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: TPU_REPLICATED_CORE_0 = ["/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:1/device:TPU:1"] // CHECK-SAME: TPU_REPLICATED_CORE_1 = ["/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:1/device:TPU:0"] %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // // CHECK: %[[CONST_SPLIT_DIM:.*]] = "tf.Const"() // CHECK: %[[SPLIT_OUT:[a-z0-9]+]]:2 = "tf.Split"(%[[CONST_SPLIT_DIM]], %[[RI_0]]) // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:3 = "tf_device.parallel_execute" - // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> // // CHECK-NEXT: %[[EXECUTE_0_OUTPUT:[0-9]*]]:2 = "tf.TPUExecute"(%[[SPLIT_OUT]]#0, %[[COMPILE]]#1) // CHECK-NEXT: tf_device.return %[[EXECUTE_0_OUTPUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute"(%[[SPLIT_OUT]]#1, %[[RI_1]], %[[COMPILE]]#2) // CHECK-NEXT: tf_device.return %[[EXECUTE_1_OUTPUT]] - // CHECK: device = "TPU_REPLICATED_CORE_1" %1, %2 = 
"tf_device.cluster_func"(%ri_1, %ri_2) {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""], use_spmd_for_xla_partitioning = false} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } @@ -1830,22 +1797,18 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: TPU_REPLICATED_CORE_0 = ["/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:1/device:TPU:1"] // CHECK-SAME: TPU_REPLICATED_CORE_1 = ["/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:1/device:TPU:0"] %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:3 = "tf_device.parallel_execute" - // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = 
"TPU_REPLICATED_CORE_0"}> // CHECK-NEXT: %[[EXECUTE_0_OUTPUT:[0-9]*]]:2 = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_0_OUTPUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_1_OUTPUT]] - // CHECK: device = "TPU_REPLICATED_CORE_1" // // CHECK: %[[CONST_CONCAT_DIM:.*]] = "tf.Const"() // CHECK: %[[CONCAT_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT_DIM]], %[[PARALLEL_EXECUTE_OUTPUT]]#0, %[[PARALLEL_EXECUTE_OUTPUT]]#2 @@ -1899,22 +1862,18 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: TPU_REPLICATED_CORE_0 = ["/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:1/device:TPU:1"] // CHECK-SAME: TPU_REPLICATED_CORE_1 = ["/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:1/device:TPU:0"] %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:3 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:3 = "tf_device.parallel_execute" - // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch"() <{device = 
"TPU_REPLICATED_CORE_0"}> // CHECK-NEXT: %[[EXECUTE_0_OUTPUT:[0-9]*]]:2 = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_0_OUTPUT]] - // CHECK-NEXT: device = "TPU_REPLICATED_CORE_0" - // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_1_OUTPUT]] - // CHECK: device = "TPU_REPLICATED_CORE_1" // // CHECK: %[[CONST_CONCAT_DIM:.*]] = "tf.Const"() // CHECK: %[[CONCAT_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT_DIM]], %[[PARALLEL_EXECUTE_OUTPUT]]#1, %[[PARALLEL_EXECUTE_OUTPUT]]#2 @@ -2091,12 +2050,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: [%[[ARG_0]], %[[ARG_1]]] as %[[RI_0:[a-z0-9]*]]: tensor<128x10xf32> // CHECK-SAME: [%[[ARG_2]], %[[ARG_3]]] as %[[RI_1:[a-z0-9]*]]: tensor<*xi32> %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[CONST_SPLIT_0_DIM:.*]] = "tf.Const"() // CHECK: %[[SPLIT_0_OUT:[a-z0-9]+]]:2 = "tf.Split"(%[[CONST_SPLIT_0_DIM]], %[[RI_0]]) // CHECK: %[[CONST_SPLIT_1_DIM:.*]] = "tf.Const"() @@ -2198,12 +2155,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: [%[[ARG_0]], %[[ARG_1]]] as %[[RI_0:[a-z0-9]*]]: 
tensor<128x10xf32> // CHECK-SAME: [%[[ARG_2]], %[[ARG_3]]] as %[[RI_1:[a-z0-9]*]]: tensor<*xi32> %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[CONST_SPLIT_0_DIM:.*]] = "tf.Const"() // CHECK: %[[SPLIT_0_OUT:[a-z0-9]+]]:2 = "tf.Split"(%[[CONST_SPLIT_0_DIM]], %[[RI_0]]) // CHECK: %[[CONST_SPLIT_1_DIM:.*]] = "tf.Const"() @@ -2282,12 +2237,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: [%[[ARG_0]], %[[ARG_1]]] as %[[RI_0:[a-z0-9]*]]: tensor<128x10xf32> // CHECK-SAME: [%[[ARG_2]], %[[ARG_3]]] as %[[RI_1:[a-z0-9]*]]: tensor<*xi32> %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:5 = "tf_device.parallel_execute" // CHECK-NEXT: 
%[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: %[[EXECUTE_0_OUTPUT:[0-9]*]]:2 = "tf.TPUExecute" @@ -2367,12 +2320,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: [%[[ARG_0]], %[[ARG_1]]] as %[[RI_0:[a-z0-9]*]]: tensor<128x10xf32> // CHECK-SAME: [%[[ARG_2]], %[[ARG_3]]] as %[[RI_1:[a-z0-9]*]]: tensor<*xi32> %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[CONST_SPLIT_0_DIM:.*]] = "tf.Const"() // CHECK: %[[SPLIT_0_OUT:[a-z0-9]+]]:2 = "tf.Split"(%[[CONST_SPLIT_0_DIM]], %[[RI_0]]) // CHECK: %[[CONST_SPLIT_1_DIM:.*]] = "tf.Const"() @@ -2451,12 +2402,10 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-SAME: [%[[ARG_0]], %[[ARG_1]]] as %[[RI_0:[a-z0-9]*]]: tensor<128x10xf32> // CHECK-SAME: [%[[ARG_2]], %[[ARG_3]]] as %[[RI_1:[a-z0-9]*]]: tensor<*xi32> %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { - // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch" + // CHECK: %[[COMPILE:[a-z0-9]+]]:5 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf._TPUCompileMlir" - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: "tf_device.launch" + // CHECK: "tf_device.launch"() <{device = 
"/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE]]#0) - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:5 = "tf_device.parallel_execute" // CHECK-NEXT: %[[LAUNCH_0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: %[[EXECUTE_0_OUTPUT:[0-9]*]]:2 = "tf.TPUExecute" @@ -2613,14 +2562,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:0/device:TPU_SYSTEM:0"}, tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 1199 : i32}} { func.func @return_from_host_and_tpu() -> (tensor, tensor) attributes {tf._construction_context = "kEagerRuntime", tf.signature.is_stateful} { // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" - // CHECK: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[LAUNCH_0_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" // CHECK: tf_device.return %[[B_OUTPUT:[0-9]*]] - // CHECK: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[LAUNCH_1_OUTPUT:[0-9]*]] = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute" // CHECK: tf_device.return %[[EXECUTE_1_OUTPUT]] - // CHECK: device = "/job:localhost/replica:0/task:0/device:TPU:0" // CHECK: return %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]#1, %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]#0 %0:2 = "tf_device.parallel_execute"() ({ %1 = "tf_device.launch"() ({ diff --git a/tensorflow/compiler/mlir/tensorflow/tests/xla_rewrite_v2.mlir 
b/tensorflow/compiler/mlir/tensorflow/tests/xla_rewrite_v2.mlir index e79eb9f0b1fbab..e36bdaa72e41b8 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/xla_rewrite_v2.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/xla_rewrite_v2.mlir @@ -5,11 +5,11 @@ module attributes {tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/ // CHECK-LABEL: func.func @convert_cluster_func func.func @convert_cluster_func(%arg0: tensor) -> tensor { // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaCompile"(%arg0) <{function = @func, must_compile = true, operandSegmentSizes = array}> : (tensor) -> (tensor<3x!tf_type.string>, tensor) - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaRun"(%arg0, %0#0) : (tensor, tensor<3x!tf_type.string>) -> tensor - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} : () -> tensor %0 = "tf_device.cluster_func"(%arg0) {func = @func, device = "/job:localhost/replica:0/task:0/device:GPU:0"} : (tensor) -> tensor func.return %0 : tensor } @@ -25,11 +25,11 @@ module attributes {tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/ // CHECK-LABEL: func.func @convert_cluster_func_with_resources_in_order func.func @convert_cluster_func_with_resources_in_order(%arg0: tensor, %arg1: tensor) -> tensor { // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaCompile"(%arg1, %arg0) <{function = @func_with_resources_in_order, must_compile = true, operandSegmentSizes = array}> : (tensor, tensor) - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaRun"(%arg1, %arg0, %0#0) : (tensor, tensor, 
tensor<3x!tf_type.string>) -> tensor - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} : () -> tensor %0 = "tf_device.cluster_func"(%arg1, %arg0) {func = @func_with_resources_in_order, device = "/job:localhost/replica:0/task:0/device:GPU:0"} : (tensor, tensor) -> (tensor) func.return %0 : tensor } @@ -45,18 +45,18 @@ module attributes {tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/ // CHECK-LABEL: func.func @convert_cluster_func_with_resources func.func @convert_cluster_func_with_resources(%arg0: tensor, %arg1: tensor) -> tensor { // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaCompile"(%arg1, %arg0) <{function = @func_with_resources_1, must_compile = true, operandSegmentSizes = array}> : (tensor, tensor) -> (tensor<3x!tf_type.string>, tensor) - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaRun"(%arg1, %arg0, %0#0) : (tensor, tensor, tensor<3x!tf_type.string>) -> tensor - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} : () -> tensor %0 = "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_resources_1, device = "/job:localhost/replica:0/task:0/device:GPU:0"} : (tensor, tensor) -> tensor // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaCompile"(%arg1, %arg0) <{function = @func_with_resources_2, must_compile = true, operandSegmentSizes = array}> : (tensor, tensor) -> (tensor<3x!tf_type.string>, tensor) - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} // CHECK: "tf_device.launch"() + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaRun"(%arg1, %arg0, %2#0) : (tensor, tensor, tensor<3x!tf_type.string>) -> tensor - // CHECK: {device = 
"/job:localhost/replica:0/task:0/device:GPU:0"} : () -> tensor %1 = "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_resources_2, device = "/job:localhost/replica:0/task:0/device:GPU:0"} : (tensor, tensor) -> tensor return %0 : tensor } @@ -77,16 +77,16 @@ module attributes {tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/ module attributes {tf.devices = ["/job:localhost/replica:0/task:0/device:CPU:0"], tf.versions = {producer = 888 : i32}} { func.func @outside_compilation_in_generic_pipeline(%arg0: tensor<2xi32>) -> tensor<2xi32> { // CHECK: tf_device.launch + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: "tf._XlaCompile"() <{function = @func, must_compile = true, operandSegmentSizes = array}> - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} // CHECK: tf_device.parallel_execute // CHECK: tf_device.launch + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK: tf.B // CHECK: tf._XlaSendFromHost - // CHECK: {device = "/job:localhost/replica:0/task:0/device:CPU:0"} // CHECK: tf_device.launch + // CHECK-SAME: <{device = "/job:localhost/replica:0/task:0/device:GPU:0"}> // CHECK: tf._XlaRun - // CHECK: {device = "/job:localhost/replica:0/task:0/device:GPU:0"} %0 = "tf_device.parallel_execute"() ({ "tf_device.launch"() ({ %1 = "tf._XlaCompileMlirPlaceholderProgramKey"() : () -> tensor<3x!tf_type.string> diff --git a/tensorflow/compiler/mlir/tfrt/tests/runtime_lowering_tpu.mlir b/tensorflow/compiler/mlir/tfrt/tests/runtime_lowering_tpu.mlir index 5225c2eed9bb09..d6ffe03d2708e5 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/runtime_lowering_tpu.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/runtime_lowering_tpu.mlir @@ -4,7 +4,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-LABEL: @converts_cluster func.func @converts_cluster() { - // CHECK: %0:2 = "tf_device.launch"() ({ + // CHECK: %0:2 = "tf_device.launch"() 
<{{.*}}> ({ // CHECK: %compilation_status, %program = "tf._TPUCompileMlir"() "tf_device.cluster_func"() {_xla_compile_device_type = "TPU", _replication_info = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = [], use_spmd_for_xla_partitioning = false} : () -> () func.return @@ -26,4 +26,4 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor func.func @empty_func() { func.return } -} \ No newline at end of file +} diff --git a/tensorflow/dtensor/mlir/tests/move_compilation_to_host.mlir b/tensorflow/dtensor/mlir/tests/move_compilation_to_host.mlir index 165e037c9a3541..06f9b2fe103d14 100644 --- a/tensorflow/dtensor/mlir/tests/move_compilation_to_host.mlir +++ b/tensorflow/dtensor/mlir/tests/move_compilation_to_host.mlir @@ -53,7 +53,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-LABEL: func private @_func_1 // CHECK-SAME: %[[ARG0:.*]]: tensor func.func private @_func_1(%arg0: tensor) -> tensor { - // CHECK: %[[COMPILE_OUT:.*]]:2 = "tf_device.launch"() + // CHECK: %[[COMPILE_OUT:.*]]:2 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: %[[COMPILATION_STATUS:.*]], %[[PROGRAM_KEY:.*]] = "tf._TPUCompileMlir"() // CHECK-NEXT: "tf._HostSend"(%[[PROGRAM_KEY]]) // CHECK-SAME: recv_device = "/job:localhost/replica:0/task:0/device:CPU:0" @@ -72,12 +72,10 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-SAME: tensor_name = "compilation_send_recv_key_1 // CHECK-SAME: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK-NEXT: tf_device.return %[[COMPILATION_STATUS]], %[[PROGRAM_KEY]] - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK-NEXT: "tf_device.launch"() + // CHECK: "tf_device.launch"() <{device = 
"/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUT]]#0) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK-NEXT: %[[ID_TO_ORDINAL:.*]] = "tf.Const" + // CHECK: %[[ID_TO_ORDINAL:.*]] = "tf.Const" // CHECK-SAME: value = dense<0> // CHECK-NEXT: %[[SIZE_TYPE:.*]] = "tf.Const" // CHECK-SAME: value = dense<1> @@ -165,7 +163,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-LABEL: func private @_func_1 // CHECK-SAME: %[[ARG0:.*]]: tensor func.func private @_func_1(%arg0: tensor) -> tensor { - // CHECK: %[[COMPILE_OUT:.*]]:2 = "tf_device.launch"() + // CHECK: %[[COMPILE_OUT:.*]]:2 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: %[[COMPILATION_STATUS:.*]], %[[PROGRAM_KEY:.*]] = "tf._TPUCompileMlir"() // CHECK-NEXT: "tf._HostSend"(%[[PROGRAM_KEY]]) // CHECK-SAME: recv_device = "/job:localhost/replica:0/task:0/device:CPU:0" @@ -185,12 +183,10 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr // CHECK-SAME: tensor_name = "compilation_send_recv_key_1 // CHECK-SAME: device = "/job:localhost/replica:0/task:0/device:CPU:0" // CHECK-NEXT: tf_device.return %[[COMPILATION_STATUS]], %[[PROGRAM_KEY]] - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK-NEXT: "tf_device.launch"() + // CHECK: "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:CPU:0"}> // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%[[COMPILE_OUT]]#0) // CHECK-NEXT: tf_device.return - // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:CPU:0" - // CHECK-NEXT: %[[ID_TO_ORDINAL:.*]] = "tf.Const" + // CHECK: %[[ID_TO_ORDINAL:.*]] = "tf.Const" // CHECK-SAME: value = dense<0> // CHECK-NEXT: %[[SIZE_TYPE:.*]] = "tf.Const" // CHECK-SAME: value = dense<1> diff --git 
a/tensorflow/dtensor/mlir/tests/multi_device_expansion.mlir b/tensorflow/dtensor/mlir/tests/multi_device_expansion.mlir index 339da803cf4e58..091a9b2d1ef6fd 100644 --- a/tensorflow/dtensor/mlir/tests/multi_device_expansion.mlir +++ b/tensorflow/dtensor/mlir/tests/multi_device_expansion.mlir @@ -133,25 +133,25 @@ module @test_inferred_resource_attributes attributes {dtensor.all_reduce_combine // CHECK-SAME: %arg0: tensor<1x2xi32> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"} // CHECK-SAME: %arg1: tensor<1x2xi32> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:1"} // CHECK-SAME: -> (tensor<2xi32>, tensor<2xi32>) { -// CHECK-NEXT: %0:2 = "tf_device.launch"() ({ +// CHECK-NEXT: %0:2 = "tf_device.launch"() <{device = ""}> ({ // CHECK-NEXT: %compilation_status, %program = "tf._TPUCompileMlir"() <{metadata = ""}> : () -> (tensor, tensor<3x!tf_type.string>) // CHECK-NEXT: tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> -// CHECK-NEXT: }) {device = ""} : () -> (tensor, tensor<3x!tf_type.string>) -// CHECK-NEXT: "tf_device.launch"() ({ +// CHECK-NEXT: }) : () -> (tensor, tensor<3x!tf_type.string>) +// CHECK-NEXT: "tf_device.launch"() <{device = ""}> ({ // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%0#0) : (tensor) -> () // CHECK-NEXT: tf_device.return -// CHECK-NEXT: }) {device = ""} : () -> () +// CHECK-NEXT: }) : () -> () // CHECK-NEXT: %1:2 = "tf_device.parallel_execute"() ({ -// CHECK-NEXT: %2 = "tf_device.launch"() ({ +// CHECK-NEXT: %2 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> ({ // CHECK-NEXT: %3 = "tf.TPUExecute"(%arg0, %0#1) : (tensor<1x2xi32>, tensor<3x!tf_type.string>) -> tensor<2xi32> // CHECK-NEXT: tf_device.return %3 : tensor<2xi32> -// CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> tensor<2xi32> +// CHECK-NEXT: }) : () -> tensor<2xi32> // CHECK-NEXT: tf_device.return %2 : tensor<2xi32> // CHECK-NEXT: }, { -// CHECK-NEXT: %2 = 
"tf_device.launch"() ({ +// CHECK-NEXT: %2 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:1"}> ({ // CHECK-NEXT: %3 = "tf.TPUExecute"(%arg1, %0#1) : (tensor<1x2xi32>, tensor<3x!tf_type.string>) -> tensor<2xi32> // CHECK-NEXT: tf_device.return %3 : tensor<2xi32> -// CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:1"} : () -> tensor<2xi32> +// CHECK-NEXT: }) : () -> tensor<2xi32> // CHECK-NEXT: tf_device.return %2 : tensor<2xi32> // CHECK-NEXT: }) : () -> (tensor<2xi32>, tensor<2xi32>) // CHECK-NEXT: return %1#0, %1#1 : tensor<2xi32>, tensor<2xi32> @@ -189,25 +189,25 @@ module attributes {dtensor.all_reduce_combiner.num_ops_in_group = 0 : i64, dtens // CHECK-SAME: %arg1: tensor {tf.device = "/job:localhost/replica:0/task:0/device:TPU:1"} // CHECK-SAME: %arg2: tensor>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:0"} // CHECK-SAME: %arg3: tensor>> {tf.device = "/job:localhost/replica:0/task:0/device:TPU:1"} -// CHECK-NEXT: %0:2 = "tf_device.launch"() ({ +// CHECK-NEXT: %0:2 = "tf_device.launch"() <{device = ""}> ({ // CHECK-NEXT: %compilation_status, %program = "tf._TPUCompileMlir"() <{metadata = ""}> : () -> (tensor, tensor<3x!tf_type.string>) // CHECK-NEXT: tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string> -// CHECK-NEXT: }) {device = ""} : () -> (tensor, tensor<3x!tf_type.string>) -// CHECK-NEXT: "tf_device.launch"() ({ +// CHECK-NEXT: }) : () -> (tensor, tensor<3x!tf_type.string>) +// CHECK-NEXT: "tf_device.launch"() <{device = ""}> ({ // CHECK-NEXT: "tf.TPUCompileSucceededAssert"(%0#0) : (tensor) -> () // CHECK-NEXT: tf_device.return -// CHECK-NEXT: }) {device = ""} : () -> () +// CHECK-NEXT: }) : () -> () // CHECK-NEXT: %1:2 = "tf_device.parallel_execute"() ({ -// CHECK-NEXT: %2 = "tf_device.launch"() ({ +// CHECK-NEXT: %2 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:0"}> ({ // CHECK-NEXT: %3 = "tf.TPUExecute"(%arg0, %0#1) : (tensor, 
tensor<3x!tf_type.string>) -> tensor // CHECK-NEXT: tf_device.return %3 : tensor -// CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:0"} : () -> tensor +// CHECK-NEXT: }) : () -> tensor // CHECK-NEXT: tf_device.return %2 : tensor // CHECK-NEXT: }, { -// CHECK-NEXT: %2 = "tf_device.launch"() ({ +// CHECK-NEXT: %2 = "tf_device.launch"() <{device = "/job:localhost/replica:0/task:0/device:TPU:1"}> ({ // CHECK-NEXT: %3 = "tf.TPUExecute"(%arg1, %0#1) : (tensor, tensor<3x!tf_type.string>) -> tensor // CHECK-NEXT: tf_device.return %3 : tensor -// CHECK-NEXT: }) {device = "/job:localhost/replica:0/task:0/device:TPU:1"} : () -> tensor +// CHECK-NEXT: }) : () -> tensor // CHECK-NEXT: tf_device.return %2 : tensor // CHECK-NEXT: }) : () -> (tensor, tensor) // CHECK-NEXT: "tf.AssignVariableOp"(%arg2, %1#0) <{validate_shape = false}> {_global_shape = [], _layout = [], device = "/job:localhost/replica:0/task:0/device:TPU:0"} : (tensor>>, tensor) -> () diff --git a/tensorflow/dtensor/mlir/tests/update_tpu_metadata.mlir b/tensorflow/dtensor/mlir/tests/update_tpu_metadata.mlir index c7632cad45a4d8..98322240757575 100644 --- a/tensorflow/dtensor/mlir/tests/update_tpu_metadata.mlir +++ b/tensorflow/dtensor/mlir/tests/update_tpu_metadata.mlir @@ -9,9 +9,9 @@ func.func @main() { func.func @f_callee() { // CHECK: tf_device.launch + // CHECK: device = "" // CHECK: "tf._TPUCompileMlir" // CHECK-SAME: metadata = "\0A\09\08\01\12\05\12\03\08\80\01\18\04 \01" - // CHECK: device = "" %0:2 = "tf_device.launch"() ({ %1, %2 = "tf._TPUCompileMlir"() { NumDynamicShapes = 0 : i64, @@ -20,7 +20,7 @@ func.func @f_callee() { tf_device.return %1, %2 : tensor, tensor<2x!tf_type.string> }) {device = "tpu_host:0"} : () -> (tensor, tensor<2x!tf_type.string>) - // CHECK-NEXT: "tf.TPUExecute" + // CHECK: "tf.TPUExecute" "tf.TPUExecute"(%0#1) : (tensor<2x!tf_type.string>) -> () func.return } @@ -36,8 +36,8 @@ func.func @main() { func.func @f_callee() { // CHECK: tf_device.launch - // CHECK: 
"tf._TPUCompileMlir" // CHECK: device = "" + // CHECK: "tf._TPUCompileMlir" %0:2 = "tf_device.launch"() ({ %1, %2 = "tf._TPUCompileMlir"() { NumDynamicShapes = 0 : i64, @@ -47,8 +47,8 @@ func.func @f_callee() { }) {device = "tpu_host:0"} : () -> (tensor, tensor<2x!tf_type.string>) // CHECK: tf_device.launch - // CHECK: "tf.TPUExecute" // CHECK: device = "" + // CHECK: "tf.TPUExecute" "tf_device.launch"() ({ "tf.TPUExecute"(%0#1) : (tensor<2x!tf_type.string>) -> () tf_device.return @@ -83,24 +83,24 @@ func.func @f_callee() { // ----- -// Check for Xla Spmd mesh that TPUCompileOp has correct metadata proto and +// Check for Xla Spmd mesh that TPUCompileOp has correct metadata proto and // number of program outputs is equal to number of devices on mesh. // CHECK-LABEL: func @main func.func @main(%arg0: tensor, %arg1: tensor<12x24xf32>) -> (tensor<12x24xf32>) { %0 = "tf.StatefulPartitionedCall"(%arg1) { - config = "|x=2,y=4|0,1,2,3,4,5,6,7|0,1,2,3,4,5,6,7|/job:localhost/replica:0/task:0/device:TPU:0,/job:localhost/replica:0/task:0/device:TPU:1,/job:localhost/replica:0/task:0/device:TPU:2,/job:localhost/replica:0/task:0/device:TPU:3,/job:localhost/replica:0/task:0/device:TPU:4,/job:localhost/replica:0/task:0/device:TPU:5,/job:localhost/replica:0/task:0/device:TPU:6,/job:localhost/replica:0/task:0/device:TPU:7|use_xla_spmd", - config_proto = "", - executor_type = "", + config = "|x=2,y=4|0,1,2,3,4,5,6,7|0,1,2,3,4,5,6,7|/job:localhost/replica:0/task:0/device:TPU:0,/job:localhost/replica:0/task:0/device:TPU:1,/job:localhost/replica:0/task:0/device:TPU:2,/job:localhost/replica:0/task:0/device:TPU:3,/job:localhost/replica:0/task:0/device:TPU:4,/job:localhost/replica:0/task:0/device:TPU:5,/job:localhost/replica:0/task:0/device:TPU:6,/job:localhost/replica:0/task:0/device:TPU:7|use_xla_spmd", + config_proto = "", + executor_type = "", f = @_xla_spmd_func} : (tensor<12x24xf32>) -> tensor<12x24xf32> return %0 : tensor<12x24xf32> } func.func private @_xla_spmd_func(%arg0: 
tensor<12x24xf32>) -> tensor<12x24xf32> { // CHECK: tf_device.launch + // CHECK: device = "" // CHECK: %compilation_status, %program:8 = "tf._TPUCompileMlir" // CHECK-SAME: metadata = "\0A\10\08\01\12\08\12\02\08\0C\12\02\08\18\18\01\22\00\12\02\0A\00\18\01 \08x\01\88\01\ED\91\DC\F5\C3\8C\95\B5\90\01" - // CHECK: device = "" %0:2 = "tf_device.launch"() ({ %compilation_status, %program = "tf._TPUCompileMlir"() {metadata = "\0A\18\08\01\12\08\12\02\08\0C\12\02\08\18\18\01\22\08\08\01\1A\01\01\22\01\00\12\0A\0A\08\08\01\1A\01\01\22\01\00\18\01 \01\88\01\ED\91\DC\F5\C3\8C\95\B5\90\01", mlir_module = "#loc = loc(unknown)\0Amodule attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 1345 : i32}} {\0A func.func @main(%arg0: tensor<12x24xf32> {mhlo.sharding = \22\22} loc(unknown)) -> (tensor<12x24xf32> {mhlo.sharding = \22\22}) {\0A %0 = \22tf.Identity\22(%arg0) : (tensor<12x24xf32>) -> tensor<12x24xf32> loc(#loc)\0A return %0 : tensor<12x24xf32> loc(#loc)\0A } loc(#loc)\0A} loc(#loc)\0A"} : () -> (tensor, tensor<3x!tf_type.string>) tf_device.return %compilation_status, %program : tensor, tensor<3x!tf_type.string>