diff --git a/iree/turbine/aot/compiled_module.py b/iree/turbine/aot/compiled_module.py
index 270534e1..fc1c104b 100644
--- a/iree/turbine/aot/compiled_module.py
+++ b/iree/turbine/aot/compiled_module.py
@@ -44,6 +44,8 @@
     ModuleBuilderOptions,
 )
 
+from .tensor_traits import DeviceAffinity
+
 __all__ = [
     "CompiledModule",
@@ -107,12 +109,27 @@ def __call__(self, *args, **kwargs):
         return self.py_value(*args, **kwargs)
 
 
+class ExportTargetDef:
+    def __init__(
+        self,
+        target: Union[Callable, ExportedProgram],
+        *,
+        arg_device: dict[int, DeviceAffinity] | None = None,
+    ):
+        self.target = target
+        self.arg_device = arg_device
+
+    def __call__(self, *args, **kwargs):
+        return self.target(*args, **kwargs)
+
+
 class ExportProcDef:
     __slots__ = [
         "callable",
         "export_name",
         "signature",
         "file_line_loc",
+        "arg_device",
     ]
 
     def __init__(
@@ -122,14 +139,22 @@ def __init__(
         *,
         signature,
         file_line_loc: Optional[Tuple[str, int]] = None,
+        arg_device: dict[int, DeviceAffinity] | None = None,
     ):
         self.export_name = export_name
         self.callable = callable
         self.signature = signature
         self.file_line_loc = file_line_loc
+        self.arg_device = arg_device
 
     def copy(self) -> "ExportProcDef":
-        return ExportProcDef(self.export_name, self.callable, signature=self.signature)
+        return ExportProcDef(
+            self.export_name,
+            self.callable,
+            signature=self.signature,
+            file_line_loc=self.file_line_loc,
+            arg_device=self.arg_device,
+        )
 
     def __repr__(self):
         return f"<def {self.export_name}({self.signature})>"
@@ -142,14 +167,19 @@ def __init__(
         *,
         export_name: Optional[str] = None,
         public: bool = False,
+        arg_device: dict[int, DeviceAffinity] | None = None,
     ):
         self.export_name = export_name
         self.exported_program = ep
         self.public = public
+        self.arg_device = arg_device
 
     def copy(self) -> "ExportedProgramDef":
         return ExportedProgramDef(
-            self.exported_program, export_name=self.export_name, public=self.public
+            self.exported_program,
+            export_name=self.export_name,
+            public=self.public,
+            arg_device=self.arg_device,
         )
 
     def __repr__(self):
@@ -207,6 +237,19 @@ def globals_defs(self) -> Generator[Tuple[str, GlobalsDef], None, None]:
         )  # type: ignore
 
     def def_attribute(self, key, value):
+        if isinstance(value, ExportTargetDef):
+            if not isinstance(value.target, ExportedProgram):
+                # We expect an exported function.
+                assert callable(value.target) and inspect.isfunction(value.target)
+                return self.def_export_proc(key, value.target, value.arg_device)
+
+            value = ExportedProgramDef(
+                value.target,
+                export_name=key,
+                public=not key.startswith("_"),
+                arg_device=value.arg_device,
+            )
+
         # Some decorators, the only thing we do is convert them to PyOnlyDef.
         # Do that first so the generic descriptor code below handles them.
         if isinstance(value, builtins.jittable):
@@ -233,6 +276,15 @@ def def_attribute(self, key, value):
             logging.debug("DEFINE PY_ONLY: %s = %r", key, value)
             self.add_export(key, value)
             return value
+        if isinstance(value, ExportTargetDef) and isinstance(
+            value.target, ExportedProgram
+        ):
+            value = ExportedProgramDef(
+                value.target,
+                export_name=key,
+                public=not key.startswith("_"),
+                arg_device=value.arg_device,
+            )
         if isinstance(value, ExportedProgramDef):
             if value.export_name is None:
                 value = value.copy()
@@ -250,7 +302,12 @@ def def_attribute(self, key, value):
                 f"compiled module: {value!r}"
             )
 
-    def def_export_proc(self, name, f) -> ExportProcDef:
+    def def_export_proc(
+        self,
+        name,
+        f,
+        arg_device: dict[int, DeviceAffinity] | None = None,
+    ) -> ExportProcDef:
         logging.debug("DEFINE EXPORT: %s = %r", name, f)
         # Get a reasonable location.
         file_line_loc = None
@@ -292,7 +349,13 @@ def def_export_proc(self, name, f) -> ExportProcDef:
             )
             input_sig.append(param_desc)
 
-        info = ExportProcDef(name, f, signature=input_sig, file_line_loc=file_line_loc)
+        info = ExportProcDef(
+            name,
+            f,
+            signature=input_sig,
+            file_line_loc=file_line_loc,
+            arg_device=arg_device,
+        )
         self.add_export(name, info)
         return info
@@ -568,6 +631,20 @@ def save_mlir(inst: "CompiledModule", path: Union[Path, str]):
 
     jittable = staticmethod(builtins.jittable)
 
+    @staticmethod
+    def signature_info(
+        *,
+        arg_device: dict[int, DeviceAffinity] | None = None,
+    ) -> Callable:
+        """Annotate an export target function.
+
+        This annotation is only required when additional information needs
+        to be provided."""
+
+        def _decorator(f: Callable):
+            return ExportTargetDef(f, arg_device=arg_device)
+
+        return _decorator
+
     def __getattr__(self, name):
         info = CompiledModule.get_info(self)
         try:
@@ -633,6 +710,7 @@ def __new__(
                 ep_def.exported_program,
                 symbol_name=ep_def.export_name or "main",
                 symbol_visibility=None if ep_def.public else "private",
+                arg_device=ep_def.arg_device,
             )
 
         # Instantiate procs.
@@ -661,6 +739,7 @@ def invoke_with_self(*args, **kwargs):
                 posargs=proc_def.signature,
                 kwargs={},  # TODO(#128): kwargs
                 loc=loc,
+                arg_device=proc_def.arg_device,
            )
             trace.trace_py_func(invoke_with_self)
             info.shadow_dict[key] = _uncallable_public_export
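The new `CompiledModule.signature_info` decorator is the user-facing entry point on this path: it wraps an export target in an `ExportTargetDef` so per-argument device affinities can be attached. A minimal usage sketch, assuming the public `iree.turbine.aot` exports exercised by the tests later in this diff (shapes and names are illustrative):

```python
from iree.compiler.ir import Context
from iree.turbine.aot import AbstractTensor, CompiledModule, DeviceAffinity


class PinnedArgsModule(CompiledModule):
    # Pin flattened argument index 1 (`b`) to logical device 1; argument 0
    # keeps the default placement.
    @CompiledModule.signature_info(arg_device={1: DeviceAffinity(1)})
    def run(self, a=AbstractTensor(3, 2), b=AbstractTensor(1, 1)):
        return a, b


# The annotated argument is emitted with
# `iree.abi.affinity = #hal.device.promise<@__device_1>` on the func op.
inst = PinnedArgsModule(context=Context(), import_to="import")
print(CompiledModule.get_mlir_module(inst))
```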
diff --git a/iree/turbine/aot/exporter.py b/iree/turbine/aot/exporter.py
index c1adb527..dbd859ac 100644
--- a/iree/turbine/aot/exporter.py
+++ b/iree/turbine/aot/exporter.py
@@ -32,6 +32,8 @@
 from .fx_programs import FxPrograms
 from . import decompositions
 
+from .tensor_traits import DeviceAffinity
+
 __all__ = [
     "export",
     "ExportOutput",
@@ -177,6 +179,7 @@ def export(
     function_name: Optional[str] = None,
     strict_export: bool = True,
     import_symbolic_shape_expressions: bool = False,
+    arg_device: dict[int, DeviceAffinity] | None = None,
 ) -> ExportOutput:
     """Exports a torch.nn.Module.
 
@@ -199,6 +202,7 @@ def export(
     *,
     module_name: Optional[str] = None,
     function_name: Optional[str] = None,
+    arg_device: dict[int, DeviceAffinity] | None = None,
 ) -> ExportOutput:
     """Exports a single entry-point module consisting of an ExportedProgram."""
     ...
@@ -226,6 +230,7 @@ def export(
     function_name: Optional[str] = None,
     strict_export: bool = True,
     import_symbolic_shape_expressions: bool = False,
+    arg_device: dict[int, DeviceAffinity] | None = None,
 ) -> ExportOutput:
     """Generic export of supported entities.
 
@@ -247,6 +252,10 @@ def export(
         must be empty.
       kwargs: Example keyword arguments.
       dynamic_shapes: Dynamic shape specs to pass to torch.export.
+      arg_device: Device affinities for the exported function
+        arguments, i.e. the devices on which the program expects its
+        arguments to reside. A mapping of flattened argument index to
+        device affinity.
 
     Returns:
       An ExportOutput object that wraps the compilation and provides
@@ -266,12 +275,14 @@ def export(
             "This is an experimental feature in PyTorch that the IREE Turbine project is still evaluating. Please report issues or experiences."
         )
 
+    from .compiled_module import ExportTargetDef
+
     TransformedModule: Any
     current_decomps = decompositions.current_aot_decompositions()
     if isinstance(mdl, torch.export.ExportedProgram):
         TransformedModule = CompiledModule.create_from_dict(
             "LambdaCompiledModule",
-            {(function_name or "main"): mdl},
+            {(function_name or "main"): ExportTargetDef(mdl, arg_device=arg_device)},
             export_name=module_name or "module",
             options=ModuleBuilderOptions(
                 import_symbolic_shape_expressions=import_symbolic_shape_expressions,
@@ -311,7 +322,12 @@ def export(
 
     TransformedModule = CompiledModule.create_from_dict(
         "LambdaCompiledModule",
-        {(function_name or "main"): exported_program},
+        {
+            (function_name or "main"): ExportTargetDef(
+                exported_program,
+                arg_device=arg_device,
+            )
+        },
         export_name=module_name or "module",
         options=ModuleBuilderOptions(
             import_symbolic_shape_expressions=import_symbolic_shape_expressions,
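On the `export()` path, `arg_device` keys index into the flattened argument list. A hedged sketch with a stand-in module, mirroring the semantics documented above:

```python
import torch
from iree.turbine.aot import DeviceAffinity, export


class TwoArg(torch.nn.Module):
    def forward(self, x, y):
        return x + 1, y - 1


# Flattened argument 0 goes to device 0, argument 1 to device 1.
output = export(
    TwoArg(),
    args=(torch.empty(4), torch.empty(4)),
    arg_device={0: DeviceAffinity(0), 1: DeviceAffinity(1)},
)
print(output.mlir_module)  # arguments carry #hal.device.promise<@__device_N>
```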
diff --git a/iree/turbine/aot/fx_programs.py b/iree/turbine/aot/fx_programs.py
index 696f9a00..1bfd21f0 100644
--- a/iree/turbine/aot/fx_programs.py
+++ b/iree/turbine/aot/fx_programs.py
@@ -14,6 +14,7 @@
 import os
 from pathlib import Path
 from typing import Any, Optional, Union
+from .compiled_module import ExportTargetDef
 
 import functools
 
@@ -21,6 +22,12 @@
 import torch.nn as nn
 
 from .decompositions import current_aot_decompositions
+from .tensor_traits import DeviceAffinity
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .compiled_module import ExportTargetDef
 
 # The dynamic_shapes support showed up in the Torch 2.3 timeframe.
 _supports_dynamic_shapes = hasattr(torch.export, "Dim")
@@ -61,7 +68,7 @@ class FxPrograms:
     """
 
     def __init__(self):
-        self.programs: dict[str, torch.export.ExportedProgram] = {}
+        self.programs: dict[str, ExportTargetDef] = {}
 
     def save(self, path: Union[str, os.PathLike]) -> int:
         """Saves the set of exported programs to a descriptor file.
@@ -86,7 +93,9 @@ def permute_path(name):
         count_deduped = 0
 
         # Save each.
-        for program_name, ep in self.programs.items():
+        for program_name, export_def in self.programs.items():
+            ep = export_def.target
+            assert isinstance(ep, torch.export.ExportedProgram)
             # First validate the ep with normal rules, which we will then
             # disable since we are violating the spec.
             ep._validate()
@@ -129,7 +138,7 @@ def load(path: Union[str, os.PathLike]) -> "FxPrograms":
             ep = torch.export.load(path.parent / program_file_name)
             _unsharify_state_dict(shared_state_dict, ep.state_dict)
             _unsharify_state_dict(shared_constants, _get_optional_constants(ep))
-            instance.programs[program_name] = ep
+            instance.programs[program_name] = ExportTargetDef(ep)
         return instance
 
@@ -169,6 +178,7 @@ def export_program(
     dynamic_shapes=None,
     strict: bool = True,
     name: Optional[str] = None,
+    arg_device: dict[int, DeviceAffinity] | None = None,
 ):
     if f is None:
         return functools.partial(
@@ -178,6 +188,7 @@ def export_program(
             strict=strict,
             dynamic_shapes=dynamic_shapes,
             name=name,
+            arg_device=arg_device,
         )
 
     if name is None:
@@ -234,7 +245,10 @@ def new_forward(self, *forward_args, **forward_kwargs):
         _patch_op_dispatch_for_export()
         program = program.run_decompositions(current_decomps)
 
-    fx_builder.programs[name] = program
+    fx_builder.programs[name] = ExportTargetDef(
+        program,
+        arg_device=arg_device,
+    )
     return program
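Because `FxPrograms.programs` now stores `ExportTargetDef` wrappers rather than bare `ExportedProgram`s, downstream code reaches the underlying program through `.target`. A sketch of the access pattern after loading (the descriptor path is illustrative):

```python
from iree.turbine.aot.fx_programs import FxPrograms

programs = FxPrograms.load("programs.json")  # illustrative descriptor path
for name, export_def in programs.programs.items():
    ep = export_def.target  # the underlying torch.export.ExportedProgram
    affinities = export_def.arg_device  # None for programs loaded from disk
    print(name, type(ep).__name__, affinities)
```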
diff --git a/iree/turbine/aot/support/ir_utils.py b/iree/turbine/aot/support/ir_utils.py
index ee51f7b9..91f363d2 100644
--- a/iree/turbine/aot/support/ir_utils.py
+++ b/iree/turbine/aot/support/ir_utils.py
@@ -10,6 +10,7 @@
 from dataclasses import dataclass
 from pathlib import Path
 import tempfile
+from itertools import zip_longest
 
 import numpy as np
 import torch
@@ -26,11 +27,13 @@
 )
 
 from ...support.ir_imports import (
-    AsmState,
+    ArrayAttr,
     Attribute,
     BF16Type,
+    Context,
     DenseElementsAttr,
     DenseResourceElementsAttr,
+    DictAttr,
     F16Type,
     F32Type,
     F64Type,
@@ -63,6 +66,7 @@
 from ...support.logging import aot_logger as logger
 
 from ..tensor_traits import (
+    DeviceAffinity,
     ExternalTensorTrait,
 )
 
@@ -235,6 +239,8 @@ def create_func_op(
         argument_types: Sequence[IrType],
         is_public: bool = True,
         add_entry_block: bool = True,
+        # Array of DictAttr corresponding to the attributes for each argument.
+        argument_attributes: ArrayAttr | list[DictAttr] | None = None,
     ) -> Tuple[str, func_d.FuncOp]:
         with self.ip:
             ftype = FunctionType.get(argument_types, [])
@@ -245,6 +251,8 @@ def create_func_op(
                 func_op.add_entry_block()
             self.symbol_table.insert(func_op)
             actual_symbol_name = StringAttr(func_op.attributes["sym_name"]).value
+            if argument_attributes is not None:
+                func_op.arg_attrs = argument_attributes
             return actual_symbol_name, func_op
 
     def torch_dtype_to_iree_type(self, dtype: torch.dtype) -> IrType:
@@ -470,3 +478,48 @@ def _is_float_type(type):
 
 def _is_integer_like_type(type):
     return isinstance(type, (IntegerType, IndexType))
+
+
+def _attribute_from_device_affinity(
+    affinity: DeviceAffinity, context: Context
+) -> Attribute:
+    return Attribute.parse(
+        f'#hal.device.promise<@"__device_{affinity.ordinal}">', context
+    )
+
+
+def attributes_from_argument_device_affinities(
+    affinities: dict[int, DeviceAffinity] | None,
+    arguments_count: int,
+    context: Context,
+) -> list[dict[str, Attribute]]:
+    """Get attributes for function op arguments."""
+    if affinities is None:
+        return [{} for _ in range(arguments_count)]
+    return [
+        {"iree.abi.affinity": _attribute_from_device_affinity(affinities[i], context)}
+        if i in affinities
+        else {}
+        for i in range(arguments_count)
+    ]
+
+
+def update_func_op_argument_attributes(
+    func_op: func_d.FuncOp, attributes: list[dict[str, Attribute]]
+):
+    if func_d.ARGUMENT_ATTRIBUTE_NAME not in func_op.attributes:
+        mutable_arg_attrs: list[dict[str, Attribute]] = [
+            {} for _ in range(len(func_op.arguments))
+        ]
+    else:
+        mutable_arg_attrs = [
+            {named_attr.name: named_attr.attr for named_attr in dict_attr}
+            for dict_attr in func_op.arg_attrs
+        ]
+
+    for src, dst in zip_longest(attributes, mutable_arg_attrs):
+        dst.update(src)
+
+    func_op.arg_attrs = [
+        DictAttr.get(d, context=func_op.context) for d in mutable_arg_attrs
+    ]
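A sketch of what `attributes_from_argument_device_affinities` produces for a three-argument function when only argument 1 has an affinity. This assumes an `iree.compiler.ir.Context` that can parse HAL attributes:

```python
from iree.compiler.ir import Context
from iree.turbine.aot.support.ir_utils import (
    attributes_from_argument_device_affinities,
)
from iree.turbine.aot.tensor_traits import DeviceAffinity

with Context() as ctx:
    attrs = attributes_from_argument_device_affinities(
        {1: DeviceAffinity(1)}, arguments_count=3, context=ctx
    )
    # attrs[0] and attrs[2] are empty dicts; attrs[1] maps
    # "iree.abi.affinity" to the parsed #hal.device.promise<@__device_1>.
    assert list(attrs[1]) == ["iree.abi.affinity"]
```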
diff --git a/iree/turbine/aot/support/procedural/exported_program.py b/iree/turbine/aot/support/procedural/exported_program.py
index f6540bab..14a47d7f 100644
--- a/iree/turbine/aot/support/procedural/exported_program.py
+++ b/iree/turbine/aot/support/procedural/exported_program.py
@@ -45,12 +45,15 @@
 )
 
 from ...tensor_traits import (
+    DeviceAffinity,
     ExternalTensorTrait,
 )
 
 from ..ir_utils import (
+    attributes_from_argument_device_affinities,
     GlobalAttributes,
     ModuleBuilder,
+    update_func_op_argument_attributes,
 )
 
 from .base import (
@@ -71,6 +74,9 @@
     IrTrace,
 )
 
+from typing import TYPE_CHECKING
+
+
 # Limit of tensor volumes. Over this limit, otherwise uncategorized tensor
 # constants will be emitted out-of-line. Under the limit, inline.
 INLINE_TENSOR_VOLUME_LIMIT = 1024
@@ -178,6 +184,7 @@ def import_exported_program(
     exported_program: torch.export.ExportedProgram,
     symbol_name: str,
     symbol_visibility: Optional[str],
+    arg_device: dict[int, DeviceAffinity] | None,
 ) -> ExportedProgramIntrinsic:
     fx_importer = _create_fx_importer(module_builder)
     entry_func_op = fx_importer.import_program(
@@ -186,6 +193,14 @@ def import_exported_program(
         func_visibility=symbol_visibility,
         import_symbolic_shape_expressions=module_builder.options.import_symbolic_shape_expressions,
     )
+    update_func_op_argument_attributes(
+        entry_func_op,
+        attributes_from_argument_device_affinities(
+            arg_device,
+            len(entry_func_op.arguments),
+            entry_func_op.context,
+        ),
+    )
 
     module_call_graph = exported_program.module_call_graph
     assert len(module_call_graph) >= 1, "Expected at least one module call signature"
diff --git a/iree/turbine/aot/support/procedural/tracer.py b/iree/turbine/aot/support/procedural/tracer.py
index 19342deb..252065c4 100644
--- a/iree/turbine/aot/support/procedural/tracer.py
+++ b/iree/turbine/aot/support/procedural/tracer.py
@@ -21,6 +21,7 @@
 )
 
 from ....support.ir_imports import (
+    DictAttr,
     Location,
     StringAttr,
     Value,
@@ -29,9 +30,7 @@
 
 from ....support.logging import aot_logger as logger
 
-from ..ir_utils import (
-    ModuleBuilder,
-)
+from ..ir_utils import ModuleBuilder, attributes_from_argument_device_affinities
 
 from .base import (
     AbstractIntrinsic,
@@ -45,6 +44,8 @@
     LiveGlobalCollectionProxy,
 )
 
+from ...tensor_traits import DeviceAffinity
+
 ###############################################################################
 # Concrete procedure building IrTracer.
 ###############################################################################
@@ -78,6 +79,7 @@ def define_func(
         posargs: Sequence,
         kwargs: dict,
         loc: Location,
+        arg_device: dict[int, DeviceAffinity] | None = None,
     ) -> "ProcedureTrace":
         # Unpack arguments.
         arguments_flat, arguments_tree_def = tree_flatten((posargs, kwargs))
@@ -88,7 +90,17 @@ def define_func(
             argument_ir_types.append(arg.get_ir_type(module_builder))
 
         with loc:
-            _, func_op = module_builder.create_func_op(symbol_name, argument_ir_types)
+            argument_attributes = [
+                DictAttr.get(d)
+                for d in attributes_from_argument_device_affinities(
+                    arg_device,
+                    arguments_count=len(argument_ir_types),
+                    context=module_builder.context,
+                )
+            ]
+            _, func_op = module_builder.create_func_op(
+                symbol_name, argument_ir_types, argument_attributes=argument_attributes
+            )
 
         # Bind proxy arguments to an IR value.
         ir_proxy_arguments_flat = []
diff --git a/iree/turbine/aot/tensor_traits.py b/iree/turbine/aot/tensor_traits.py
index bb7a5280..97283784 100644
--- a/iree/turbine/aot/tensor_traits.py
+++ b/iree/turbine/aot/tensor_traits.py
@@ -11,10 +11,26 @@
 
 __all__ = [
+    "DeviceAffinity",
     "ExternalTensorTrait",
 ]
 
 
+class DeviceAffinity:
+    """This is used to provide device affinities to exported function arguments."""
+
+    def __init__(self, ordinal: int):
+        self.ordinal = ordinal
+
+    def __eq__(self, other) -> bool:
+        if not isinstance(other, DeviceAffinity):
+            return False
+        return self.ordinal == other.ordinal
+
+    def __repr__(self) -> str:
+        return f"DeviceAffinity({self.ordinal})"
+
+
 @dataclass
 class ExternalTensorTrait:
     """Represents a 'trait' that can be applied to a Tensor to signal that
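`DeviceAffinity` is a small value object keyed by ordinal, with equality defined accordingly. A short sketch of its semantics as defined above:

```python
from iree.turbine.aot.tensor_traits import DeviceAffinity

a, b = DeviceAffinity(0), DeviceAffinity(0)
assert a == b and a != DeviceAffinity(1)  # equality compares ordinals
assert repr(a) == "DeviceAffinity(0)"

# Typical use: a mapping from flattened argument index to affinity.
arg_device = {0: DeviceAffinity(0), 2: DeviceAffinity(1)}
```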
diff --git a/iree/turbine/support/ir_imports.py b/iree/turbine/support/ir_imports.py
index 09aa4042..1803f16a 100644
--- a/iree/turbine/support/ir_imports.py
+++ b/iree/turbine/support/ir_imports.py
@@ -8,6 +8,7 @@
 """Unifies all imports of iree.compiler.ir into one place."""
 
 from iree.compiler.ir import (
+    ArrayAttr,
     AsmState,
     Attribute,
     Block,
@@ -15,6 +16,7 @@
     Context,
     DenseElementsAttr,
     DenseResourceElementsAttr,
+    DictAttr,
     FlatSymbolRefAttr,
     FloatAttr,
     FunctionType,
diff --git a/tests/aot/args_test.py b/tests/aot/args_test.py
index efbce489..2910fa2e 100644
--- a/tests/aot/args_test.py
+++ b/tests/aot/args_test.py
@@ -65,6 +65,24 @@ def compute(a, b):
             msg=f"Did not find two linalg.generics in module: module_str",
         )
 
+    def testDeviceAffinities(self):
+        class ProcArgsModule(CompiledModule):
+            @CompiledModule.signature_info(arg_device={1: DeviceAffinity(1)})
+            def foobar(self, a=AbstractTensor(3, 2), b=AbstractTensor(1, 1)):
+                return a, b
+
+        inst = ProcArgsModule(context=Context(), import_to="import")
+        module_str = str(CompiledModule.get_mlir_module(inst))
+        print(module_str)
+        self.assertRegex(
+            module_str,
+            (
+                "func.func @foobar\("
+                "%.+: tensor<3x2xf32>, "
+                "%.+: tensor<1x1xf32> {iree.abi.affinity = #hal.device.promise<@__device_1>}\)"
+            ),
+        )
+
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.DEBUG)
diff --git a/tests/aot/compiled_exported_program_test.py b/tests/aot/compiled_exported_program_test.py
index 6b86b185..d14e8ac6 100644
--- a/tests/aot/compiled_exported_program_test.py
+++ b/tests/aot/compiled_exported_program_test.py
@@ -177,6 +177,30 @@ class BuffersAsGlobalsModule(CompiledModule):
         self.assertIn("%_buffers.buf = util.global.load @_buffers.buf", module_str)
         self.assertIn("util.global.store", module_str)
 
+    def testDeviceAffinities(self):
+        class Module(torch.nn.Module):
+            def forward(self, x, y):
+                return x, y
+
+        module = Module()
+        export_output = export(
+            module,
+            function_name="foo",
+            args=(torch.empty(1, dtype=torch.int8), torch.empty(2, dtype=torch.int8)),
+            arg_device={1: DeviceAffinity(1)},
+        )
+        asm = str(export_output.mlir_module)
+        print(asm)
+        self.assertRegex(
+            asm,
+            (
+                "func.func @foo\("
+                "%.+: !torch.vtensor<\[1\],si8>, "
+                "%.+: !torch.vtensor<\[2\],si8> "
+                "{iree.abi.affinity = #hal.device.promise<@__device_1>}\)"
+            ),
+        )
+
 
 class SimpleParams(nn.Module):
     def __init__(self):
diff --git a/tests/aot/fx_programs_test.py b/tests/aot/fx_programs_test.py
index f2c70456..d5241654 100644
--- a/tests/aot/fx_programs_test.py
+++ b/tests/aot/fx_programs_test.py
@@ -61,10 +61,10 @@ def bs32(module: M, x1, x2):
     prog_0 = new_programs.programs["dynamic_batch"]
     prog_1 = new_programs.programs["bs32"]
 
-    for key, value_0 in prog_0.state_dict.items():
-        value_1 = prog_1.state_dict[key]
+    for key, value_0 in prog_0.target.state_dict.items():
+        value_1 = prog_1.target.state_dict[key]
         assert value_0 is value_1, f"State dict item {key} was not aliased on load"
 
-    for key, value_0 in prog_0.constants.items():
-        value_1 = prog_1.constants[key]
+    for key, value_0 in prog_0.target.constants.items():
+        value_1 = prog_1.target.constants[key]
         assert value_0 is value_1, f"Constant item {key} was not aliased on load"
diff --git a/tests/aot/fx_programs_test_device.py b/tests/aot/fx_programs_test_device.py
new file mode 100644
index 00000000..2c2ec65b
--- /dev/null
+++ b/tests/aot/fx_programs_test_device.py
@@ -0,0 +1,47 @@
+# Copyright 2024 Advanced Micro Devices, Inc
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from pathlib import Path
+import unittest
+
+import torch
+
+from iree.turbine.aot import (
+    DeviceAffinity,
+    export,
+    FxProgramsBuilder,
+)
+
+
+class FxProgramsTestDevice(unittest.TestCase):
+    def test_argument_device_affinities(self):
+        class Module(torch.nn.Module):
+            def main(self, x1, x2):
+                return x1, x2
+
+        args = (
+            torch.empty(2, 3, dtype=torch.int8),
+            torch.empty(4, 5, dtype=torch.int8),
+        )
+        fxb = FxProgramsBuilder(Module())
+
+        @fxb.export_program(
+            args=args,
+            arg_device={0: DeviceAffinity(0), 1: DeviceAffinity(1)},
+        )
+        def main(module: Module, x1, x2):
+            return module.main(x1, x2)
+
+        output = export(fxb)
+        asm = str(output.mlir_module)
+        self.assertRegex(
+            asm,
+            (
+                "func.func @main\("
+                "%.+: !torch.vtensor<\[2,3\],si8> {iree.abi.affinity = #hal.device.promise<@__device_0>}, "
+                "%.+: !torch.vtensor<\[4,5\],si8> {iree.abi.affinity = #hal.device.promise<@__device_1>}\)"
+            ),
+        )
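Outside the test harness, the same flow works end to end. A hedged sketch mirroring the new test above (the module and shapes are illustrative):

```python
import torch
from iree.turbine.aot import DeviceAffinity, FxProgramsBuilder, export


class M(torch.nn.Module):
    def forward(self, x1, x2):
        return x1, x2


fxb = FxProgramsBuilder(M())


@fxb.export_program(
    args=(torch.empty(2, 3, dtype=torch.int8), torch.empty(4, 5, dtype=torch.int8)),
    arg_device={0: DeviceAffinity(0), 1: DeviceAffinity(1)},
)
def main(module: M, x1, x2):
    return module.forward(x1, x2)


output = export(fxb)
asm = str(output.mlir_module)
assert "#hal.device.promise<@__device_0>" in asm  # affinities made it to MLIR
```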