diff --git a/backends/apple/coreml/runtime/test/export_stateful_model.py b/backends/apple/coreml/runtime/test/export_stateful_model.py
index 61d1a93980f..e477d1425bf 100644
--- a/backends/apple/coreml/runtime/test/export_stateful_model.py
+++ b/backends/apple/coreml/runtime/test/export_stateful_model.py
@@ -47,7 +47,7 @@ def main() -> None:
         torch.randn((1, embedding_dim)),
         torch.tensor([0]),
     )
-    exported_model = export(model, example_inputs)
+    exported_model = export(model, example_inputs, strict=True)
     edge_program_manager = exir.to_edge(exported_model)
     compile_specs = CoreMLBackend.generate_compile_specs(
         compute_precision=ct.precision.FLOAT16,
diff --git a/devtools/backend_debug/tests/test_delegation_info.py b/devtools/backend_debug/tests/test_delegation_info.py
index 6ff5169094b..980ef8d241d 100644
--- a/devtools/backend_debug/tests/test_delegation_info.py
+++ b/devtools/backend_debug/tests/test_delegation_info.py
@@ -31,7 +31,7 @@ def forward(self, a, x, b):
 
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        edge = to_edge(torch.export.export(m, inputs)).to_backend(
+        edge = to_edge(torch.export.export(m, inputs, strict=True)).to_backend(
             AddMulPartitionerDemo()
         )
         delegation_info = get_delegation_info(edge.exported_program().graph_module)
diff --git a/devtools/bundled_program/util/test_util.py b/devtools/bundled_program/util/test_util.py
index 505186f3a08..62776852db0 100644
--- a/devtools/bundled_program/util/test_util.py
+++ b/devtools/bundled_program/util/test_util.py
@@ -271,6 +271,7 @@ def get_common_executorch_program() -> (
         m_name: export(
             StatefulWrapperModule(eager_model, getattr(eager_model, m_name)),
             capture_inputs[m_name],
+            strict=True,
         )
         for m_name in eager_model.method_names
     }
diff --git a/devtools/etrecord/tests/etrecord_test.py b/devtools/etrecord/tests/etrecord_test.py
index daef7c3e1e2..cf50662c2a1 100644
--- a/devtools/etrecord/tests/etrecord_test.py
+++ b/devtools/etrecord/tests/etrecord_test.py
@@ -69,7 +69,7 @@ def get_test_model_with_bundled_program(self):
 
     def get_test_model_with_manager(self):
         f = models.BasicSinMax()
-        aten_dialect = export(f, f.get_random_inputs())
+        aten_dialect = export(f, f.get_random_inputs(), strict=True)
         edge_program: EdgeProgramManager = to_edge(
             aten_dialect, compile_config=EdgeCompileConfig(_check_ir_validity=False)
         )
diff --git a/docs/source/tutorials_source/devtools-integration-tutorial.py b/docs/source/tutorials_source/devtools-integration-tutorial.py
index dece18fa8ce..b9028dc91f5 100644
--- a/docs/source/tutorials_source/devtools-integration-tutorial.py
+++ b/docs/source/tutorials_source/devtools-integration-tutorial.py
@@ -89,10 +89,7 @@ def forward(self, x):
 model = Net()
 
-aten_model: ExportedProgram = export(
-    model,
-    (torch.randn(1, 1, 32, 32),),
-)
+aten_model: ExportedProgram = export(model, (torch.randn(1, 1, 32, 32),), strict=True)
 
 edge_program_manager: EdgeProgramManager = to_edge(
     aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)
 )
@@ -141,7 +138,7 @@ def forward(self, x):
 
 # Step 1: ExecuTorch Program Export
 m_name = "forward"
-method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),))}
+method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),), strict=True)}
 
 # Step 2: Construct Method Test Suites
 inputs = [[torch.randn(1, 1, 32, 32)] for _ in range(2)]
diff --git a/docs/source/tutorials_source/export-to-executorch-tutorial.py b/docs/source/tutorials_source/export-to-executorch-tutorial.py
index fac3eab08e5..87ae6d8ca60 100644
--- a/docs/source/tutorials_source/export-to-executorch-tutorial.py
+++ b/docs/source/tutorials_source/export-to-executorch-tutorial.py
@@ -66,7 +66,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 
 example_args = (torch.randn(1, 3, 256, 256),)
-aten_dialect: ExportedProgram = export(SimpleConv(), example_args)
+aten_dialect: ExportedProgram = export(SimpleConv(), example_args, strict=True)
 print(aten_dialect)
 
 ######################################################################
@@ -101,7 +101,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 
 
 example_args = (torch.randn(3, 3), torch.randn(3, 3))
-aten_dialect: ExportedProgram = export(Basic(), example_args)
+aten_dialect: ExportedProgram = export(Basic(), example_args, strict=True)
 
 # Works correctly
 print(aten_dialect.module()(torch.ones(3, 3), torch.ones(3, 3)))
@@ -131,7 +131,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 dim1_x = Dim("dim1_x", min=1, max=10)
 dynamic_shapes = {"x": {1: dim1_x}, "y": {1: dim1_x}}
 aten_dialect: ExportedProgram = export(
-    Basic(), example_args, dynamic_shapes=dynamic_shapes
+    Basic(), example_args, dynamic_shapes=dynamic_shapes, strict=True
 )
 print(aten_dialect)
 
@@ -213,7 +213,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 print("Quantized Graph")
 print(converted_graph)
 
-aten_dialect: ExportedProgram = export(converted_graph, example_args)
+aten_dialect: ExportedProgram = export(converted_graph, example_args, strict=True)
 print("ATen Dialect Graph")
 print(aten_dialect)
 
@@ -243,7 +243,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 from executorch.exir import EdgeProgramManager, to_edge
 
 example_args = (torch.randn(1, 3, 256, 256),)
-aten_dialect: ExportedProgram = export(SimpleConv(), example_args)
+aten_dialect: ExportedProgram = export(SimpleConv(), example_args, strict=True)
 
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 print("Edge Dialect Graph")
@@ -267,10 +267,10 @@ def forward(self, x):
 
 
 encode_args = (torch.randn(1, 10),)
-aten_encode: ExportedProgram = export(Encode(), encode_args)
+aten_encode: ExportedProgram = export(Encode(), encode_args, strict=True)
 
 decode_args = (torch.randn(1, 5),)
-aten_decode: ExportedProgram = export(Decode(), decode_args)
+aten_decode: ExportedProgram = export(Decode(), decode_args, strict=True)
 
 edge_program: EdgeProgramManager = to_edge(
     {"encode": aten_encode, "decode": aten_decode}
@@ -291,7 +291,7 @@ def forward(self, x):
 # rather than the ``torch.ops.aten`` namespace.
 
 example_args = (torch.randn(1, 3, 256, 256),)
-aten_dialect: ExportedProgram = export(SimpleConv(), example_args)
+aten_dialect: ExportedProgram = export(SimpleConv(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 print("Edge Dialect Graph")
 print(edge_program.exported_program())
@@ -357,7 +357,7 @@ def forward(self, x):
 
 # Export and lower the module to Edge Dialect
 example_args = (torch.ones(1),)
-aten_dialect: ExportedProgram = export(LowerableModule(), example_args)
+aten_dialect: ExportedProgram = export(LowerableModule(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 to_be_lowered_module = edge_program.exported_program()
 
@@ -423,7 +423,7 @@ def forward(self, x):
 
 
 example_args = (torch.ones(1),)
-aten_dialect: ExportedProgram = export(ComposedModule(), example_args)
+aten_dialect: ExportedProgram = export(ComposedModule(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 exported_program = edge_program.exported_program()
 print("Edge Dialect graph")
@@ -461,7 +461,7 @@ def forward(self, a, x, b):
 
 
 example_args = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-aten_dialect: ExportedProgram = export(Foo(), example_args)
+aten_dialect: ExportedProgram = export(Foo(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 exported_program = edge_program.exported_program()
 print("Edge Dialect graph")
@@ -495,7 +495,7 @@ def forward(self, a, x, b):
 
 
 example_args = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-aten_dialect: ExportedProgram = export(Foo(), example_args)
+aten_dialect: ExportedProgram = export(Foo(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 exported_program = edge_program.exported_program()
 delegated_program = edge_program.to_backend(AddMulPartitionerDemo())
@@ -577,7 +577,9 @@ def forward(self, x):
 pre_autograd_aten_dialect = export_for_training(M(), example_args).module()
 # Optionally do quantization:
 # pre_autograd_aten_dialect = convert_pt2e(prepare_pt2e(pre_autograd_aten_dialect, CustomBackendQuantizer))
-aten_dialect: ExportedProgram = export(pre_autograd_aten_dialect, example_args)
+aten_dialect: ExportedProgram = export(
+    pre_autograd_aten_dialect, example_args, strict=True
+)
 edge_program: exir.EdgeProgramManager = exir.to_edge(aten_dialect)
 # Optionally do delegation:
 # edge_program = edge_program.to_backend(CustomBackendPartitioner)
diff --git a/examples/apple/coreml/scripts/export.py b/examples/apple/coreml/scripts/export.py
index 53316ea2001..a4ceaee05da 100644
--- a/examples/apple/coreml/scripts/export.py
+++ b/examples/apple/coreml/scripts/export.py
@@ -88,7 +88,9 @@ def partition_module_to_coreml(module):
 
 def lower_module_to_coreml(module, compile_specs, example_inputs):
     module = module.eval()
-    edge = to_edge(export(module, example_inputs), compile_config=_EDGE_COMPILE_CONFIG)
+    edge = to_edge(
+        export(module, example_inputs, strict=True), compile_config=_EDGE_COMPILE_CONFIG
+    )
     # All of the subsequent calls on the edge_dialect_graph generated above (such as delegation or
     # to_executorch()) are done in place and the graph is also modified in place. For debugging purposes
     # we would like to keep a copy of the original edge dialect graph and hence we create a deepcopy of
@@ -107,7 +109,8 @@ def lower_module_to_coreml(module, compile_specs, example_inputs):
 def export_lowered_module_to_executorch_program(lowered_module, example_inputs):
     lowered_module(*example_inputs)
     exec_prog = to_edge(
-        export(lowered_module, example_inputs), compile_config=_EDGE_COMPILE_CONFIG
+        export(lowered_module, example_inputs, strict=True),
+        compile_config=_EDGE_COMPILE_CONFIG,
     ).to_executorch(config=exir.ExecutorchBackendConfig(extract_delegate_segments=True))
 
     return exec_prog
@@ -170,7 +173,7 @@ def main():
 
     if args.use_partitioner:
         model.eval()
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         edge_program_manager = exir.to_edge(exir_program_aten)
         edge_copy = copy.deepcopy(edge_program_manager)
 
diff --git a/examples/apple/coreml/scripts/inspector_utils.py b/examples/apple/coreml/scripts/inspector_utils.py
index 08af6fb3484..736d293c9b5 100644
--- a/examples/apple/coreml/scripts/inspector_utils.py
+++ b/examples/apple/coreml/scripts/inspector_utils.py
@@ -65,9 +65,7 @@ def build_devtools_runner_including_coreml(
     build_devtools_runner_command: str = (
         "./examples/devtools/build_example_runner.sh --coreml"
    )
-    build_command: str = (
-        f"{cd_root_command} && {conda_activate_env_command} && {build_devtools_runner_command}"
-    )
+    build_command: str = f"{cd_root_command} && {conda_activate_env_command} && {build_devtools_runner_command}"
     subprocess.run(
         f'bash -c "{build_command}"', shell=True, check=True
     ).check_returncode()
@@ -87,10 +85,7 @@ def to_core_aten(
     module: torch.nn.Module,
     example_inputs: Tuple[Value, ...],
 ) -> ExportedProgram:
-    core_aten_program = export(
-        mod=module,
-        args=example_inputs,
-    )
+    core_aten_program = export(mod=module, args=example_inputs, strict=True)
     return core_aten_program
 
 
diff --git a/examples/devtools/scripts/gen_sample_etrecord.py b/examples/devtools/scripts/gen_sample_etrecord.py
index 55544395b5a..a6b3d487251 100644
--- a/examples/devtools/scripts/gen_sample_etrecord.py
+++ b/examples/devtools/scripts/gen_sample_etrecord.py
@@ -31,10 +31,7 @@ def gen_etrecord(model: torch.nn.Module, inputs: Any, output_path=None):
 
     f = model
 
-    aten_dialect: ExportedProgram = export(
-        f,
-        inputs,
-    )
+    aten_dialect: ExportedProgram = export(f, inputs, strict=True)
     edge_program: EdgeProgramManager = to_edge(
         aten_dialect, compile_config=EdgeCompileConfig(_check_ir_validity=True)
     )
diff --git a/examples/llm_manual/export_nanogpt.py b/examples/llm_manual/export_nanogpt.py
index 2d69c50ec99..9de2e831e25 100644
--- a/examples/llm_manual/export_nanogpt.py
+++ b/examples/llm_manual/export_nanogpt.py
@@ -30,7 +30,7 @@
     m = export_for_training(
         model, example_inputs, dynamic_shapes=dynamic_shape
     ).module()
-    traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)
+    traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape, strict=True)
 
 # Convert the model into a runnable ExecuTorch program.
 # To be further lowered to Xnnpack backend, `traced_model` needs xnnpack-specific edge compile config
diff --git a/examples/mediatek/aot_utils/oss_utils/utils.py b/examples/mediatek/aot_utils/oss_utils/utils.py
index cb55822b9de..2246b8eeb15 100755
--- a/examples/mediatek/aot_utils/oss_utils/utils.py
+++ b/examples/mediatek/aot_utils/oss_utils/utils.py
@@ -37,9 +37,9 @@ def build_executorch_binary(
         for data in dataset:
             annotated_model(*data)
         quantized_model = convert_pt2e(annotated_model, fold_quantize=False)
-        aten_dialect = torch.export.export(quantized_model, inputs)
+        aten_dialect = torch.export.export(quantized_model, inputs, strict=True)
     else:
-        aten_dialect = torch.export.export(model, inputs)
+        aten_dialect = torch.export.export(model, inputs, strict=True)
 
     from executorch.exir.program._program import to_edge_transform_and_lower
 
diff --git a/examples/mediatek/model_export_scripts/llama.py b/examples/mediatek/model_export_scripts/llama.py
index 77c91bc635d..5da17727075 100644
--- a/examples/mediatek/model_export_scripts/llama.py
+++ b/examples/mediatek/model_export_scripts/llama.py
@@ -338,7 +338,7 @@ def export_to_et_ir(
     print(f"Exporting Shape {shape} to:\n{dest_path}")
     example_inputs = model.get_example_inputs(*ntok_and_cache)
     aten_dialect: exir.ExportedProgram = torch.export.export(
-        converted_graph, example_inputs
+        converted_graph, example_inputs, strict=True
     )
 
     print("Lowering to Edge Dialect Graph")
diff --git a/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py b/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
index 8e678801b8c..3da00cd70cd 100644
--- a/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
+++ b/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
@@ -70,6 +70,7 @@ def test_llama3_2_text_decoder_aoti(self) -> None:
             model.get_example_inputs(),
             kwargs=model.get_example_kwarg_inputs(),
             dynamic_shapes=model.get_dynamic_shapes(),
+            strict=True,
         )
         with tempfile.TemporaryDirectory() as tmpdir:
             path = torch._inductor.aoti_compile_and_package(
diff --git a/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py b/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
index c2f1e77ceea..2edeb16ab7c 100644
--- a/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
+++ b/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
@@ -32,6 +32,7 @@ def test_flamingo_vision_encoder(self) -> None:
             encoder,
             model.get_example_inputs(),
             dynamic_shapes=model.get_dynamic_shapes(),
+            strict=True,
         )
         with tempfile.TemporaryDirectory() as tmpdir:
             path = torch._inductor.aoti_compile_and_package(
diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py
index bdb30db735b..dabb07e61ce 100644
--- a/examples/models/llava/export_llava.py
+++ b/examples/models/llava/export_llava.py
@@ -116,6 +116,7 @@ def forward(self, input_pos, embeddings):
         manager.pre_autograd_graph_module,
         manager.example_inputs,
         dynamic_shapes=manager._get_dynamic_shape(),
+        strict=True,
     )
     return text_model_ep
 
@@ -158,6 +159,7 @@ def forward(self, images):
         manager.pre_autograd_graph_module,
         manager.example_inputs,
         dynamic_shapes=manager.dynamic_shapes,
+        strict=True,
     )
     return image_encoder_ep
 
@@ -176,7 +178,10 @@ def quant_embedding(model):
     dynamic_shapes = [{1: token_dim_1}]
     with torch.no_grad():
         token_embedding_ep = torch.export.export(
-            quantized_token_embed.embed_tokens, (prompt,), dynamic_shapes=dynamic_shapes
+            quantized_token_embed.embed_tokens,
+            (prompt,),
+            dynamic_shapes=dynamic_shapes,
+            strict=True,
         )
     return token_embedding_ep
 
diff --git a/examples/models/phi-3-mini-lora/export_model.py b/examples/models/phi-3-mini-lora/export_model.py
index e6f291bd581..aa7994cf4d3 100644
--- a/examples/models/phi-3-mini-lora/export_model.py
+++ b/examples/models/phi-3-mini-lora/export_model.py
@@ -55,7 +55,7 @@ def export_phi3_mini_lora(model) -> None:
     tokens = randint(0, vocab_size, (batch_size, seq_len), dtype=long)
     example_args = (tokens,)
     with sdpa_kernel([SDPBackend.MATH]):
-        aten_dialect: ExportedProgram = export(model, example_args)
+        aten_dialect: ExportedProgram = export(model, example_args, strict=True)
 
         # 2. to_edge: Make optimizations for Edge devices.
         print("Lowering to edge dialect")
@@ -93,7 +93,7 @@ def export_phi3_mini_lora_training(model) -> None:
     labels = tokens
     example_args = (tokens, labels)
     with sdpa_kernel([SDPBackend.MATH]):
-        exported_graph: ExportedProgram = export(model, example_args)
+        exported_graph: ExportedProgram = export(model, example_args, strict=True)
 
         print("Creating a joint forward-backwards graph for training")
         joint_graph = _export_forward_backward(exported_graph)
diff --git a/examples/qualcomm/oss_scripts/llama2/llama.py b/examples/qualcomm/oss_scripts/llama2/llama.py
index 323874a3fa8..55f84bbcaba 100755
--- a/examples/qualcomm/oss_scripts/llama2/llama.py
+++ b/examples/qualcomm/oss_scripts/llama2/llama.py
@@ -108,7 +108,6 @@ def annotate_cat(node: Node, quantization_config: QuantizationConfig):
 def annotate_single_in_single_out(
     node: Node, quantization_config: QuantizationConfig
 ) -> None:
-
     input_qspec_map = {}
     input_act = node.args[0]
     input_qspec_map[input_act] = quantization_config.input_activation
@@ -356,7 +355,7 @@ def quantize(self, quant_dtype, custom_annotations=()):
 
         with torch.no_grad():
             fx_graph_module = torch.export.export(
-                self.llama_model, self.inputs
+                self.llama_model, self.inputs, strict=True
             ).module()
             fx_graph_module = prepare_pt2e(fx_graph_module, quantizer)
         print("Quantizing the model...")
diff --git a/examples/qualcomm/oss_scripts/llama3_2/llama.py b/examples/qualcomm/oss_scripts/llama3_2/llama.py
index bb6c65aea21..72d4a905c06 100755
--- a/examples/qualcomm/oss_scripts/llama3_2/llama.py
+++ b/examples/qualcomm/oss_scripts/llama3_2/llama.py
@@ -236,7 +236,7 @@ def quantize(self, quant_dtype, args, custom_annotations=()):
 
         with torch.no_grad():
             fx_graph_module = torch.export.export(
-                self.llama_model, self.inputs
+                self.llama_model, self.inputs, strict=True
             ).module()
             fx_graph_module = prepare_pt2e(fx_graph_module, quantizer)
         logging.info("Quantizing the model...")
diff --git a/examples/qualcomm/scripts/export_example.py b/examples/qualcomm/scripts/export_example.py
index 7445ba4a5ec..23f1f59a7dd 100644
--- a/examples/qualcomm/scripts/export_example.py
+++ b/examples/qualcomm/scripts/export_example.py
@@ -61,7 +61,7 @@ def main() -> None:
     quantizer = QnnQuantizer()
 
     # Typical pytorch 2.0 quantization flow
-    m = torch.export.export(model.eval(), example_inputs).module()
+    m = torch.export.export(model.eval(), example_inputs, strict=True).module()
     m = prepare_pt2e(m, quantizer)
     # Calibration
     m(*example_inputs)
diff --git a/examples/qualcomm/scripts/mobilebert_fine_tune.py b/examples/qualcomm/scripts/mobilebert_fine_tune.py
index 8051d157166..4ecdaf3583f 100755
--- a/examples/qualcomm/scripts/mobilebert_fine_tune.py
+++ b/examples/qualcomm/scripts/mobilebert_fine_tune.py
@@ -292,7 +292,7 @@ def calibrator(gm):
     )
     # lower all graph again, the skipped operators will be left in CPU
     exec_prog = to_edge(
-        torch.export.export(graph_module, inputs[0]),
+        torch.export.export(graph_module, inputs[0], strict=True),
     ).to_executorch()
 
     with open(f"{args.artifact}/{pte_filename}.pte", "wb") as file:
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index bebe99c1d77..c2d2f002aa8 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -281,7 +281,7 @@ def build_executorch_binary(
         None: The function writes the output to a specified .pte file.
     """
     if quant_dtype is not None:
-        captured_model = torch.export.export(model, inputs).module()
+        captured_model = torch.export.export(model, inputs, strict=True).module()
         if qat_training_data:
             quantizer = custom_quantizer or make_quantizer(
                 quant_dtype=quant_dtype, is_qat=True
diff --git a/exir/backend/test/demos/rpc/test_rpc.py b/exir/backend/test/demos/rpc/test_rpc.py
index 63feb954fee..d53f62cb33f 100644
--- a/exir/backend/test/demos/rpc/test_rpc.py
+++ b/exir/backend/test/demos/rpc/test_rpc.py
@@ -104,7 +104,7 @@ def test_delegate_whole_program(self):
         simple_net = self.get_a_simple_net()
         simple_net_input = simple_net.get_example_inputs()
         exported_program = to_edge(
-            export(simple_net, simple_net_input),
+            export(simple_net, simple_net_input, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -124,7 +124,9 @@ def forward(self, *args):
 
         composite_model = CompositeModule()
 
-        exec_prog = to_edge(export(composite_model, simple_net_input)).to_executorch()
+        exec_prog = to_edge(
+            export(composite_model, simple_net_input, strict=True)
+        ).to_executorch()
 
         executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer)
 
@@ -159,7 +161,7 @@ def forward(self, a, x, b):
 
         model = Model()
         inputs = (torch.ones(2, 2), torch.ones(2, 2), torch.ones(2, 2))
-        exported_program = to_edge(export(model, inputs))
+        exported_program = to_edge(export(model, inputs, strict=True))
 
         # First lower to demo backend
         demo_backend_lowered = exported_program.to_backend(AddMulPartitionerDemo())
diff --git a/exir/backend/test/demos/test_delegate_aten_mode.py b/exir/backend/test/demos/test_delegate_aten_mode.py
index 920cc084343..59b6e0b32f2 100644
--- a/exir/backend/test/demos/test_delegate_aten_mode.py
+++ b/exir/backend/test/demos/test_delegate_aten_mode.py
@@ -35,7 +35,7 @@ def forward(self, a, x, b):
 
         add_mul_module = AddMulModule()
         model_inputs = (torch.ones(2, 2), 2 * torch.ones(2, 2), 3 * torch.ones(2, 2))
-        edge_graph_module = to_edge(export(add_mul_module, model_inputs))
+        edge_graph_module = to_edge(export(add_mul_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_add_mul = to_backend(
@@ -56,7 +56,9 @@ def forward(self, a, x, b):
 
         composite_model(*model_inputs)
 
-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch()
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch()
 
         buff = exec_prog.buffer
 
diff --git a/exir/backend/test/test_backends.py b/exir/backend/test/test_backends.py
index df2b25d055e..d2bcfa31676 100644
--- a/exir/backend/test/test_backends.py
+++ b/exir/backend/test/test_backends.py
@@ -1251,7 +1251,7 @@ def forward(self, x: Dict[str, torch.Tensor]):
                 return y
 
         inputs = ({"a": torch.randn(2, 2), "b": torch.randn(2, 2)},)
-        edge_prog = exir.to_edge(torch.export.export(M(), inputs))
+        edge_prog = exir.to_edge(torch.export.export(M(), inputs, strict=True))
         lowered_gm = to_backend(
             BackendWithCompilerDemo.__name__, edge_prog.exported_program(), []
         )
diff --git a/exir/backend/test/test_backends_lifted.py b/exir/backend/test/test_backends_lifted.py
index 7e5bfa6089a..3c55bebd320 100644
--- a/exir/backend/test/test_backends_lifted.py
+++ b/exir/backend/test/test_backends_lifted.py
@@ -129,7 +129,7 @@ def forward(self, x):
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
         expected_res = sin_module(*model_inputs)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
 
         lowered_sin_module = to_backend(
             "BackendWithCompilerDemo", edgeir_m.exported_program(), []
@@ -154,7 +154,7 @@ def forward(self, x):
 
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
@@ -174,7 +174,9 @@ def forward(self, x):
 
         composite_model(*model_inputs)
 
-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             )
@@ -248,7 +250,7 @@ def forward(self, a, x, b):
 
         add_mul_module = AddMulModule()
         model_inputs = (torch.ones(2, 2), 2 * torch.ones(2, 2), 3 * torch.ones(2, 2))
-        edge_graph_module = to_edge(export(add_mul_module, model_inputs))
+        edge_graph_module = to_edge(export(add_mul_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_add_mul = to_backend(
@@ -269,7 +271,9 @@ def forward(self, a, x, b):
 
         composite_model(*model_inputs)
 
-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             )
@@ -298,7 +302,7 @@ def forward(self, x):
         sin_module = SinModule()
         # the backend only accepts shape <= 4
         model_inputs = (torch.ones(6),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
@@ -318,7 +322,9 @@ def forward(self, x):
 
         composite_model(*model_inputs)
 
-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             ),
@@ -361,7 +367,7 @@ def forward(self, x):
 
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
@@ -383,7 +389,9 @@ def forward(self, x):
 
         composite_model(*model_inputs)
 
-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             ),
@@ -452,7 +460,7 @@ def forward(self, x):
 
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
 
         error_msg = r"call_function aten.cos.default is not supported in backend BackendWithCompilerDemo"
         with self.assertRaisesRegex(
@@ -473,7 +481,7 @@ def forward(self, x):
 
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
 
         error_msg = r"Backend FakeBackendWithCompilerDemo was not found."
         with self.assertRaisesRegex(
@@ -499,7 +507,9 @@ def forward(self, x):
         # sin_module is an nn.Module
         to_be_lowered = LowerableSubModel()
         example_input = (torch.ones(1),)
-        to_be_lowered_exir_submodule = to_edge(export(to_be_lowered, example_input))
+        to_be_lowered_exir_submodule = to_edge(
+            export(to_be_lowered, example_input, strict=True)
+        )
 
         max_value = example_input[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
@@ -538,7 +548,9 @@ def forward(self, x):
         # Verify the input works with eager module
         composite_model(*model_inputs)
 
-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             ),
@@ -598,14 +610,14 @@ def forward(self, x_raw, h, c):
         orig_res = composite_m(*inputs)
 
         traced = to_edge(
-            export(composite_m, inputs),
+            export(composite_m, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _use_edge_ops=True
             ),
         )
 
         program_without_delegates = to_edge(
-            export(CompositeModel(3), inputs),
+            export(CompositeModel(3), inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -719,17 +731,14 @@ def forward(self, x_raw, h, c):
         orig_res = composite_m(*inputs)
 
         traced = to_edge(
-            export(composite_m, inputs),
+            export(composite_m, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _use_edge_ops=True
             ),
         )
 
         program_without_delegates = to_edge(
-            export(
-                CompositeModel(3),
-                (input_x, input_h, input_c),
-            ),
+            export(CompositeModel(3), (input_x, input_h, input_c), strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -842,7 +851,7 @@ def forward(self, a, x, b):
 
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
         orig_res = m(*inputs)
-        ep = to_edge(export(m, inputs))
+        ep = to_edge(export(m, inputs, strict=True))
         executorch_prog = ep
         executorch_prog = executorch_prog.to_backend(AddMulPartitionerDemo())
         executorch_prog = executorch_prog.to_executorch(
@@ -899,7 +908,7 @@ def forward(self, x, y):
 
         inputs = (torch.randn(1, 3), torch.randn(1, 3))
         orig_res = Model()(*inputs)
-        ep = to_edge(export(Model(), inputs))
+        ep = to_edge(export(Model(), inputs, strict=True))
         executorch_prog = ep
         executorch_prog = executorch_prog.to_backend(AddAttributePartitionerDemo())
         executorch_prog = executorch_prog.to_executorch(
@@ -962,7 +971,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
                 partition_tags=partition_tags,
             )
 
-        ep = to_edge(export(Model(), inputs))
+        ep = to_edge(export(Model(), inputs, strict=True))
         with self.assertRaises(AssertionError):
             _ = ep.to_backend(BadPartitioner())
 
@@ -988,10 +997,7 @@ def test_quantized_with_delegate(self) -> None:
 
         # fails to trace here
         converted_linear_gm = to_edge(
-            export(
-                converted_linear,
-                example_inputs,
-            ),
+            export(converted_linear, example_inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -1023,12 +1029,7 @@ def forward(self, x, y):
 
         f = Module()
         inputs = (torch.ones(2, 2), torch.ones(2, 2))
         orig_res = f(*inputs)
-        orig = to_edge(
-            export(
-                f,
-                inputs,
-            )
-        )
+        orig = to_edge(export(f, inputs, strict=True))
         partitioned = orig
         partitioned = partitioned.to_backend(AddMulPartitionerDemo())
@@ -1077,12 +1078,7 @@ def forward(self, xs, y):
 
         f = Module()
         inputs = (torch.ones(2, 2), torch.ones(2, 2))
         orig_res = f(*inputs)
-        orig = to_edge(
-            export(
-                f,
-                inputs,
-            )
-        )
+        orig = to_edge(export(f, inputs, strict=True))
         partitioned = orig
         partitioned = partitioned.to_backend(AddMulPartitionerDemo())
@@ -1151,12 +1147,7 @@ def forward(self, xs, pred1, pred2, y):
 
         f = Module()
         orig_res = f(*inputs)
-        orig = to_edge(
-            export(
-                f,
-                inputs,
-            )
-        )
+        orig = to_edge(export(f, inputs, strict=True))
         partitioned = orig
         partitioned = partitioned.to_backend(AddMulPartitionerDemo())
 
@@ -1219,7 +1210,7 @@ def forward(self, x: List[torch.Tensor]):
 
         f = Module()
         inputs = ([torch.randn(2, 2), torch.randn(2, 2)],)
-        edge_prog = to_edge(export(f, inputs))
+        edge_prog = to_edge(export(f, inputs, strict=True))
         lowered_gm = to_backend(
             BackendWithCompilerDemo.__name__, edge_prog.exported_program(), []
         )
@@ -1232,7 +1223,7 @@ def __init__(self):
             def forward(self, x: List[torch.Tensor]):
                 return self.lowered(x)
 
-        gm = to_edge(export(ComposedM(), inputs))
+        gm = to_edge(export(ComposedM(), inputs, strict=True))
         gm.exported_program().module()(*inputs)
 
     def test_dict_input(self):
@@ -1243,7 +1234,7 @@ def forward(self, x: Dict[str, torch.Tensor]):
 
         f = Module()
         inputs = ({"a": torch.randn(2, 2), "b": torch.randn(2, 2)},)
-        edge_prog = to_edge(export(f, inputs))
+        edge_prog = to_edge(export(f, inputs, strict=True))
         lowered_gm = to_backend(
             BackendWithCompilerDemo.__name__, edge_prog.exported_program(), []
         )
@@ -1256,5 +1247,5 @@ def __init__(self):
             def forward(self, x: List[torch.Tensor]):
                 return self.lowered(x)
 
-        gm = to_edge(export(ComposedM(), inputs))
+        gm = to_edge(export(ComposedM(), inputs, strict=True))
         gm.exported_program().module()(*inputs)
diff --git a/exir/backend/test/test_compatibility.py b/exir/backend/test/test_compatibility.py
index 97f3e2b51b7..9d87aa5be0e 100644
--- a/exir/backend/test/test_compatibility.py
+++ b/exir/backend/test/test_compatibility.py
@@ -32,7 +32,7 @@ def forward(self, x):
 
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
diff --git a/exir/backend/test/test_graph_partition.py b/exir/backend/test/test_graph_partition.py
index 401e1c0307c..87dd6dc729c 100644
--- a/exir/backend/test/test_graph_partition.py
+++ b/exir/backend/test/test_graph_partition.py
@@ -25,7 +25,7 @@ def get_graph_module(
     ) -> torch.fx.GraphModule:
         graph_module = (
             to_edge(
-                export(module, inputs),
+                export(module, inputs, strict=True),
                 compile_config=EdgeCompileConfig(
                     _check_ir_validity=False,
                 ),
@@ -70,7 +70,6 @@ def extract_partition_list(
         supported_modules: List[torch.nn.Module],
         op_support: Optional[OperatorSupportBase] = None,
     ) -> List:
-
        node_list = self.get_node_list(graph_module, supported_modules)
 
        partition_list = generate_partitions_from_list_of_nodes(
diff --git a/exir/backend/test/test_lowered_backend_module.py b/exir/backend/test/test_lowered_backend_module.py
index 65b098f9550..dcc5841bc3e 100644
--- a/exir/backend/test/test_lowered_backend_module.py
+++ b/exir/backend/test/test_lowered_backend_module.py
@@ -58,7 +58,7 @@ def forward(self, *args):
 
         return (
             to_edge(
-                export(WrappedModule(), example_inputs),
+                export(WrappedModule(), example_inputs, strict=True),
                 compile_config=edge_compile_config,
             )
             .to_executorch()
@@ -78,10 +78,7 @@ def forward(self, x):
         model_inputs = (torch.ones(1),)
         expected_res = sin_module(*model_inputs)
         edgeir_m = to_edge(
-            export(
-                sin_module,
-                model_inputs,
-            ),
+            export(sin_module, model_inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _use_edge_ops=True
             ),
@@ -133,7 +130,8 @@ def test_emit_lowered_backend_module(self, unlift):
             model_inputs = model.get_random_inputs()
 
             edgeir_m = to_edge(
-                export(model, model_inputs), compile_config=edge_compile_config
+                export(model, model_inputs, strict=True),
+                compile_config=edge_compile_config,
             )
             lowered_model = to_backend(
                 QnnBackend.__name__, edgeir_m.exported_program(), []
@@ -189,7 +187,8 @@ def test_emit_nested_lowered_backend_module(self, unlift):
             model_inputs = model.get_random_inputs()
 
             edgeir_m = to_edge(
-                export(model, model_inputs), compile_config=edge_compile_config
+                export(model, model_inputs, strict=True),
+                compile_config=edge_compile_config,
             )
             lowered_module = to_backend(
                 QnnBackend.__name__, edgeir_m.exported_program(), []
@@ -206,7 +205,8 @@ def forward(self, *args):
 
             wrapped_module = WrappedModule(lowered_module)
             wrapped_module_edge = to_edge(
-                export(wrapped_module, model_inputs), compile_config=edge_compile_config
+                export(wrapped_module, model_inputs, strict=True),
+                compile_config=edge_compile_config,
             )
 
             nested_lowered_model = to_backend(
diff --git a/exir/backend/test/test_partitioner.py b/exir/backend/test/test_partitioner.py
index da1ae0444dd..917dae32d74 100644
--- a/exir/backend/test/test_partitioner.py
+++ b/exir/backend/test/test_partitioner.py
@@ -77,7 +77,7 @@ def partition(
         mlp = MLP()
         example_inputs = mlp.get_random_inputs()
         model = export_for_training(mlp, example_inputs).module()
-        aten = export(model, example_inputs)
+        aten = export(model, example_inputs, strict=True)
         spec_key = "path"
         spec_value = "/a/b/c/d"
         spec = MappingProxyType({spec_key: spec_value})
@@ -138,7 +138,7 @@ def partition(
         mlp = MLP()
         example_inputs = mlp.get_random_inputs()
         model = export_for_training(mlp, example_inputs).module()
-        aten = export(model, example_inputs)
+        aten = export(model, example_inputs, strict=True)
         edge = exir.to_edge(aten)
 
         with self.assertRaisesRegex(
@@ -178,7 +178,7 @@ def partition(
         mlp = MLP()
         example_inputs = mlp.get_random_inputs()
         model = export_for_training(mlp, example_inputs).module()
-        edge = exir.to_edge(export(model, example_inputs))
+        edge = exir.to_edge(export(model, example_inputs, strict=True))
 
         with self.assertRaisesRegex(
             RuntimeError,
@@ -230,7 +230,7 @@ def partition(
             )
 
         model = export_for_training(self.AddConst(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         delegated = edge.to_backend(PartitionerNoTagData())
 
         # Check Owning Program still owns all constant data
@@ -309,7 +309,7 @@ def partition(
             )
 
         model = export_for_training(self.AddConst(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         delegated = edge.to_backend(PartitionerTagData())
 
         # Check Owning Program still owns all constant data
@@ -384,7 +384,7 @@ def partition(
             )
 
         model = export_for_training(self.AddConst(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         delegated = edge.to_backend(PartitionerTagData())
 
         # Check Owning Program still owns only buffers
@@ -472,7 +472,7 @@ def partition(
 
         inputs = (torch.ones(2, 2),)
         model = export_for_training(ReuseConstData(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         exec_prog = edge.to_backend(PartitionerTagData()).to_executorch()
         executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer)
         inputs_flattened, _ = tree_flatten(inputs)
@@ -532,7 +532,7 @@ def partition(
             )
 
         model = export_for_training(ReuseConstData(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         with self.assertRaises(RuntimeError) as error:
             _ = edge.to_backend(PartitionerTagData())
 
@@ -558,10 +558,7 @@ def forward(self, x):
                 return y
 
         edge = exir.to_edge(
-            torch.export.export(
-                MutableStateModule(),
-                (torch.zeros(1),),
-            )
+            torch.export.export(MutableStateModule(), (torch.zeros(1),), strict=True)
         )
         self.assertGreater(
             len(edge.exported_program().graph_signature.buffers_to_mutate),
@@ -635,7 +632,9 @@ def forward(self, x):
         model_inputs = (torch.ones(3, 3),)
         orig_res = TestModule()(*model_inputs)
 
-        edge_program = exir.to_edge(torch.export.export(TestModule(), model_inputs))
+        edge_program = exir.to_edge(
+            torch.export.export(TestModule(), model_inputs, strict=True)
+        )
         lowered = edge_program.to_backend(AddAttributePartitionerDemo())
 
         self.assertTrue(
@@ -684,7 +683,7 @@ def forward(self, q, k_val, input_pos):
 
         model = Model()
         model.eval()
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         exir_program_aten.module()(*example_inputs)
         edge_program_manager = exir.to_edge(exir_program_aten)
         lowered = edge_program_manager.to_backend(AllNodesPartitionerDemo())
@@ -726,7 +725,7 @@ def forward(self, x):
 
         model.eval()
         example_inputs = (torch.randn(SHAPE),)
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         edge_program_manager = exir.to_edge(exir_program_aten)
         with self.assertRaises(AssertionError):
             edge_program_manager.to_backend(AddAttributePartitionerDemo())
diff --git a/exir/backend/test/test_passes.py b/exir/backend/test/test_passes.py
index 4dcc7757faa..bc18f090238 100644
--- a/exir/backend/test/test_passes.py
+++ b/exir/backend/test/test_passes.py
@@ -18,7 +18,6 @@ class TestPasses(unittest.TestCase):
 
     def test_duplicate_constant_node_pass(self):
-
         class ReuseConstData(torch.nn.Module):
             def __init__(self):
                 super().__init__()
@@ -30,7 +29,9 @@ def forward(self, x):
                 return y, z
 
         model = export_for_training(ReuseConstData(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(torch.export.export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(
+            torch.export.export(model, (torch.ones(2, 2),), strict=True)
+        )
 
         const_nodes = [
             node.name
diff --git a/exir/backend/test/test_utils.py b/exir/backend/test/test_utils.py
index 0fc522dd68e..e449809ede8 100644
--- a/exir/backend/test/test_utils.py
+++ b/exir/backend/test/test_utils.py
@@ -94,20 +94,14 @@ def forward(self, x, y):
 
         graph_module_1: torch.fx.GraphModule = (
             to_edge(
-                export(
-                    MyModule1(),
-                    (torch.rand(3, 4), torch.rand(3, 4)),
-                )
+                export(MyModule1(), (torch.rand(3, 4), torch.rand(3, 4)), strict=True)
             )
             .exported_program()
             .graph_module
         )
         graph_module_2: torch.fx.GraphModule = (
             to_edge(
-                export(
-                    MyModule2(),
-                    (torch.rand(3, 4), torch.rand(3, 4)),
-                )
+                export(MyModule2(), (torch.rand(3, 4), torch.rand(3, 4)), strict=True)
             )
             .exported_program()
             .graph_module
@@ -131,10 +125,7 @@ def forward(self, x):
 
         large_model = (
             to_edge(
-                export(
-                    LargeModel(),
-                    inputs,
-                ),
+                export(LargeModel(), inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .exported_program()
@@ -143,7 +134,7 @@ def forward(self, x):
 
         pattern = (
             to_edge(
-                export(torch.nn.Linear(3, 3), inputs),
+                export(torch.nn.Linear(3, 3), inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .exported_program()
@@ -179,10 +170,7 @@ def partition(
             )
 
         exported_program = to_edge(
-            export(
-                torch.nn.Linear(3, 3),
-                (torch.randn(3, 3),),
-            )
+            export(torch.nn.Linear(3, 3), (torch.randn(3, 3),), strict=True)
         )
 
         error_msg = r"needs a `partition_tags` field containing a mapping of tags to delegate spec"
@@ -216,7 +204,7 @@ def forward(self, a, x, b):
 
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        edge = to_edge(export(m, inputs))
+        edge = to_edge(export(m, inputs, strict=True))
         edge = edge.to_backend(AddMulPartitionerDemo())
         number_of_cpu_nodes = get_non_lowered_nodes(edge.exported_program().graph)
         # Only sub is not not lowerable
@@ -237,7 +225,7 @@ def forward(self, a, x, b):
 
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        edge = to_edge(export(m, inputs))
+        edge = to_edge(export(m, inputs, strict=True))
         edge = edge.to_backend(AddMulPartitionerDemo())
         number_of_delegates = get_delegates(edge.exported_program().graph)
         # there will be 2 delegates: (mm + add) -> sub -> (mm + add)
@@ -259,7 +247,9 @@ def forward(self, a, x, b):
 
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        edge = to_edge(export(m, inputs)).to_backend(AddMulPartitionerDemo())
+        edge = to_edge(export(m, inputs, strict=True)).to_backend(
+            AddMulPartitionerDemo()
+        )
 
         graph_str = format_delegated_graph(edge.exported_program().graph_module)
         self.assertIn(
diff --git a/exir/capture/_capture.py b/exir/capture/_capture.py
index 3c72256a33c..975191f0744 100644
--- a/exir/capture/_capture.py
+++ b/exir/capture/_capture.py
@@ -210,10 +210,11 @@ def capture(  # noqa: C901
             cast(torch.nn.Module, f.__self__),
             args,
             dynamic_shapes=dynamic_shapes,
+            strict=True,
         )
     else:
         mod = f if isinstance(f, torch.nn.Module) else WrapperModule(f)
-        ep = export(mod, args, dynamic_shapes=dynamic_shapes)
+        ep = export(mod, args, dynamic_shapes=dynamic_shapes, strict=True)
 
     ep = ep.run_decompositions(_default_decomposition_table())
     ep = _transform(ep, ReplaceViewOpsWithViewCopyOpsPass())
diff --git a/exir/emit/test/test_emit.py b/exir/emit/test/test_emit.py
index fc10c1db66f..2fb11393db4 100644
--- a/exir/emit/test/test_emit.py
+++ b/exir/emit/test/test_emit.py
@@ -154,12 +154,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         program = (
-            to_edge(
-                export(
-                    f,
-                    (torch.ones(3, 2), torch.zeros(3, 2)),
-                )
-            )
+            to_edge(export(f, (torch.ones(3, 2), torch.zeros(3, 2)), strict=True))
             .to_executorch()
             .executorch_program
         )
@@ -180,7 +175,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
     def test_basic_end_to_end(self) -> None:
         f = models.BasicSinMax()
         program = (
-            to_edge(export(f, f.get_random_inputs())).to_executorch().executorch_program
+            to_edge(export(f, f.get_random_inputs(), strict=True))
+            .to_executorch()
+            .executorch_program
         )
         exec_plan = program.execution_plan[0]
         ops = exec_plan.operators
@@ -210,7 +207,7 @@ def forward(
         f = Foo()
         x = (torch.randn(100),)
 
-        program = to_edge(export(f, x)).to_executorch().executorch_program
+        program = to_edge(export(f, x, strict=True)).to_executorch().executorch_program
         exec_plan = program.execution_plan[0]
         self.assertEqual(len(exec_plan.outputs), 4)
         self.assertEqual(len(exec_plan.inputs), 1)
@@ -230,7 +227,7 @@ class M(torch.nn.Module):
             def forward(self, x):
                 return [((1, 3, 1.2), True, [x + x, x * x], None)]
 
-        ep = torch.export.export(M(), (torch.ones(2, 3),))
+        ep = torch.export.export(M(), (torch.ones(2, 3),), strict=True)
         res = ep.module()(torch.ones(2, 3))
         self.assertEqual(res[0][0], (1, 3, 1.2))
         program = to_edge(ep).to_executorch().executorch_program
@@ -251,7 +248,7 @@ class M(torch.nn.Module):
             def forward(self, x, y, z):
                 return x + y, x + x, x + y + z
 
-        ep = torch.export.export(M(), (torch.ones(2, 3), 2, True))
+        ep = torch.export.export(M(), (torch.ones(2, 3), 2, True), strict=True)
         ep.module()(torch.ones(2, 3), 2, True)
         program = to_edge(ep).to_executorch().executorch_program
         inputs = program.execution_plan[0].inputs
@@ -270,7 +267,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         inputs = (torch.ones((10, 10)),)
-        edge = to_edge(export(f, inputs))
+        edge = to_edge(export(f, inputs, strict=True))
 
         removed_ops = ["aten::relu_", "aten::view"]
         expected_ops = [
@@ -319,7 +316,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         inputs = (torch.ones(2, 2),)
 
-        program = to_edge(export(model, inputs)).to_executorch().executorch_program
+        program = (
+            to_edge(export(model, inputs, strict=True))
+            .to_executorch()
+            .executorch_program
+        )
 
         self.assertEqual(len(program.execution_plan[0].operators), 2)
 
@@ -333,7 +334,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         program = (
-            to_edge(export(f, (torch.randn(2, 3, 5),)))
+            to_edge(export(f, (torch.randn(2, 3, 5),), strict=True))
             .to_executorch()
             .executorch_program
         )
@@ -359,7 +360,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         program = (
-            to_edge(export(f, (torch.randn(3, 5),))).to_executorch().executorch_program
+            to_edge(export(f, (torch.randn(3, 5),), strict=True))
+            .to_executorch()
+            .executorch_program
         )
         # The value for beta should appear before alpha
         self.assertEqual(program.execution_plan[0].values[12].val, Int(3))
@@ -378,7 +381,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         x, _ = torch.sort(torch.randn(3, 4))
-        program = to_edge(export(f, (x,))).to_executorch().executorch_program
+        program = (
+            to_edge(export(f, (x,), strict=True)).to_executorch().executorch_program
+        )
         # The value for right should appear before side
         self.assertEqual(program.execution_plan[0].values[6].val, Bool(False))
         self.assertEqual(program.execution_plan[0].values[7].val, Bool(True))
@@ -402,7 +407,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         program = (
-            to_edge(export(f, (torch.ones(3), torch.ones(3))))
+            to_edge(export(f, (torch.ones(3), torch.ones(3)), strict=True))
             .to_executorch()
             .executorch_program
         )
@@ -429,7 +434,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         inputs = (torch.ones(2, 2, dtype=torch.int32),)
 
         # Trace to FX Graph.
-        program = to_edge(export(model_out, inputs)).to_executorch().executorch_program
+        program = (
+            to_edge(export(model_out, inputs, strict=True))
+            .to_executorch()
+            .executorch_program
+        )
 
         self.assertEqual(len(program.execution_plan[0].chains[0].instructions), 2)
         self._assertCallLength(program, 0, 4)
@@ -449,7 +458,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         h = Foo()
         x = (torch.randn(3, 2),)
 
-        exec_prog = to_edge(export(h, x)).to_executorch(
+        exec_prog = to_edge(export(h, x, strict=True)).to_executorch(
             exir.ExecutorchBackendConfig(emit_stacktrace=True)
         )
         program = exec_prog.executorch_program
@@ -497,7 +506,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         h = Hoo()
         x = (torch.randn(3, 2),)
 
-        program = to_edge(export(h, x)).to_executorch().executorch_program
+        program = to_edge(export(h, x, strict=True)).to_executorch().executorch_program
 
         # Check the stacktrace is None since we did not specify to get the stacktrace
         self.assertTrue(program.execution_plan[0].chains[0].stacktrace is None)
@@ -512,9 +521,10 @@ def forward(self, x: torch.Tensor, n: torch.Tensor) -> torch.Tensor:
 
         x = torch.randn(3, 2)
         program = (
-            to_edge(export(f, (x, x)))
+            to_edge(export(f, (x, x), strict=True))
             # .to_edge(self.compile_config)  # TODO(larryliu): fix cat
-            .to_executorch().executorch_program
+            .to_executorch()
+            .executorch_program
         )
 
         self.assertEqual(len(program.execution_plan[0].chains[0].instructions), 1)
@@ -529,7 +539,7 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
         f = Foo()
 
         x = (torch.randn(10),)
-        program = to_edge(export(f, x)).to_executorch().executorch_program
+        program = to_edge(export(f, x, strict=True)).to_executorch().executorch_program
         self._assertCallLength(program, 0, 8)
 
     def test_emit_layout(self) -> None:
@@ -540,7 +550,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()
 
         x = (torch.randn(3, 2),)
-        program = to_edge(export(f, x)).to_executorch().executorch_program
+        program = to_edge(export(f, x, strict=True)).to_executorch().executorch_program
 
         vals = program.execution_plan[0].values
         for val in vals:
@@ -560,7 +570,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = (torch.triu(torch.ones(2, 2)),)
         program = (
             to_edge(
-                export(f, x),
+                export(f, x, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .to_executorch()
@@ -578,7 +588,9 @@ def forward(self, x):
                 return torch.nn.functional.interpolate(x, scale_factor=2)
 
         x = (torch.randn(1, 1, 2, 2),)
-        program = to_edge(export(M(), x)).to_executorch().executorch_program
+        program = (
+            to_edge(export(M(), x, strict=True)).to_executorch().executorch_program
+        )
         self.assertIsInstance(
             program.execution_plan[0].values[-1].val, schema.OptionalTensorList
         )
@@ -600,7 +612,9 @@ def false_fn(y: torch.Tensor) -> torch.Tensor:
                 ret = control_flow.cond(pred, true_fn, false_fn, [x])
                 return ret
 
-        module = to_edge(export(M(), (torch.tensor(True), torch.ones(2, 2))))
+        module = to_edge(
+            export(M(), (torch.tensor(True), torch.ones(2, 2)), strict=True)
+        )
         program = module.to_executorch().executorch_program
 
         num_mm = 0
@@ -635,7 +649,7 @@ def map_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 
         inputs = (torch.ones(4, 4), torch.ones(4))
         module = to_edge(
-            export(f, inputs),
+            export(f, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
         )
         program = module.to_executorch().executorch_program
@@ -708,7 +722,7 @@ def map_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 
         inputs = (torch.ones(4, 4), torch.ones(4))
         module = to_edge(
-            export(f, inputs),
+            export(f, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
         )
         _load_for_executorch_from_buffer(module.to_executorch().buffer)
@@ -725,7 +739,7 @@ def map_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 
         inputs = (torch.ones(4, 4), torch.ones(4))
         module = to_edge(
-            export(f, inputs),
+            export(f, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
         )
         buffer = module.to_executorch().buffer
@@ -746,7 +760,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         inputs = (torch.ones(10, 5),)
         program = (
             to_edge(
-                export(model, inputs),
+                export(model, inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .to_executorch()
@@ -790,12 +804,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         f = Add()
 
-        edge_program_manager = to_edge(
-            export(
-                f,
-                (torch.ones(3, 2),),
-            )
-        )
+        edge_program_manager = to_edge(export(f, (torch.ones(3, 2),), strict=True))
         edge_program_manager._edge_programs["forward"] = constant_prop_pass(
             edge_program_manager.exported_program()
         )
@@ -805,12 +814,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             .non_const_buffer_sizes
         )
 
-        edge_program_manager = to_edge(
-            export(
-                f,
-                (torch.ones(3, 2),),
-            )
-        )
+        edge_program_manager = to_edge(export(f, (torch.ones(3, 2),), strict=True))
         non_const_buffer_size_without_const_prop_pass = (
             edge_program_manager.to_executorch()
             .executorch_program.execution_plan[0]
@@ -889,7 +893,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         inputs = (torch.ones(10, 5),)
         try:
             to_edge(
-                export(model, inputs),
+                export(model, inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             ).to_executorch()
         except:
@@ -908,7 +912,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         inputs = (torch.ones(10, 5, 2, 1),)
         with self.assertRaises(InternalError):
             to_edge(
-                export(model, inputs),
+                export(model, inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(
                     _check_ir_validity=False, _skip_dim_order=True
                 ),
@@ -916,7 +920,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         # Success if you use dim_order
         to_edge(
-            export(model, inputs),
+            export(model, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _skip_dim_order=False
             ),
@@ -939,12 +943,12 @@ def forward_sigmoid(self, x: torch.Tensor) -> torch.Tensor:
         inputs = (torch.ones(10, 5),)
         with patch_forward(model, model.forward_relu):
             program_relu = to_edge(
-                export(model, inputs),
+                export(model, inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             ).to_executorch()
         with patch_forward(model, model.forward_sigmoid):
             program_sigmoid = to_edge(
-                export(model, inputs),
+                export(model, inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             ).to_executorch()
         exir_input = {
@@ -1003,9 +1007,11 @@ def forward_sigmoid(self, x: torch.Tensor) -> torch.Tensor:
         model = SimpleLinear()
         inputs = (torch.ones(10, 5),)
         with patch_forward(model, model.forward_relu):
-            program_relu = to_edge(export(model, inputs)).to_executorch()
+            program_relu = to_edge(export(model, inputs, strict=True)).to_executorch()
         with patch_forward(model, model.forward_sigmoid):
-            program_sigmoid = to_edge(export(model, inputs)).to_executorch()
+            program_sigmoid = to_edge(
+                export(model, inputs, strict=True)
+            ).to_executorch()
         exir_input = {
             "forward_relu": program_relu.exported_program(),
             "forward_sigmoid": program_sigmoid.exported_program(),
@@ -1056,10 +1062,7 @@ def make_program(
             inputs,
         ) -> "ExecutorchProgramManager":
             return to_edge(
-                export(
-                    WrapperModule(fn),
-                    inputs,
-                )
+                export(WrapperModule(fn), inputs, strict=True)
             ).to_executorch()
 
         program_a = make_program(model.a, inputs)
@@ -1106,11 +1109,7 @@ def forward(self, k: torch.Tensor) -> torch.Tensor:
         k = torch.rand(2, 4)
         dim0_k = Dim("dim0_k", max=3)
         dynamic_shapes = {"k": {0: dim0_k}}
-        captured = export(
-            func,
-            (k,),
-            dynamic_shapes=dynamic_shapes,
-        )
+        captured = export(func, (k,), dynamic_shapes=dynamic_shapes, strict=True)
         edge = to_edge(captured)
         from executorch.exir.passes import MemoryPlanningPass
 
@@ -1158,7 +1157,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         model = Simple()
         inputs = (torch.ones(10, 5),)
 
-        program = to_edge(export(model, inputs)).to_executorch()
+        program = to_edge(export(model, inputs, strict=True)).to_executorch()
         exir_input = {
             "forward": program.exported_program(),
         }
@@ -1232,7 +1231,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         self.assertEqual(len(merged_program.execution_plan[4].outputs), 2)
 
         merged_program = to_edge(
-            export(model, inputs), constant_methods=getters
+            export(model, inputs, strict=True), constant_methods=getters
         ).to_executorch()
         executorch_module = _load_for_executorch_from_buffer(merged_program.buffer)
         torch.allclose(executorch_module.run_method("get_tensor", [])[0], tensor_output)
@@ -1243,10 +1242,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     def test_emit_debug_handle_map(self) -> None:
         mul_model = Mul()
         program_mul = to_edge(
-            export(
-                mul_model,
-                mul_model.get_random_inputs(),
-            )
+            export(mul_model, mul_model.get_random_inputs(), strict=True)
         ).to_executorch()
         # this triggers the actual emission of the graph
         program_mul._emitter_output.program
@@ -1263,10 +1259,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         mul_model = SimpleAddMul()
         program_mul = to_edge(
-            export(
-                mul_model,
-                (torch.ones(2, 2),),
-            )
+            export(mul_model, (torch.ones(2, 2),), strict=True)
         ).to_executorch()
 
         # this triggers the actual emission of the graph
@@ -1317,7 +1310,7 @@ def forward(self, x):
 
         inputs = ([torch.ones(2, 2), torch.ones(2, 2)],)
         model = TestModel()
-        edgeir_m = to_edge(export(model, inputs))
+        edgeir_m = to_edge(export(model, inputs, strict=True))
         lowered_module = to_backend(
             "BackendWithCompilerExample", edgeir_m.exported_program(), []
         )
@@ -1332,7 +1325,7 @@ def forward(self, list_a):
 
         composite_model = CompositeModule()
         exec_prog = to_edge(
-            export(composite_model, inputs),
+            export(composite_model, inputs, strict=True),
         ).to_executorch()
         exec_prog.buffer
 
@@ -1359,7 +1352,7 @@ def forward(self, input):  # a, x, b):
 
         model_inputs = ((torch.ones(2, 2), 2 * torch.ones(2, 2), 3 * torch.ones(2, 2)),)
         model = AddMulModule()
-        edgeir_m = to_edge(export(model, model_inputs))
+        edgeir_m = to_edge(export(model, model_inputs, strict=True))
         lowered_module = to_backend(
             "BackendWithCompilerExample", edgeir_m.exported_program(), []
         )
@@ -1374,7 +1367,7 @@ def forward(self, list_a):
 
         composite_model = CompositeModule()
         exec_prog = to_edge(
-            export(composite_model, model_inputs),
+            export(composite_model, model_inputs, strict=True),
         ).to_executorch()
         exec_prog.buffer
 
@@ -1401,7 +1394,7 @@ def forward(self, x, y):
 
         inputs = (torch.ones(2, 2), torch.ones(2, 2))
         model = TestModel()
-        edgeir_m = to_edge(export(model, inputs))
+        edgeir_m = to_edge(export(model, inputs, strict=True))
         lowered_module = to_backend(
             "BackendWithCompilerExample", edgeir_m.exported_program(), []
         )
@@ -1416,7 +1409,7 @@ def forward(self, x, y):
 
         composite_model = CompositeModule()
         exec_prog = to_edge(
-            export(composite_model, inputs),
+            export(composite_model, inputs, strict=True),
         ).to_executorch()
         # Reading the program triggers the call to emit_program underneath which
         # we need to be done for our test to succeed.
@@ -1449,12 +1442,7 @@ def forward(self, x):
         self.assertEqual(model.W1.untyped_storage().nbytes(), 8)
         self.assertEqual(model.W2.nbytes, 4)
         self.assertEqual(model.W2.untyped_storage().nbytes(), 8)
-        program = to_edge(
-            export(
-                model,
-                (torch.ones(1),),
-            )
-        ).to_executorch()
+        program = to_edge(export(model, (torch.ones(1),), strict=True)).to_executorch()
         program = program._emitter_output.program
 
         # each emitted weight is not a view
@@ -1471,12 +1459,7 @@ def forward(self, x):
                 return x + self.buf
 
         model = NonPersistentBuffer()
-        program = to_edge(
-            export(
-                model,
-                (torch.ones(1),),
-            )
-        ).to_executorch()
+        program = to_edge(export(model, (torch.ones(1),), strict=True)).to_executorch()
         program = program._emitter_output.program
         # confirm that the buffer was emitted
         self.assertEqual(len(program.constant_buffer), 2)
@@ -1494,10 +1477,7 @@ def forward(self, x):
 
         model = LiftedConstants()
         program = to_edge(
-            export(
-                model,
-                (torch.ones(3, 2),),
-            )
+            export(model, (torch.ones(3, 2),), strict=True)
         ).to_executorch()
 
         program = program._emitter_output.program
@@ -1527,12 +1507,7 @@ def forward(self, x):
             self.state.add_(1)
             return y
 
-        model = to_edge(
-            export(
-                MutableStateModule(),
-                (torch.zeros(1),),
-            )
-        )
+        model = to_edge(export(MutableStateModule(), (torch.zeros(1),), strict=True))
         model = model.to_executorch()
         model.dump_executorch_program(True)
         self.assertTrue(
@@ -1563,12 +1538,7 @@ def forward(self, x):
             self.state.add_(1)
             return y
 
-        model = to_edge(
-            export(
-                MutableStateModule(),
-                (torch.zeros(1),),
-            )
-        )
+        model = to_edge(export(MutableStateModule(), (torch.zeros(1),), strict=True))
         model = model.to_executorch(
             config=ExecutorchBackendConfig(
                 memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
@@ -1594,12 +1564,7 @@ def forward(self, x):
             masked_weights = x.masked_fill(self.mask == 0, float("-inf"))
             return masked_weights
 
-        model = to_edge(
-            export(
-                InfinityMaskModel(),
-                (torch.randn(2, 2),),
-            )
-        )
+        model = to_edge(export(InfinityMaskModel(), (torch.randn(2, 2),), strict=True))
 
         # Confirm that we can serialize the model with infinity in it.
         model = model.to_executorch()
@@ -1623,7 +1588,7 @@ def forward(self, x):
             x.add_(1)
 
         model = to_edge(
-            export(MutateInputTensorModule(), (torch.zeros(1),))
+            export(MutateInputTensorModule(), (torch.zeros(1),), strict=True)
         ).to_executorch(
             config=ExecutorchBackendConfig(
                 memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False)
@@ -1643,7 +1608,9 @@ def __init__(self):
             def forward(self, x):
                 return self.linear(x)
 
-        model = to_edge(export(LinearModule(), (torch.ones(5, 5),))).to_executorch(
+        model = to_edge(
+            export(LinearModule(), (torch.ones(5, 5),), strict=True)
+        ).to_executorch(
             config=ExecutorchBackendConfig(
                 external_constants=True,
             )
diff --git a/exir/program/test/test_fake_program.py b/exir/program/test/test_fake_program.py
index 15959efde47..5ad5d102b42 100644
--- a/exir/program/test/test_fake_program.py
+++ b/exir/program/test/test_fake_program.py
@@ -30,8 +30,7 @@ def forward(self, arg) -> torch.Tensor:
 
     linear = Linear()
     exported_program = export(
-        linear,
-        args=(torch.randn(10, 10),),
+        linear, args=(torch.randn(10, 10),), strict=True
     ).run_decompositions()
     return exported_program
 
diff --git a/exir/program/test/test_program.py b/exir/program/test/test_program.py
index d07972f971a..046ad03e757 100644
--- a/exir/program/test/test_program.py
+++ b/exir/program/test/test_program.py
@@ -166,11 +166,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             torch.ones(1),
             torch.zeros(1),
         ),
+        strict=True,
     ).run_decompositions()
-    programs["foo"] = export(
-        foo,
-        (torch.ones(1),),
-    ).run_decompositions()
+    programs["foo"] = export(foo, (torch.ones(1),), strict=True).run_decompositions()
     return programs
 
 
@@ -289,7 +287,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             return x * 3.14
 
         mul = Mul()
-        ep = to_edge(torch.export.export(mul, (torch.ones(1),))).exported_program()
+        ep = to_edge(
+            torch.export.export(mul, (torch.ones(1),), strict=True)
+        ).exported_program()
         for node in ep.graph.nodes:
             self.assertNotEqual(node.op, "get_attr")
         self.assertEqual(
@@ -306,7 +306,7 @@ def forward(self, x, y):
             torch._check(z < 4)
             return x[z : z + y.shape[0]]
 
-        ep = torch.export.export(M(), (torch.randn(10), torch.tensor([3])))
+        ep = torch.export.export(M(), (torch.randn(10), torch.tensor([3])), strict=True)
         edge_manager = to_edge(
             ep, compile_config=exir.EdgeCompileConfig(_check_ir_validity=False)
         )
@@ -350,7 +350,6 @@ def test_edge_manager_transform(self):
         )
 
     def test_issue_3659(self):
-
         class Mul(torch.nn.Module):
             def __init__(self):
                 super(Mul, self).__init__()
@@ -371,7 +370,10 @@ def get_dynamic_shapes(self):
 
         model = Mul()
         ep = torch.export.export(
-            model, model.get_example_inputs(), dynamic_shapes=model.get_dynamic_shapes()
+            model,
+            model.get_example_inputs(),
+            dynamic_shapes=model.get_dynamic_shapes(),
+            strict=True,
        )
 
         to_edge(
@@ -549,7 +551,7 @@ def _test_edge_dialect_verifier(
         if not isinstance(callable, torch.nn.Module):
             callable = WrapperModule(callable)
 
-        exported_foo = export(callable, inputs)
+        exported_foo = export(callable, inputs, strict=True)
         _ = to_edge(exported_foo, compile_config=edge_compile_config)
 
     def test_edge_dialect_custom_op(self):
@@ -697,7 +699,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         from torch._export.verifier import SpecViolationError
 
         input = torch.arange(9, dtype=torch.float) - 4
-        ep = torch.export.export(LinalgNorm(), (input,))
+        ep = torch.export.export(LinalgNorm(), (input,), strict=True)
 
         # aten::linalg_norm is not a core op, so it should error out
         with self.assertRaises(SpecViolationError):
@@ -744,7 +746,7 @@ def
count_nodes(graph_module, target): def test_to_edge_with_single_preserved_op(self): model = TestLinear() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -759,7 +761,7 @@ def test_to_edge_with_single_preserved_op(self): def test_to_edge_with_partial_ops_preserved(self): model = TestLinearSDPACombined() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -774,7 +776,7 @@ def test_to_edge_with_partial_ops_preserved(self): def test_to_edge_with_multiple_ops_preserved(self): model = TestLinearSDPACombined() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -791,7 +793,7 @@ def test_to_edge_with_multiple_ops_preserved(self): def test_to_edge_with_preserved_ops_not_in_model(self): model = TestSDPA() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -806,7 +808,7 @@ def test_to_edge_with_preserved_ops_not_in_model(self): def test_save_fails(self): model = TestLinear() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) edge = to_edge(program) et = edge.to_executorch() with self.assertRaises(ValueError):
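Note: every hunk in this diff makes the same mechanical change, so a minimal sketch of the resulting pattern may help reviewers. The AddOne module and its inputs below are hypothetical, chosen only for illustration; the export/to_edge/to_executorch calls are the ones used throughout the diff:

    import torch
    from executorch.exir import to_edge
    from torch.export import export

    class AddOne(torch.nn.Module):  # hypothetical toy module
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x + 1

    # Passing strict=True pins export to TorchDynamo-based tracing with
    # strict graph-capture checks, instead of relying on whatever the
    # default happens to be in the installed torch version.
    exported = export(AddOne(), (torch.ones(2, 2),), strict=True)
    program = to_edge(exported).to_executorch()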