From 389327a1d94786dd94ff0aa12a963efdea6f531d Mon Sep 17 00:00:00 2001 From: Yancheng Zheng <103552181+anakinxc@users.noreply.github.com> Date: Mon, 24 Jun 2024 11:02:09 +0800 Subject: [PATCH] Repo sync (#743) --- .licenserc.yaml | 1 + .vscode/settings.json | 2 +- CHANGELOG.md | 6 + bazel/repositories.bzl | 4 +- docs/development/pipeline.rst | 10 +- docs/reference/pphlo_doc.rst | 2 +- docs/reference/update_pphlo_doc.sh | 4 +- docs/tutorials/quick_start.ipynb | 4 +- examples/cpp/simple_pphlo.cc | 4 +- libspu/compiler/codegen/BUILD.bazel | 2 +- libspu/compiler/core/BUILD.bazel | 2 +- libspu/compiler/core/core.cc | 9 +- libspu/compiler/core/core.h | 5 + libspu/compiler/front_end/BUILD.bazel | 4 +- libspu/compiler/front_end/fe.cc | 18 +- libspu/compiler/front_end/hlo_importer.h | 2 +- libspu/compiler/passes/BUILD.bazel | 305 --------------- .../compiler/passes/decompose_comparison.cc | 69 ---- libspu/compiler/passes/decompose_minmax.cc | 70 ---- libspu/compiler/tests/BUILD.bazel | 3 +- libspu/compiler/tests/hlo2pphlo/gather_s.mlir | 15 - libspu/compiler/tests/interpret/abs.mlir | 44 +++ libspu/compiler/tests/interpret/add.mlir | 350 ++++++++++++++++++ libspu/compiler/tests/interpret/and.mlir | 275 ++++++++++++++ libspu/compiler/tests/interpret/atan2.mlir | 25 ++ .../compiler/tests/interpret/broadcast.mlir | 9 + libspu/compiler/tests/interpret/case.mlir | 70 ++++ libspu/compiler/tests/interpret/ceil.mlir | 66 ++++ libspu/compiler/tests/interpret/clamp.mlir | 11 + .../compiler/tests/interpret/concatenate.mlir | 10 + libspu/compiler/tests/interpret/convert.mlir | 23 ++ .../compiler/tests/interpret/convolution.mlir | 108 ++++++ libspu/compiler/tests/interpret/cosine.mlir | 66 ++++ libspu/compiler/tests/interpret/divide.mlir | 75 ++++ .../compiler/tests/interpret/dot_general.mlir | 51 +++ .../tests/interpret/dynamic_slice.mlir | 14 + .../tests/interpret/dynamic_update_slice.mlir | 17 + libspu/compiler/tests/interpret/equal.mlir | 175 +++++++++ 
.../compiler/tests/interpret/exponential.mlir | 22 ++ .../interpret/exponential_minus_one.mlir | 22 ++ libspu/compiler/tests/interpret/floor.mlir | 66 ++++ .../tests/interpret/generate_mlir_tests.py | 124 +++++++ libspu/compiler/tests/interpret/greater.mlir | 100 +++++ .../tests/interpret/greater_equal.mlir | 100 +++++ libspu/compiler/tests/interpret/if.mlir | 33 ++ libspu/compiler/tests/interpret/iota.mlir | 152 ++++++++ libspu/compiler/tests/interpret/less.mlir | 100 +++++ .../compiler/tests/interpret/less_equal.mlir | 100 +++++ libspu/compiler/tests/interpret/log.mlir | 22 ++ .../tests/interpret/log_plus_one.mlir | 22 ++ libspu/compiler/tests/interpret/logistic.mlir | 22 ++ libspu/compiler/tests/interpret/maximum.mlir | 300 +++++++++++++++ libspu/compiler/tests/interpret/minimum.mlir | 300 +++++++++++++++ libspu/compiler/tests/interpret/multiply.mlir | 300 +++++++++++++++ libspu/compiler/tests/interpret/negate.mlir | 242 ++++++++++++ libspu/compiler/tests/interpret/not.mlir | 242 ++++++++++++ .../compiler/tests/interpret/not_equal.mlir | 100 +++++ libspu/compiler/tests/interpret/or.mlir | 275 ++++++++++++++ libspu/compiler/tests/interpret/pad.mlir | 17 + libspu/compiler/tests/interpret/popcnt.mlir | 22 ++ libspu/compiler/tests/interpret/power.mlir | 75 ++++ libspu/compiler/tests/interpret/reduce.mlir | 16 + .../tests/interpret/reduce_window.mlir | 39 ++ libspu/compiler/tests/interpret/reshape.mlir | 88 +++++ libspu/compiler/tests/interpret/reverse.mlir | 11 + .../tests/interpret/round_nearest_afz.mlir | 22 ++ libspu/compiler/tests/interpret/rsqrt.mlir | 22 ++ libspu/compiler/tests/interpret/select.mlir | 23 ++ .../tests/interpret/select_and_scatter.mlir | 31 ++ .../interpret/shift_right_arithmetic.mlir | 25 ++ .../tests/interpret/shift_right_logical.mlir | 25 ++ libspu/compiler/tests/interpret/sign.mlir | 44 +++ libspu/compiler/tests/interpret/sine.mlir | 66 ++++ libspu/compiler/tests/interpret/slice.mlir | 15 + libspu/compiler/tests/interpret/sort.mlir | 19 + 
libspu/compiler/tests/interpret/sqrt.mlir | 22 ++ libspu/compiler/tests/interpret/subtract.mlir | 275 ++++++++++++++ libspu/compiler/tests/interpret/tanh.mlir | 66 ++++ .../interpret/template/basic_binary.template | 22 ++ .../interpret/template/basic_unary.template | 19 + .../tests/interpret/test_json/abs.json | 39 ++ .../tests/interpret/test_json/add.json | 303 +++++++++++++++ .../tests/interpret/test_json/and.json | 237 ++++++++++++ .../tests/interpret/test_json/arshift.json | 27 ++ .../tests/interpret/test_json/atan2.json | 28 ++ .../tests/interpret/test_json/ceil.json | 57 +++ .../tests/interpret/test_json/cosine.json | 57 +++ .../tests/interpret/test_json/divide.json | 70 ++++ .../tests/interpret/test_json/equal.json | 153 ++++++++ .../interpret/test_json/exponential.json | 24 ++ .../test_json/exponential_minus_one.json | 23 ++ .../tests/interpret/test_json/floor.json | 57 +++ .../tests/interpret/test_json/greater.json | 90 +++++ .../interpret/test_json/greater_equal.json | 90 +++++ .../tests/interpret/test_json/less.json | 90 +++++ .../tests/interpret/test_json/less_equal.json | 90 +++++ .../tests/interpret/test_json/log.json | 23 ++ .../interpret/test_json/log_plus_one.json | 23 ++ .../tests/interpret/test_json/logistic.json | 23 ++ .../tests/interpret/test_json/max.json | 261 +++++++++++++ .../tests/interpret/test_json/min.json | 261 +++++++++++++ .../tests/interpret/test_json/multiply.json | 261 +++++++++++++ .../tests/interpret/test_json/negate.json | 185 +++++++++ .../tests/interpret/test_json/not.json | 182 +++++++++ .../tests/interpret/test_json/not_equal.json | 90 +++++ .../tests/interpret/test_json/or.json | 237 ++++++++++++ .../tests/interpret/test_json/popcnt.json | 22 ++ .../tests/interpret/test_json/power.json | 71 ++++ .../tests/interpret/test_json/reshape.json | 70 ++++ .../tests/interpret/test_json/round_afz.json | 23 ++ .../tests/interpret/test_json/rshift.json | 27 ++ .../tests/interpret/test_json/rsqrt.json | 23 ++ 
.../tests/interpret/test_json/sign.json | 39 ++ .../tests/interpret/test_json/sine.json | 57 +++ .../tests/interpret/test_json/sqrt.json | 23 ++ .../tests/interpret/test_json/subtract.json | 240 ++++++++++++ .../tests/interpret/test_json/tanh.json | 57 +++ .../tests/interpret/test_json/xor.json | 237 ++++++++++++ .../compiler/tests/interpret/transpose.mlir | 30 ++ libspu/compiler/tests/interpret/while.mlir | 27 ++ libspu/compiler/tests/interpret/xor.mlir | 275 ++++++++++++++ libspu/compiler/tests/lit.cfg.py | 5 +- .../tests/optimizations/decompose_minmax.mlir | 17 - .../optimizations/expand_secret_gather.mlir | 14 - .../no_expand_secret_gather.mlir | 15 - .../{ => passes}/hlo2pphlo/binary_ops_pp.mlir | 2 +- .../{ => passes}/hlo2pphlo/binary_ops_ps.mlir | 2 +- .../{ => passes}/hlo2pphlo/binary_ops_ss.mlir | 2 +- .../{ => passes}/hlo2pphlo/comparison_pp.mlir | 2 +- .../{ => passes}/hlo2pphlo/comparison_ps.mlir | 2 +- .../{ => passes}/hlo2pphlo/comparison_ss.mlir | 2 +- .../{ => passes}/hlo2pphlo/complex_p.mlir | 2 +- .../{ => passes}/hlo2pphlo/complex_s.mlir | 2 +- .../{ => passes}/hlo2pphlo/conditional_p.mlir | 2 +- .../{ => passes}/hlo2pphlo/conditional_s.mlir | 2 +- .../{ => passes}/hlo2pphlo/dynamic_slice.mlir | 2 +- .../hlo2pphlo/empty_function.mlir | 2 +- .../{ => passes}/hlo2pphlo/gather_p.mlir | 4 +- .../tests/passes/hlo2pphlo/gather_s.mlir | 15 + .../{ => passes}/hlo2pphlo/nullary_ops.mlir | 2 +- .../{ => passes}/hlo2pphlo/reduce_p.mlir | 2 +- .../{ => passes}/hlo2pphlo/reduce_s.mlir | 2 +- .../hlo2pphlo/select_and_scatter.mlir | 2 +- .../{ => passes}/hlo2pphlo/shape_ops.mlir | 2 +- .../tests/{ => passes}/hlo2pphlo/sort_p.mlir | 2 +- .../tests/{ => passes}/hlo2pphlo/sort_s.mlir | 2 +- .../{ => passes}/hlo2pphlo/tenary_ops_p.mlir | 2 +- .../hlo2pphlo/tenary_ops_s_1.mlir | 2 +- .../hlo2pphlo/tenary_ops_s_2.mlir | 2 +- .../{ => passes}/hlo2pphlo/unary_ops_p.mlir | 2 +- .../{ => passes}/hlo2pphlo/unary_ops_s.mlir | 2 +- .../{ => 
passes}/hlo2pphlo/vreduce_mixed.mlir | 2 +- .../{ => passes}/hlo2pphlo/vreduce_p.mlir | 2 +- .../{ => passes}/hlo2pphlo/vreduce_s.mlir | 2 +- .../tests/{ => passes}/hlo2pphlo/while_p.mlir | 2 +- .../tests/{ => passes}/hlo2pphlo/while_s.mlir | 2 +- .../passes/optimizations/canonicalize.mlir | 98 +++++ .../optimizations/convert_push_down.mlir | 2 +- .../optimizations/decompose_ops.mlir} | 44 ++- .../optimizations/expand_secret_gather.mlir | 14 + .../optimizations/lower_mixed_type_op.mlir | 2 +- .../no_expand_secret_gather.mlir | 15 + .../optimizations/ops_negative.mlir | 2 +- .../optimize_denominator_with_bcst.mlir | 2 +- .../optimizations/optimize_maxpool.mlir | 2 +- .../optimizations/optimize_select.mlir | 10 +- .../optimizations/optimize_sqrt_to_rsqrt.mlir | 2 +- .../optimizations/partial_sort_to_topk.mlir | 2 +- .../optimizations/pphlo_simple_dealloc.mlir | 2 +- .../optimizations/reduce_truncation.mlir | 2 +- .../optimizations/sort_lowering.mlir | 2 +- libspu/compiler/tools/BUILD.bazel | 35 +- .../tools/{pphlo-lsp.cc => spu-lsp.cc} | 2 +- .../tools/{pphlo-opt.cc => spu-opt.cc} | 4 +- libspu/compiler/tools/spu-translate.cc | 259 +++++++++++++ libspu/compiler/utils/BUILD.bazel | 30 ++ libspu/compiler/utils/utils.cc | 27 ++ libspu/compiler/utils/utils.h | 23 ++ libspu/device/BUILD.bazel | 21 +- libspu/device/api.cc | 7 +- libspu/device/executor.cc | 2 - libspu/device/executor.h | 14 + libspu/device/intrinsic_table.h | 28 ++ libspu/device/pphlo/BUILD.bazel | 41 +- libspu/device/pphlo/pphlo_executor.cc | 92 ++--- libspu/device/pphlo/pphlo_executor_test.cc | 130 +------ .../device/pphlo/pphlo_intrinsic_executor.cc | 26 +- .../device/pphlo/pphlo_intrinsic_executor.h | 2 +- libspu/device/pphlo/pphlo_verifier.cc | 3 +- libspu/device/pphlo/pphlo_verifier.h | 4 +- libspu/device/pphlo/pphlo_verifier_test.cc | 2 +- libspu/device/utils/BUILD.bazel | 52 +++ .../device/{ => utils}/debug_dump_constant.cc | 2 +- .../device/{ => utils}/debug_dump_constant.h | 0 
.../pphlo_executor_debug_runner.cc | 2 +- .../pphlo_executor_test_runner.cc | 2 +- .../pphlo_executor_test_runner.h | 0 libspu/dialect/pphlo/{ => IR}/BUILD.bazel | 1 + .../dialect/pphlo/{ => IR}/assembly_format.cc | 74 +--- .../dialect/pphlo/{ => IR}/assembly_format.h | 28 +- libspu/dialect/pphlo/{ => IR}/attrs.cc | 8 +- libspu/dialect/pphlo/{ => IR}/attrs.h | 4 +- libspu/dialect/pphlo/{ => IR}/attrs.td | 2 +- libspu/dialect/pphlo/{ => IR}/base_enums.cc | 4 +- libspu/dialect/pphlo/{ => IR}/base_enums.h | 2 +- libspu/dialect/pphlo/{ => IR}/base_enums.td | 2 +- .../pphlo/{ => IR}/canonicalization.cc | 226 ++++++++++- .../{ => IR}/canonicalization_patterns.td | 2 +- libspu/dialect/pphlo/{ => IR}/dialect.cc | 32 +- libspu/dialect/pphlo/{ => IR}/dialect.h | 2 +- libspu/dialect/pphlo/{ => IR}/dialect.td | 1 + libspu/dialect/pphlo/IR/fold.cc | 159 ++++++++ libspu/dialect/pphlo/{ => IR}/interface.h | 2 +- libspu/dialect/pphlo/{ => IR}/interface.td | 0 libspu/dialect/pphlo/{ => IR}/ops.cc | 21 +- libspu/dialect/pphlo/{ => IR}/ops.h | 8 +- libspu/dialect/pphlo/{ => IR}/ops.td | 51 ++- libspu/dialect/pphlo/{ => IR}/print_parse.cc | 2 +- .../dialect/pphlo/{ => IR}/type_inference.cc | 135 ++++++- libspu/dialect/pphlo/{ => IR}/types.cc | 13 +- libspu/dialect/pphlo/{ => IR}/types.h | 8 +- libspu/dialect/pphlo/{ => IR}/types.td | 6 +- libspu/dialect/pphlo/fold.cc | 106 ------ libspu/dialect/pphlo/transforms/BUILD.bazel | 76 ++++ .../pphlo/transforms}/convert_push_down.cc | 14 +- .../dialect/pphlo/transforms/decompose_ops.cc | 144 +++++++ .../pphlo/transforms/decompose_patterns.td | 67 ++++ .../pphlo/transforms}/expand_secret_gather.cc | 30 +- .../transforms}/hlo_legalize_to_pphlo.cc | 319 ++++++++-------- .../transforms/inline_secret_control_flow.cc | 244 ++++++++++++ .../pphlo/transforms}/insert_deallocation.cc | 12 +- .../transforms}/lower_conversion_cast.cc | 10 +- .../pphlo/transforms}/lower_mixed_type_op.cc | 20 +- .../transforms}/map_stablehlo_to_pphlo_op.h | 20 +- 
.../optimize_denominator_with_broadcast.cc | 10 +- .../pphlo/transforms}/optimize_maxpool.cc | 19 +- .../pphlo/transforms}/optimize_select.cc | 29 +- .../transforms}/optimize_sqrt_plus_eps.cc | 10 +- .../pphlo/transforms}/partial_sort_to_topk.cc | 16 +- .../pphlo/transforms}/pass_details.h | 8 +- .../pphlo/transforms}/passes.h | 20 +- .../pphlo/transforms}/passes.td | 24 +- .../pphlo/transforms}/reduce_truncation.cc | 19 +- .../pphlo/transforms}/register_passes.h | 6 +- .../transforms}/rewrite_div_sqrt_patterns.cc | 14 +- .../pphlo/transforms/rewrite_signbit.cc | 124 +++++++ .../pphlo/transforms}/sort_lowering.cc | 14 +- .../pphlo/transforms}/value_visibility_map.cc | 4 +- .../pphlo/transforms}/value_visibility_map.h | 12 +- .../pphlo/transforms}/visibility_inference.cc | 4 +- .../pphlo/transforms}/visibility_inference.h | 8 +- libspu/dialect/utils/BUILD.bazel | 29 ++ libspu/dialect/utils/assembly_format.cc | 100 +++++ libspu/dialect/utils/assembly_format.h | 63 ++++ libspu/dialect/utils/utils.cc | 35 ++ libspu/dialect/utils/utils.h | 34 ++ libspu/kernel/BUILD.bazel | 1 - libspu/kernel/hal/fxp_approx.cc | 64 ++++ libspu/kernel/hal/fxp_approx.h | 4 + libspu/kernel/hal/fxp_approx_test.cc | 62 ++++ libspu/kernel/hal/fxp_cleartext.cc | 13 +- libspu/kernel/hal/fxp_cleartext.h | 4 + libspu/kernel/hal/polymorphic.cc | 16 + libspu/kernel/hal/polymorphic.h | 8 + libspu/kernel/hal/polymorphic_test.cc | 36 ++ libspu/kernel/hlo/basic_unary.cc | 19 +- libspu/kernel/hlo/basic_unary.h | 7 +- libspu/kernel/hlo/basic_unary_test.cc | 2 + libspu/mpc/aby3/oram.h | 4 +- libspu/version.h | 2 +- spu/experimental/drop_cached_var_impl.py | 2 +- spu/experimental/make_cached_var_impl.py | 2 +- spu/intrinsic/README.md | 3 +- spu/tests/distributed_test.py | 6 +- spu/tests/jnp_testbase.py | 7 +- spu/tests/spu_runtime_test.py | 2 +- spu/utils/distributed_impl.py | 4 +- 277 files changed, 12159 insertions(+), 1445 deletions(-) delete mode 100644 libspu/compiler/passes/BUILD.bazel delete mode 
100644 libspu/compiler/passes/decompose_comparison.cc delete mode 100644 libspu/compiler/passes/decompose_minmax.cc delete mode 100644 libspu/compiler/tests/hlo2pphlo/gather_s.mlir create mode 100644 libspu/compiler/tests/interpret/abs.mlir create mode 100644 libspu/compiler/tests/interpret/add.mlir create mode 100644 libspu/compiler/tests/interpret/and.mlir create mode 100644 libspu/compiler/tests/interpret/atan2.mlir create mode 100644 libspu/compiler/tests/interpret/broadcast.mlir create mode 100644 libspu/compiler/tests/interpret/case.mlir create mode 100644 libspu/compiler/tests/interpret/ceil.mlir create mode 100644 libspu/compiler/tests/interpret/clamp.mlir create mode 100644 libspu/compiler/tests/interpret/concatenate.mlir create mode 100644 libspu/compiler/tests/interpret/convert.mlir create mode 100644 libspu/compiler/tests/interpret/convolution.mlir create mode 100644 libspu/compiler/tests/interpret/cosine.mlir create mode 100644 libspu/compiler/tests/interpret/divide.mlir create mode 100644 libspu/compiler/tests/interpret/dot_general.mlir create mode 100644 libspu/compiler/tests/interpret/dynamic_slice.mlir create mode 100644 libspu/compiler/tests/interpret/dynamic_update_slice.mlir create mode 100644 libspu/compiler/tests/interpret/equal.mlir create mode 100644 libspu/compiler/tests/interpret/exponential.mlir create mode 100644 libspu/compiler/tests/interpret/exponential_minus_one.mlir create mode 100644 libspu/compiler/tests/interpret/floor.mlir create mode 100755 libspu/compiler/tests/interpret/generate_mlir_tests.py create mode 100644 libspu/compiler/tests/interpret/greater.mlir create mode 100644 libspu/compiler/tests/interpret/greater_equal.mlir create mode 100644 libspu/compiler/tests/interpret/if.mlir create mode 100644 libspu/compiler/tests/interpret/iota.mlir create mode 100644 libspu/compiler/tests/interpret/less.mlir create mode 100644 libspu/compiler/tests/interpret/less_equal.mlir create mode 100644 libspu/compiler/tests/interpret/log.mlir 
create mode 100644 libspu/compiler/tests/interpret/log_plus_one.mlir create mode 100644 libspu/compiler/tests/interpret/logistic.mlir create mode 100644 libspu/compiler/tests/interpret/maximum.mlir create mode 100644 libspu/compiler/tests/interpret/minimum.mlir create mode 100644 libspu/compiler/tests/interpret/multiply.mlir create mode 100644 libspu/compiler/tests/interpret/negate.mlir create mode 100644 libspu/compiler/tests/interpret/not.mlir create mode 100644 libspu/compiler/tests/interpret/not_equal.mlir create mode 100644 libspu/compiler/tests/interpret/or.mlir create mode 100644 libspu/compiler/tests/interpret/pad.mlir create mode 100644 libspu/compiler/tests/interpret/popcnt.mlir create mode 100644 libspu/compiler/tests/interpret/power.mlir create mode 100644 libspu/compiler/tests/interpret/reduce.mlir create mode 100644 libspu/compiler/tests/interpret/reduce_window.mlir create mode 100644 libspu/compiler/tests/interpret/reshape.mlir create mode 100644 libspu/compiler/tests/interpret/reverse.mlir create mode 100644 libspu/compiler/tests/interpret/round_nearest_afz.mlir create mode 100644 libspu/compiler/tests/interpret/rsqrt.mlir create mode 100644 libspu/compiler/tests/interpret/select.mlir create mode 100644 libspu/compiler/tests/interpret/select_and_scatter.mlir create mode 100644 libspu/compiler/tests/interpret/shift_right_arithmetic.mlir create mode 100644 libspu/compiler/tests/interpret/shift_right_logical.mlir create mode 100644 libspu/compiler/tests/interpret/sign.mlir create mode 100644 libspu/compiler/tests/interpret/sine.mlir create mode 100644 libspu/compiler/tests/interpret/slice.mlir create mode 100644 libspu/compiler/tests/interpret/sort.mlir create mode 100644 libspu/compiler/tests/interpret/sqrt.mlir create mode 100644 libspu/compiler/tests/interpret/subtract.mlir create mode 100644 libspu/compiler/tests/interpret/tanh.mlir create mode 100644 libspu/compiler/tests/interpret/template/basic_binary.template create mode 100644 
libspu/compiler/tests/interpret/template/basic_unary.template create mode 100644 libspu/compiler/tests/interpret/test_json/abs.json create mode 100644 libspu/compiler/tests/interpret/test_json/add.json create mode 100644 libspu/compiler/tests/interpret/test_json/and.json create mode 100644 libspu/compiler/tests/interpret/test_json/arshift.json create mode 100644 libspu/compiler/tests/interpret/test_json/atan2.json create mode 100644 libspu/compiler/tests/interpret/test_json/ceil.json create mode 100644 libspu/compiler/tests/interpret/test_json/cosine.json create mode 100644 libspu/compiler/tests/interpret/test_json/divide.json create mode 100644 libspu/compiler/tests/interpret/test_json/equal.json create mode 100644 libspu/compiler/tests/interpret/test_json/exponential.json create mode 100644 libspu/compiler/tests/interpret/test_json/exponential_minus_one.json create mode 100644 libspu/compiler/tests/interpret/test_json/floor.json create mode 100644 libspu/compiler/tests/interpret/test_json/greater.json create mode 100644 libspu/compiler/tests/interpret/test_json/greater_equal.json create mode 100644 libspu/compiler/tests/interpret/test_json/less.json create mode 100644 libspu/compiler/tests/interpret/test_json/less_equal.json create mode 100644 libspu/compiler/tests/interpret/test_json/log.json create mode 100644 libspu/compiler/tests/interpret/test_json/log_plus_one.json create mode 100644 libspu/compiler/tests/interpret/test_json/logistic.json create mode 100644 libspu/compiler/tests/interpret/test_json/max.json create mode 100644 libspu/compiler/tests/interpret/test_json/min.json create mode 100644 libspu/compiler/tests/interpret/test_json/multiply.json create mode 100644 libspu/compiler/tests/interpret/test_json/negate.json create mode 100644 libspu/compiler/tests/interpret/test_json/not.json create mode 100644 libspu/compiler/tests/interpret/test_json/not_equal.json create mode 100644 libspu/compiler/tests/interpret/test_json/or.json create mode 100644 
libspu/compiler/tests/interpret/test_json/popcnt.json create mode 100644 libspu/compiler/tests/interpret/test_json/power.json create mode 100644 libspu/compiler/tests/interpret/test_json/reshape.json create mode 100644 libspu/compiler/tests/interpret/test_json/round_afz.json create mode 100644 libspu/compiler/tests/interpret/test_json/rshift.json create mode 100644 libspu/compiler/tests/interpret/test_json/rsqrt.json create mode 100644 libspu/compiler/tests/interpret/test_json/sign.json create mode 100644 libspu/compiler/tests/interpret/test_json/sine.json create mode 100644 libspu/compiler/tests/interpret/test_json/sqrt.json create mode 100644 libspu/compiler/tests/interpret/test_json/subtract.json create mode 100644 libspu/compiler/tests/interpret/test_json/tanh.json create mode 100644 libspu/compiler/tests/interpret/test_json/xor.json create mode 100644 libspu/compiler/tests/interpret/transpose.mlir create mode 100644 libspu/compiler/tests/interpret/while.mlir create mode 100644 libspu/compiler/tests/interpret/xor.mlir delete mode 100644 libspu/compiler/tests/optimizations/decompose_minmax.mlir delete mode 100644 libspu/compiler/tests/optimizations/expand_secret_gather.mlir delete mode 100644 libspu/compiler/tests/optimizations/no_expand_secret_gather.mlir rename libspu/compiler/tests/{ => passes}/hlo2pphlo/binary_ops_pp.mlir (94%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/binary_ops_ps.mlir (95%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/binary_ops_ss.mlir (95%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/comparison_pp.mlir (94%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/comparison_ps.mlir (95%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/comparison_ss.mlir (95%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/complex_p.mlir (82%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/complex_s.mlir (85%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/conditional_p.mlir (91%) rename libspu/compiler/tests/{ => 
passes}/hlo2pphlo/conditional_s.mlir (93%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/dynamic_slice.mlir (73%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/empty_function.mlir (64%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/gather_p.mlir (66%) create mode 100644 libspu/compiler/tests/passes/hlo2pphlo/gather_s.mlir rename libspu/compiler/tests/{ => passes}/hlo2pphlo/nullary_ops.mlir (94%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/reduce_p.mlir (93%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/reduce_s.mlir (93%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/select_and_scatter.mlir (94%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/shape_ops.mlir (92%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/sort_p.mlir (87%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/sort_s.mlir (89%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/tenary_ops_p.mlir (84%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/tenary_ops_s_1.mlir (85%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/tenary_ops_s_2.mlir (85%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/unary_ops_p.mlir (95%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/unary_ops_s.mlir (95%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/vreduce_mixed.mlir (91%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/vreduce_p.mlir (90%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/vreduce_s.mlir (92%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/while_p.mlir (87%) rename libspu/compiler/tests/{ => passes}/hlo2pphlo/while_s.mlir (88%) create mode 100644 libspu/compiler/tests/passes/optimizations/canonicalize.mlir rename libspu/compiler/tests/{ => passes}/optimizations/convert_push_down.mlir (91%) rename libspu/compiler/tests/{optimizations/decompose_comparison.mlir => passes/optimizations/decompose_ops.mlir} (52%) create mode 100644 libspu/compiler/tests/passes/optimizations/expand_secret_gather.mlir rename 
libspu/compiler/tests/{ => passes}/optimizations/lower_mixed_type_op.mlir (96%) create mode 100644 libspu/compiler/tests/passes/optimizations/no_expand_secret_gather.mlir rename libspu/compiler/tests/{ => passes}/optimizations/ops_negative.mlir (98%) rename libspu/compiler/tests/{ => passes}/optimizations/optimize_denominator_with_bcst.mlir (84%) rename libspu/compiler/tests/{ => passes}/optimizations/optimize_maxpool.mlir (97%) rename libspu/compiler/tests/{ => passes}/optimizations/optimize_select.mlir (90%) rename libspu/compiler/tests/{ => passes}/optimizations/optimize_sqrt_to_rsqrt.mlir (96%) rename libspu/compiler/tests/{ => passes}/optimizations/partial_sort_to_topk.mlir (98%) rename libspu/compiler/tests/{ => passes}/optimizations/pphlo_simple_dealloc.mlir (85%) rename libspu/compiler/tests/{ => passes}/optimizations/reduce_truncation.mlir (89%) rename libspu/compiler/tests/{ => passes}/optimizations/sort_lowering.mlir (95%) rename libspu/compiler/tools/{pphlo-lsp.cc => spu-lsp.cc} (96%) rename libspu/compiler/tools/{pphlo-opt.cc => spu-opt.cc} (93%) create mode 100644 libspu/compiler/tools/spu-translate.cc create mode 100644 libspu/compiler/utils/BUILD.bazel create mode 100644 libspu/compiler/utils/utils.cc create mode 100644 libspu/compiler/utils/utils.h create mode 100644 libspu/device/intrinsic_table.h create mode 100644 libspu/device/utils/BUILD.bazel rename libspu/device/{ => utils}/debug_dump_constant.cc (97%) rename libspu/device/{ => utils}/debug_dump_constant.h (100%) rename libspu/device/{pphlo => utils}/pphlo_executor_debug_runner.cc (99%) rename libspu/device/{pphlo => utils}/pphlo_executor_test_runner.cc (97%) rename libspu/device/{pphlo => utils}/pphlo_executor_test_runner.h (100%) rename libspu/dialect/pphlo/{ => IR}/BUILD.bazel (99%) rename libspu/dialect/pphlo/{ => IR}/assembly_format.cc (57%) rename libspu/dialect/pphlo/{ => IR}/assembly_format.h (84%) rename libspu/dialect/pphlo/{ => IR}/attrs.cc (98%) rename libspu/dialect/pphlo/{ => 
IR}/attrs.h (93%) rename libspu/dialect/pphlo/{ => IR}/attrs.td (97%) rename libspu/dialect/pphlo/{ => IR}/base_enums.cc (82%) rename libspu/dialect/pphlo/{ => IR}/base_enums.h (94%) rename libspu/dialect/pphlo/{ => IR}/base_enums.td (96%) rename libspu/dialect/pphlo/{ => IR}/canonicalization.cc (67%) rename libspu/dialect/pphlo/{ => IR}/canonicalization_patterns.td (95%) rename libspu/dialect/pphlo/{ => IR}/dialect.cc (65%) rename libspu/dialect/pphlo/{ => IR}/dialect.h (94%) rename libspu/dialect/pphlo/{ => IR}/dialect.td (98%) create mode 100644 libspu/dialect/pphlo/IR/fold.cc rename libspu/dialect/pphlo/{ => IR}/interface.h (94%) rename libspu/dialect/pphlo/{ => IR}/interface.td (100%) rename libspu/dialect/pphlo/{ => IR}/ops.cc (97%) rename libspu/dialect/pphlo/{ => IR}/ops.h (90%) rename libspu/dialect/pphlo/{ => IR}/ops.td (97%) rename libspu/dialect/pphlo/{ => IR}/print_parse.cc (99%) rename libspu/dialect/pphlo/{ => IR}/type_inference.cc (70%) rename libspu/dialect/pphlo/{ => IR}/types.cc (91%) rename libspu/dialect/pphlo/{ => IR}/types.h (87%) rename libspu/dialect/pphlo/{ => IR}/types.td (97%) delete mode 100644 libspu/dialect/pphlo/fold.cc create mode 100644 libspu/dialect/pphlo/transforms/BUILD.bazel rename libspu/{compiler/passes => dialect/pphlo/transforms}/convert_push_down.cc (92%) create mode 100644 libspu/dialect/pphlo/transforms/decompose_ops.cc create mode 100644 libspu/dialect/pphlo/transforms/decompose_patterns.td rename libspu/{compiler/passes => dialect/pphlo/transforms}/expand_secret_gather.cc (97%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/hlo_legalize_to_pphlo.cc (88%) create mode 100644 libspu/dialect/pphlo/transforms/inline_secret_control_flow.cc rename libspu/{compiler/passes => dialect/pphlo/transforms}/insert_deallocation.cc (95%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/lower_conversion_cast.cc (92%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/lower_mixed_type_op.cc (90%) rename 
libspu/{compiler/passes => dialect/pphlo/transforms}/map_stablehlo_to_pphlo_op.h (78%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/optimize_denominator_with_broadcast.cc (93%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/optimize_maxpool.cc (95%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/optimize_select.cc (85%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/optimize_sqrt_plus_eps.cc (95%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/partial_sort_to_topk.cc (97%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/pass_details.h (79%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/passes.h (82%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/passes.td (87%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/reduce_truncation.cc (92%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/register_passes.h (84%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/rewrite_div_sqrt_patterns.cc (95%) create mode 100644 libspu/dialect/pphlo/transforms/rewrite_signbit.cc rename libspu/{compiler/passes => dialect/pphlo/transforms}/sort_lowering.cc (93%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/value_visibility_map.cc (92%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/value_visibility_map.h (89%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/visibility_inference.cc (99%) rename libspu/{compiler/passes => dialect/pphlo/transforms}/visibility_inference.h (96%) create mode 100644 libspu/dialect/utils/BUILD.bazel create mode 100644 libspu/dialect/utils/assembly_format.cc create mode 100644 libspu/dialect/utils/assembly_format.h create mode 100644 libspu/dialect/utils/utils.cc create mode 100644 libspu/dialect/utils/utils.h diff --git a/.licenserc.yaml b/.licenserc.yaml index b64ffe28..92b3ce59 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -40,6 +40,7 @@ header: # <1> - '**/*.mlir' - 
'**/*.csv' - '**/*.tmpl' + - 'libspu/compiler/tests/interpret/template/**.template' - 'LICENSE' - 'NOTICE' - '.bazelversion' diff --git a/.vscode/settings.json b/.vscode/settings.json index 76010311..02434d07 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -41,5 +41,5 @@ "[python]": { "editor.defaultFormatter": "ms-python.black-formatter" }, - "mlir.server_path": "bazel-bin/libspu/compiler/tools/pphlo-lsp" + "mlir.server_path": "bazel-bin/libspu/compiler/tools/spu-lsp" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 709cb3fa..0712ac66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,12 @@ ## TBD +- [Feature] Support jax.numpy.bitwise_count +- [Bugfix] Fix jax.numpy.signbit wrong answer with very large input + +## 20240621 + +- [SPU] 0.9.1b0 release - [Feature] Add ORAM based dynamic_slice for ABY3 - [Feature] Add Atan2Op support - [API] Add beaver cache support for semi2k (**experimental**) diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl index 0b40b95b..ca1b5349 100644 --- a/bazel/repositories.bzl +++ b/bazel/repositories.bzl @@ -136,8 +136,8 @@ def _bazel_skylib(): ) def _com_github_openxla_xla(): - OPENXLA_COMMIT = "d9d0e780ff6a37c4d501c8e0e4f4a9fdca30cbd4" - OPENXLA_SHA256 = "77ef83491f409afbe549a2bd695d710a70fdf7f04db35eeb1fba3e97ef767113" + OPENXLA_COMMIT = "9b0dd58c9b625a2e958f4fc7787a1ff5c95dbb40" + OPENXLA_SHA256 = "f150c5b49e4d4497aae2c79232f1efe2baccaa72223b21dc8715be73eab74417" # We need openxla to handle xla/mhlo/stablehlo maybe( diff --git a/docs/development/pipeline.rst b/docs/development/pipeline.rst index d958c8e6..6f308587 100644 --- a/docs/development/pipeline.rst +++ b/docs/development/pipeline.rst @@ -39,17 +39,17 @@ The horizontal part depicts the data pipeline, from left to right. 1. Data providers use :ref:`SPU io ` module to encrypt input data. * For SPU MPC backend, *encrypt* means to split plaintext data into shares. 
- * For floating point data, encoding to fixed-point may be also required. + * For floating-point data, encoding to fixed-point may be also required. -2. The encrypted data is send to :ref:`SPU runtime `. -3. The output data is fetched *result owner*, and decrypted by the :ref:`SPU io ` module. +2. The encrypted data is sent to :ref:`SPU runtime `. +3. The output data is fetched by *result owner*, and decrypted by the :ref:`SPU io ` module. Just in time ------------ -Jit is short for `Just-in-time compilation `_, with this approach, the compiler can get more information, such as input shapes, than in `AOT mode `_. Jit may introduce more evaluation overhead, but it's really trivial in secure computation setting. +JIT is short for `Just-in-time compilation `_, with this approach, the compiler can get more information, such as input shapes, than in `AOT mode `_. JIT may introduce more evaluation overhead, but it's really trivial in secure computation setting. -In SPU, jit has more benefits since the backend engine may be orders of magnitude faster if it knows the *visibility* of data. For example, when multiplying two secrets, the backend MPC engine may involve expensive *beaver triple* progress, but when one of the inputs (of multiply) is public known to all parties, the operation will be much faster. So we should *mark* as much data as possible to be *public* (if it doesn't need to be protected), and tell the compiler these information. +In SPU, JIT has more benefits since the backend engine may be orders of magnitude faster if it knows the *visibility* of data. For example, when multiplying two secrets, the backend MPC engine may involve expensive *beaver triple* progress, but when one of the inputs (of multiply) is public known to all parties, the operation will be much faster. So we should *mark* as much data as possible to be *public* (if it doesn't need to be protected), and tell the compiler these information. 
So, SPU compilation normally happens after all data infeed is done, and `just in time` before the real evaluation. diff --git a/docs/reference/pphlo_doc.rst b/docs/reference/pphlo_doc.rst index f8a9e7db..751541e1 100644 --- a/docs/reference/pphlo_doc.rst +++ b/docs/reference/pphlo_doc.rst @@ -3,7 +3,7 @@ PPHlo API reference PPHlo is short for (SPU High level ops), it's the assembly language of SPU. -PPHlo is built on `MLIR `_ infrastructure, the concrete ops definition could be found :spu_code_host:`here `. +PPHlo is built on `MLIR `_ infrastructure, the concrete ops definition could be found :spu_code_host:`here `. Op List ~~~~~~~ diff --git a/docs/reference/update_pphlo_doc.sh b/docs/reference/update_pphlo_doc.sh index 30e45a8b..14f26857 100755 --- a/docs/reference/update_pphlo_doc.sh +++ b/docs/reference/update_pphlo_doc.sh @@ -18,7 +18,7 @@ SCRIPT=`realpath $0` SCRIPTPATH=`dirname $SCRIPT` -bazel build //libspu/dialect/pphlo:op_doc +bazel build //libspu/dialect/pphlo/IR:op_doc -cp `bazel info workspace`/bazel-bin/libspu/dialect/pphlo/op_doc.md $SCRIPTPATH/pphlo_op_doc.md +cp `bazel info workspace`/bazel-bin/libspu/dialect/pphlo/IR/op_doc.md $SCRIPTPATH/pphlo_op_doc.md diff --git a/docs/tutorials/quick_start.ipynb b/docs/tutorials/quick_start.ipynb index b10af4b7..1570c09d 100644 --- a/docs/tutorials/quick_start.ipynb +++ b/docs/tutorials/quick_start.ipynb @@ -645,7 +645,7 @@ } ], "source": [ - "print(sigmoid.dump_pphlo(np.random.rand(3, 3)))" + "print(sigmoid.dump_ir(np.random.rand(3, 3)))" ] }, { @@ -698,7 +698,7 @@ "source": [ "X = ppd.device(\"P1\")(make_rand)()\n", "\n", - "print(sigmoid.dump_pphlo(X))" + "print(sigmoid.dump_ir(X))" ] }, { diff --git a/examples/cpp/simple_pphlo.cc b/examples/cpp/simple_pphlo.cc index e3bd780a..a8259572 100644 --- a/examples/cpp/simple_pphlo.cc +++ b/examples/cpp/simple_pphlo.cc @@ -38,7 +38,7 @@ func.func @main() -> () { %0 = pphlo.constant dense<1> : tensor %1 = pphlo.constant dense<2> : tensor %2 = pphlo.add %0, %1 : 
tensor - pphlo.custom_call @dbg_print (%2) {has_side_effect = true} : (tensor)->() + pphlo.custom_call @spu.dbg_print (%2) {has_side_effect = true} : (tensor)->() return })"; @@ -76,7 +76,7 @@ void parameters(spu::SPUContext* sctx) { constexpr auto code = R"PPHlo( func.func @main(%arg0: tensor>, %arg1: tensor>) -> () { %0 = pphlo.multiply %arg0, %arg1 : (tensor>, tensor>) -> tensor> - pphlo.custom_call @dbg_print (%0) {has_side_effect = true} : (tensor>)->() + pphlo.custom_call @spu.dbg_print (%0) {has_side_effect = true} : (tensor>)->() return })PPHlo"; diff --git a/libspu/compiler/codegen/BUILD.bazel b/libspu/compiler/codegen/BUILD.bazel index fd0391ff..2585dae3 100644 --- a/libspu/compiler/codegen/BUILD.bazel +++ b/libspu/compiler/codegen/BUILD.bazel @@ -23,6 +23,6 @@ spu_cc_library( srcs = ["codegen.cc"], hdrs = ["codegen.h"], deps = [ - "//libspu/dialect/pphlo:dialect", + "//libspu/dialect/pphlo/IR:dialect", ], ) diff --git a/libspu/compiler/core/BUILD.bazel b/libspu/compiler/core/BUILD.bazel index ac4718a2..04adfe8c 100644 --- a/libspu/compiler/core/BUILD.bazel +++ b/libspu/compiler/core/BUILD.bazel @@ -21,7 +21,7 @@ spu_cc_library( visibility = ["//visibility:public"], deps = [ "//libspu/compiler/common:compilation_context", - "//libspu/compiler/passes:all_passes", + "//libspu/dialect/pphlo/transforms:all_passes", "@llvm-project//mlir:Transforms", ], ) diff --git a/libspu/compiler/core/core.cc b/libspu/compiler/core/core.cc index 32c3a47a..355dd68a 100644 --- a/libspu/compiler/core/core.cc +++ b/libspu/compiler/core/core.cc @@ -21,8 +21,8 @@ #include "mlir/Transforms/Passes.h" #include "libspu/compiler/common/compilation_context.h" -#include "libspu/compiler/passes/passes.h" #include "libspu/core/prelude.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace spu::compiler { @@ -51,14 +51,17 @@ void Core::buildPipeline(mlir::PassManager *pm) { optPM.addPass(mlir::createCSEPass()); optPM.addPass(mlir::spu::pphlo::createOptimizeMaxPoolingPass()); } - 
optPM.addPass(mlir::spu::pphlo::createDecomposeComparisonPass()); - optPM.addPass(mlir::spu::pphlo::createDecomposeMinMaxPass()); + optPM.addPass(mlir::spu::pphlo::createDecomposeOps()); optPM.addPass(mlir::spu::pphlo::createSortLowering()); if (!options.disable_partial_sort_optimization()) { optPM.addPass(mlir::spu::pphlo::createPartialSortToTopK()); } + optPM.addPass(mlir::spu::pphlo::createRewriteSignbitPatterns()); + + optPM.addPass(mlir::spu::pphlo::createInlineSecretControlFlow()); + if (!options.disable_sqrt_plus_epsilon_rewrite()) { optPM.addPass(mlir::spu::pphlo::createOptimizeSqrtPlusEps()); } diff --git a/libspu/compiler/core/core.h b/libspu/compiler/core/core.h index 78dfde02..1afba093 100644 --- a/libspu/compiler/core/core.h +++ b/libspu/compiler/core/core.h @@ -34,6 +34,11 @@ class Core final { private: CompilationContext *ctx_; +#ifdef EXPOSE_PIPELINE_BUILDER +public: +#else +private: // NOLINT: +#endif void buildPipeline(mlir::PassManager *pm); }; diff --git a/libspu/compiler/front_end/BUILD.bazel b/libspu/compiler/front_end/BUILD.bazel index 399e30b8..8b68cf6d 100644 --- a/libspu/compiler/front_end/BUILD.bazel +++ b/libspu/compiler/front_end/BUILD.bazel @@ -71,8 +71,8 @@ spu_cc_library( deps = [ ":hlo_importer", "//libspu/compiler/common:compilation_context", - "//libspu/compiler/passes:hlo_legalize_to_pphlo", - "//libspu/compiler/passes:lower_conversion_cast", + "//libspu/compiler/utils", + "//libspu/dialect/pphlo/transforms:all_passes", "@llvm-project//mlir:FuncExtensions", "@llvm-project//mlir:Parser", "@xla//xla/mlir_hlo:mhlo_passes", diff --git a/libspu/compiler/front_end/fe.cc b/libspu/compiler/front_end/fe.cc index ebd3546b..682f1c5b 100644 --- a/libspu/compiler/front_end/fe.cc +++ b/libspu/compiler/front_end/fe.cc @@ -29,21 +29,13 @@ #include "libspu/compiler/common/compilation_context.h" #include "libspu/compiler/front_end/hlo_importer.h" -#include "libspu/compiler/passes/passes.h" +#include "libspu/compiler/utils/utils.h" #include 
"libspu/core/prelude.h" -#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/dialect/pphlo/IR/dialect.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace spu::compiler { -namespace { - -mlir::LogicalResult argparser_error_handler(const llvm::Twine &msg) { - SPDLOG_ERROR(msg.str()); - return mlir::failure(); -} - -} // namespace - FE::FE(CompilationContext *ctx) : ctx_(ctx) { ctx_->getMLIRContext() ->loadDialectinitializeOptions(args, argparser_error_handler).succeeded()); + SPU_ENFORCE(l->initializeOptions(args, mlir::spu::argparser_error_handler) + .succeeded()); } pm->addPass(std::move(l)); } diff --git a/libspu/compiler/front_end/hlo_importer.h b/libspu/compiler/front_end/hlo_importer.h index 7c7725b6..8e57b7f0 100644 --- a/libspu/compiler/front_end/hlo_importer.h +++ b/libspu/compiler/front_end/hlo_importer.h @@ -28,7 +28,7 @@ class CompilationContext; class HloImporter final { public: - explicit HloImporter(CompilationContext *context) : context_(context){}; + explicit HloImporter(CompilationContext *context) : context_(context) {}; /// Load a xla module and returns a mlir-hlo module mlir::OwningOpRef diff --git a/libspu/compiler/passes/BUILD.bazel b/libspu/compiler/passes/BUILD.bazel deleted file mode 100644 index 730f6f49..00000000 --- a/libspu/compiler/passes/BUILD.bazel +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright 2021 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library") -load("//bazel:spu.bzl", "spu_cc_library") - -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], -) - -gentbl_cc_library( - name = "pphlo_pass_inc_gen", - tbl_outs = [ - ( - ["-gen-pass-decls"], - "passes.h.inc", - ), - ], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "passes.td", - deps = [ - "@llvm-project//mlir:PassBaseTdFiles", - ], -) - -spu_cc_library( - name = "pass_details", - hdrs = [ - "pass_details.h", - ], - visibility = [ - "//visibility:private", # This target is a private detail of pass implementations - ], - deps = [ - ":pphlo_pass_inc_gen", - "@llvm-project//mlir:Pass", - ], -) - -spu_cc_library( - name = "map_stablehlo_to_pphlo_op", - hdrs = ["map_stablehlo_to_pphlo_op.h"], - visibility = [ - "//visibility:private", # This target is a private detail of ops map - ], - deps = [ - "//libspu/dialect/pphlo:dialect", - "@stablehlo//:stablehlo_ops", - ], -) - -spu_cc_library( - name = "value_visibility_map", - srcs = ["value_visibility_map.cc"], - hdrs = ["value_visibility_map.h"], - deps = [ - "//libspu/core:prelude", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - ], -) - -spu_cc_library( - name = "visibility_inference", - srcs = ["visibility_inference.cc"], - hdrs = ["visibility_inference.h"], - deps = [ - ":value_visibility_map", - "//libspu/core:prelude", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@stablehlo//:stablehlo_ops", - ], -) - -spu_cc_library( - name = "expand_secret_gather", - srcs = ["expand_secret_gather.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "hlo_legalize_to_pphlo", - srcs = ["hlo_legalize_to_pphlo.cc"], - hdrs = ["passes.h"], - deps = [ - ":map_stablehlo_to_pphlo_op", - ":pass_details", - ":visibility_inference", - 
"//libspu/compiler/common:compilation_context", - "//libspu/core:prelude", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@stablehlo//:stablehlo_ops", - ], -) - -spu_cc_library( - name = "decompose_comparison", - srcs = ["decompose_comparison.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "lower_conversion_cast", - srcs = ["lower_conversion_cast.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "decompose_minmax", - srcs = ["decompose_minmax.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "reduce_truncation", - srcs = ["reduce_truncation.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "lower_mixed_type_op", - srcs = ["lower_mixed_type_op.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "optimize_maxpool", - srcs = ["optimize_maxpool.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "optimize_select", - srcs = ["optimize_select.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "optimize_sqrt_plus_eps", - srcs = 
["optimize_sqrt_plus_eps.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "rewrite_div_sqrt_patterns", - srcs = ["rewrite_div_sqrt_patterns.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "optimize_denominator_with_broadcast", - srcs = ["optimize_denominator_with_broadcast.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "insert_deallocation", - srcs = ["insert_deallocation.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "sort_lowering", - srcs = ["sort_lowering.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "convert_push_down", - srcs = ["convert_push_down.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "partial_sort_to_topk", - srcs = ["partial_sort_to_topk.cc"], - hdrs = ["passes.h"], - deps = [ - ":pass_details", - "//libspu/dialect/pphlo:dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:TransformUtils", - ], -) - -spu_cc_library( - name = "all_passes", - hdrs = ["register_passes.h"], - deps = [ - ":convert_push_down", - ":decompose_comparison", - ":decompose_minmax", - ":expand_secret_gather", - ":hlo_legalize_to_pphlo", - ":insert_deallocation", - 
":lower_conversion_cast", - ":lower_mixed_type_op", - ":optimize_denominator_with_broadcast", - ":optimize_maxpool", - ":optimize_select", - ":optimize_sqrt_plus_eps", - ":partial_sort_to_topk", - ":reduce_truncation", - ":rewrite_div_sqrt_patterns", - ":sort_lowering", - ], -) diff --git a/libspu/compiler/passes/decompose_comparison.cc b/libspu/compiler/passes/decompose_comparison.cc deleted file mode 100644 index d7839dad..00000000 --- a/libspu/compiler/passes/decompose_comparison.cc +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2021 Ant Group Co., Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" - -namespace mlir::spu::pphlo { - -namespace { - -// != -> 1 - equal -// >= -> 1 - less -// <= -> 1 - greater - -template -struct CompareOpConverter : public OpRewritePattern { - explicit CompareOpConverter(MLIRContext *context) - : OpRewritePattern(context) {} - - LogicalResult matchAndRewrite(CompTy op, - PatternRewriter &rewriter) const override { - OpBuilder builder(op); - - auto eq_op = builder.create(op.getLoc(), op.getOperands()); - - rewriter.replaceOpWithNewOp(op, eq_op); - - return success(); - } -}; - -struct DecomposeComparison - : public DecomposeComparisonBase { - void runOnOperation() override { - RewritePatternSet patterns(&getContext()); - populateOwningPatterns(&patterns, &getContext()); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); - } - -private: - static void populateOwningPatterns(RewritePatternSet *patterns, - MLIRContext *ctx) { - patterns->insert, - CompareOpConverter, - CompareOpConverter>(ctx); - } -}; -} // namespace - -std::unique_ptr> createDecomposeComparisonPass() { - return std::make_unique(); -} - -} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/decompose_minmax.cc b/libspu/compiler/passes/decompose_minmax.cc deleted file mode 100644 index a588edc1..00000000 --- a/libspu/compiler/passes/decompose_minmax.cc +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2022 Ant Group Co., Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" - -namespace mlir::spu::pphlo { - -namespace { - -// MaxOp -> select(greater(x, y), x, y) -// MinOp -> select(less(x,y), x, y) -template -struct MinMaxOpConverter : public OpRewritePattern { -private: - TypeTools typetools_; - -public: - explicit MinMaxOpConverter(MLIRContext *context) - : OpRewritePattern(context), typetools_(context) {} - - LogicalResult matchAndRewrite(InOp op, - PatternRewriter &rewriter) const override { - OpBuilder builder(op); - - auto gt = builder.create(op->getLoc(), op.getOperands()); - - rewriter.replaceOpWithNewOp(op, op.getType(), gt.getResult(), - op.getOperand(0), op.getOperand(1)); - - return success(); - } -}; - -struct DecomposeMinMax : public DecomposeMinMaxBase { - void runOnOperation() override { - RewritePatternSet patterns(&getContext()); - populateOwningPatterns(&patterns, &getContext()); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); - } - -private: - static void populateOwningPatterns(RewritePatternSet *patterns, - MLIRContext *ctx) { - patterns->insert, - MinMaxOpConverter>(ctx); - } -}; -} // namespace - -std::unique_ptr> createDecomposeMinMaxPass() { - return std::make_unique(); -} - -} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/tests/BUILD.bazel b/libspu/compiler/tests/BUILD.bazel index d49b7bbc..2dd06127 100644 --- 
a/libspu/compiler/tests/BUILD.bazel +++ b/libspu/compiler/tests/BUILD.bazel @@ -38,7 +38,8 @@ expand_template( data = [ "lit.cfg.py", "lit.site.cfg.py", - "//libspu/compiler/tools:pphlo-opt", + "//libspu/compiler/tools:spu-opt", + "//libspu/compiler/tools:spu-translate", "@llvm-project//llvm:FileCheck", "@llvm-project//llvm:not", ] + glob(["%s.bc" % src]), diff --git a/libspu/compiler/tests/hlo2pphlo/gather_s.mlir b/libspu/compiler/tests/hlo2pphlo/gather_s.mlir deleted file mode 100644 index c68cacd2..00000000 --- a/libspu/compiler/tests/hlo2pphlo/gather_s.mlir +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s - -func.func @main(%arg0 : tensor<3x4x2xi64>, %arg1 : tensor<2x3x2xi64>) -> tensor<2x3x2x2xi64> { - // CHECK: pphlo.custom_call @pphlo.gather(%arg0, %arg1) {pphlo.attributes = {collapsed_slice_dims = array, index_vector_dim = 2 : i64, offset_dims = array, slice_sizes = array, start_index_map = array}} : (tensor<3x4x2x!pphlo.secret>, tensor<2x3x2x!pphlo.secret>) -> tensor<2x3x2x2x!pphlo.secret> - %result = "stablehlo.gather"(%arg0, %arg1) { - dimension_numbers = #stablehlo.gather< - offset_dims = [2, 3], - collapsed_slice_dims = [0], - start_index_map = [1, 0], - index_vector_dim = 2>, - slice_sizes = array, - indices_are_sorted = false - } : (tensor<3x4x2xi64>, tensor<2x3x2xi64>) -> tensor<2x3x2x2xi64> - return %result : tensor<2x3x2x2xi64> -} diff --git a/libspu/compiler/tests/interpret/abs.mlir b/libspu/compiler/tests/interpret/abs.mlir new file mode 100644 index 00000000..584fc87a --- /dev/null +++ b/libspu/compiler/tests/interpret/abs.mlir @@ -0,0 +1,44 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @abs_op_test_i64_i64_p() { + %0 = pphlo.constant dense<[-2, 0, 2]> : tensor<3xi64> + %1 = pphlo.abs %0 : (tensor<3xi64>)->tensor<3xi64> + %2 = pphlo.constant dense<[ 2, 0, 2]> : 
tensor<3xi64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @abs_op_test_i64_i64_s() { + %0 = pphlo.constant dense<[-2, 0, 2]> : tensor<3xi64> + %1 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %2 = pphlo.abs %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[ 2, 0, 2]> : tensor<3xi64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @abs_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[23.1, -23.1, 0.0]> : tensor<3xf64> + %1 = pphlo.abs %0 : (tensor<3xf64>)->tensor<3xf64> + %2 = pphlo.constant dense<[23.1, 23.1, 0.0]> : tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} + +// ----- + +func.func @abs_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[23.1, -23.1, 0.0]> : tensor<3xf64> + %1 = pphlo.convert %0 : (tensor<3xf64>)->tensor<3x!pphlo.secret> + %2 = pphlo.abs %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[23.1, 23.1, 0.0]> : tensor<3xf64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/add.mlir b/libspu/compiler/tests/interpret/add.mlir new file mode 100644 index 00000000..9244b346 --- /dev/null +++ b/libspu/compiler/tests/interpret/add.mlir @@ -0,0 +1,350 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @add_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.add %0,%1 : (tensor<5xi8>,tensor<5xi8>)->tensor<5xi8> + %3 = pphlo.constant dense<[-128, 0, 16, -18, 127]> : 
tensor<5xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @add_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.convert %0 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-128, 0, 16, -18, 127]> : tensor<5xi8> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.add %0,%1 : (tensor<2xui8>,tensor<2xui8>)->tensor<2xui8> + %3 = pphlo.constant dense<[255, 32]> : tensor<2xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.convert %0 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[255, 32]> : tensor<2xui8> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @add_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.add %0,%1 : (tensor<5xi16>,tensor<5xi16>)->tensor<5xi16> + 
%3 = pphlo.constant dense<[-32768, 0, 256, -258, 32767]> : tensor<5xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @add_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.convert %0 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-32768, 0, 256, -258, 32767]> : tensor<5xi16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.add %0,%1 : (tensor<2xui16>,tensor<2xui16>)->tensor<2xui16> + %3 = pphlo.constant dense<[65535, 512]> : tensor<2xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.convert %0 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[65535, 512]> : tensor<2xui16> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @add_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant 
dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.add %0,%1 : (tensor<5xi32>,tensor<5xi32>)->tensor<5xi32> + %3 = pphlo.constant dense<[-2147483648, 0, 65536, -65538, 2147483647]> : tensor<5xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @add_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.convert %0 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-2147483648, 0, 65536, -65538, 2147483647]> : tensor<5xi32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.add %0,%1 : (tensor<2xui32>,tensor<2xui32>)->tensor<2xui32> + %3 = pphlo.constant dense<[4294967295, 131072]> : tensor<2xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.convert %0 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[4294967295, 131072]> : tensor<2xui32> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui32> + pphlo.custom_call 
@expect_eq(%5, %6) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @add_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.add %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi64> + %3 = pphlo.constant dense<[-9223372036854775808, 0, 4294967296, -4294967298, 9223372036854775807]> : tensor<5xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @add_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-9223372036854775808, 0, 4294967296, -4294967298, 9223372036854775807]> : tensor<5xi64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[18446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.add %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xui64> + %3 = pphlo.constant dense<[18446744073709551615, 8589934592]> : tensor<2xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @add_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[18446744073709551615, 4294967296]> : 
tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[18446744073709551615, 8589934592]> : tensor<2xui64> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @add_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.add %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, true, true, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @add_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, true, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @add_op_test_f16_f16_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.141]> : tensor<5xf16> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.141]> : tensor<5xf16> + %2 = pphlo.add %0,%1 : (tensor<5xf16>,tensor<5xf16>)->tensor<5xf16> + %3 = pphlo.constant dense<[0.000000e+00, 8.000000e+00, 8.750000e-01, 3.999020e-01, 6.281250e+00]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%2, 
%3) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @add_op_test_f16_f16_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.141]> : tensor<5xf16> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.141]> : tensor<5xf16> + %2 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, 8.000000e+00, 8.750000e-01, 3.999020e-01, 6.281250e+00]> : tensor<5xf16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @add_op_test_f32_f32_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265]> : tensor<5xf32> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265]> : tensor<5xf32> + %2 = pphlo.add %0,%1 : (tensor<5xf32>,tensor<5xf32>)->tensor<5xf32> + %3 = pphlo.constant dense<[0.000000e+00, 8.000000e+00, 8.750000e-01, 4.000000e-01, 6.28318548]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @add_op_test_f32_f32_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265]> : tensor<5xf32> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265]> : tensor<5xf32> + %2 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, 8.000000e+00, 8.750000e-01, 4.000000e-01, 6.28318548]> : tensor<5xf32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf32>, tensor<5xf32>)->() + func.return 
+} + +// ----- + +func.func @add_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]> : tensor<5xf64> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265358979323846]> : tensor<5xf64> + %2 = pphlo.add %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xf64> + %3 = pphlo.constant dense<[0.000000e+00, 8.000000e+00, 8.750000e-01, 4.000000e-01, 6.2831853071795862]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @add_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]> : tensor<5xf64> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265358979323846]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, 8.000000e+00, 8.750000e-01, 4.000000e-01, 6.2831853071795862]> : tensor<5xf64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @add_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<2> : tensor + %1 = pphlo.constant dense<3> : tensor + %2 = pphlo.add %0,%1 : (tensor,tensor)->tensor + %3 = pphlo.constant dense<5> : tensor + pphlo.custom_call @expect_eq(%2, %3) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @add_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<2> : tensor + %1 = pphlo.constant dense<3> : tensor + %2 = pphlo.convert %0 : (tensor)->tensor> + %3 = pphlo.convert %1 : (tensor)->tensor> + %4 = pphlo.add %2, %3 : (tensor>,tensor>)->tensor> + %5 = pphlo.constant dense<5> : tensor + %6 = pphlo.convert %4 : (tensor>)->tensor + pphlo.custom_call @expect_eq(%5, %6) : (tensor, 
tensor)->() + func.return +} + +// ----- + +func.func @add_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<2> : tensor<2x0x3xi8> + %1 = pphlo.constant dense<3> : tensor<2x0x3xi8> + %2 = pphlo.add %0,%1 : (tensor<2x0x3xi8>,tensor<2x0x3xi8>)->tensor<2x0x3xi8> + %3 = pphlo.constant dense<> : tensor<2x0x3xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2x0x3xi8>, tensor<2x0x3xi8>)->() + func.return +} + +// ----- + +func.func @add_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<2> : tensor<2x0x3xi8> + %1 = pphlo.constant dense<3> : tensor<2x0x3xi8> + %2 = pphlo.convert %0 : (tensor<2x0x3xi8>)->tensor<2x0x3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2x0x3xi8>)->tensor<2x0x3x!pphlo.secret> + %4 = pphlo.add %2, %3 : (tensor<2x0x3x!pphlo.secret>,tensor<2x0x3x!pphlo.secret>)->tensor<2x0x3x!pphlo.secret> + %5 = pphlo.constant dense<> : tensor<2x0x3xi8> + %6 = pphlo.convert %4 : (tensor<2x0x3x!pphlo.secret>)->tensor<2x0x3xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2x0x3xi8>, tensor<2x0x3xi8>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/and.mlir b/libspu/compiler/tests/interpret/and.mlir new file mode 100644 index 00000000..c06920e1 --- /dev/null +++ b/libspu/compiler/tests/interpret/and.mlir @@ -0,0 +1,275 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @and_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[127, -128, -128]> : tensor<3xi8> + %1 = pphlo.constant dense<[0, 127, -128]> : tensor<3xi8> + %2 = pphlo.and %0,%1 : (tensor<3xi8>,tensor<3xi8>)->tensor<3xi8> + %3 = pphlo.constant dense<[0, 0, -128]> : tensor<3xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @and_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[127, -128, -128]> : tensor<3xi8> + %1 = pphlo.constant dense<[0, 127, -128]> : tensor<3xi8> + %2 = pphlo.convert %0 : (tensor<3xi8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : 
(tensor<3xi8>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0, -128]> : tensor<3xi8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.constant dense<255> : tensor<3xui8> + %2 = pphlo.and %0,%1 : (tensor<3xui8>,tensor<3xui8>)->tensor<3xui8> + %3 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.constant dense<255> : tensor<3xui8> + %2 = pphlo.convert %0 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @and_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[32767, -32768, -32768]> : tensor<3xi16> + %1 = pphlo.constant dense<[0, 32767, -32768]> : tensor<3xi16> + %2 = pphlo.and %0,%1 : (tensor<3xi16>,tensor<3xi16>)->tensor<3xi16> + %3 = pphlo.constant dense<[0, 0, -32768]> : tensor<3xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @and_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[32767, -32768, -32768]> : tensor<3xi16> + %1 = pphlo.constant dense<[0, 32767, -32768]> : tensor<3xi16> + %2 = pphlo.convert %0 : 
(tensor<3xi16>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi16>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0, -32768]> : tensor<3xi16> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.constant dense<65535> : tensor<3xui16> + %2 = pphlo.and %0,%1 : (tensor<3xui16>,tensor<3xui16>)->tensor<3xui16> + %3 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.constant dense<65535> : tensor<3xui16> + %2 = pphlo.convert %0 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @and_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[2147483647, -2147483648, -2147483648]> : tensor<3xi32> + %1 = pphlo.constant dense<[0, 2147483647, -2147483648]> : tensor<3xi32> + %2 = pphlo.and %0,%1 : (tensor<3xi32>,tensor<3xi32>)->tensor<3xi32> + %3 = pphlo.constant dense<[0, 0, -2147483648]> : tensor<3xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @and_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[2147483647, 
-2147483648, -2147483648]> : tensor<3xi32> + %1 = pphlo.constant dense<[0, 2147483647, -2147483648]> : tensor<3xi32> + %2 = pphlo.convert %0 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0, -2147483648]> : tensor<3xi32> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.constant dense<4294967295> : tensor<3xui32> + %2 = pphlo.and %0,%1 : (tensor<3xui32>,tensor<3xui32>)->tensor<3xui32> + %3 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.constant dense<4294967295> : tensor<3xui32> + %2 = pphlo.convert %0 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @and_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, -9223372036854775808]> : tensor<3xi64> + %1 = pphlo.constant dense<[0, 9223372036854775807, -9223372036854775808]> : tensor<3xi64> + %2 = pphlo.and %0,%1 : (tensor<3xi64>,tensor<3xi64>)->tensor<3xi64> + %3 = 
pphlo.constant dense<[0, 0, -9223372036854775808]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @and_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, -9223372036854775808]> : tensor<3xi64> + %1 = pphlo.constant dense<[0, 9223372036854775807, -9223372036854775808]> : tensor<3xi64> + %2 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0, -9223372036854775808]> : tensor<3xi64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.constant dense<18446744073709551615> : tensor<3xui64> + %2 = pphlo.and %0,%1 : (tensor<3xui64>,tensor<3xui64>)->tensor<3xui64> + %3 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @and_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.constant dense<18446744073709551615> : tensor<3xui64> + %2 = pphlo.convert %0 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui64> + pphlo.custom_call 
@expect_eq(%5, %6) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @and_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.and %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, false, false, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @and_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, false, false, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @and_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.and %0,%1 : (tensor<2xi1>,tensor<2xi1>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, false]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @and_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, false]> : tensor<2xi1> + %6 = pphlo.convert %4 : 
(tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @and_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.and %0,%1 : (tensor<2xi1>,tensor<2xi1>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @and_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %4 = pphlo.and %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/atan2.mlir b/libspu/compiler/tests/interpret/atan2.mlir new file mode 100644 index 00000000..73acce5f --- /dev/null +++ b/libspu/compiler/tests/interpret/atan2.mlir @@ -0,0 +1,25 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @atan2_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, -1.0]> : tensor<3xf64> + %1 = pphlo.constant dense<[0.0, 0.0, 0.0]> : tensor<3xf64> + %2 = pphlo.atan2 %0,%1 : (tensor<3xf64>,tensor<3xf64>)->tensor<3xf64> + %3 = pphlo.constant dense<[0.0, 1.5707963267948966, -1.5707963267948966]> : tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} + +// ----- + +func.func @atan2_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, -1.0]> : tensor<3xf64> + %1 = pphlo.constant dense<[0.0, 0.0, 
0.0]> : tensor<3xf64> + %2 = pphlo.convert %0 : (tensor<3xf64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xf64>)->tensor<3x!pphlo.secret> + %4 = pphlo.atan2 %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0.0, 1.5707963267948966, -1.5707963267948966]> : tensor<3xf64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/broadcast.mlir b/libspu/compiler/tests/interpret/broadcast.mlir new file mode 100644 index 00000000..34a1d934 --- /dev/null +++ b/libspu/compiler/tests/interpret/broadcast.mlir @@ -0,0 +1,9 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @broadcast_in_dim() { + %operand = pphlo.constant dense<[[1], [2], [3]]> : tensor<3x1xi64> + %result = pphlo.broadcast %operand, dims = [0, 2] : (tensor<3x1xi64>) -> tensor<3x2x2xi64> + %expected = pphlo.constant dense<[[[1, 1], [1, 1]], [[2, 2], [2, 2]], [[3, 3], [3, 3]]]> : tensor<3x2x2xi64> + pphlo.custom_call @expect_eq(%result, %expected) : (tensor<3x2x2xi64>,tensor<3x2x2xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/case.mlir b/libspu/compiler/tests/interpret/case.mlir new file mode 100644 index 00000000..9beae82b --- /dev/null +++ b/libspu/compiler/tests/interpret/case.mlir @@ -0,0 +1,70 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @case_negative_index_default() { + %index = pphlo.constant dense<-1> : tensor + %result_branch0 = pphlo.constant dense<0> : tensor<2xi64> + %result_branch1 = pphlo.constant dense<1> : tensor<2xi64> + %result0, %result1 = "pphlo.case"(%index) ({ + pphlo.return %result_branch0, %result_branch0 : tensor<2xi64>, tensor<2xi64> + }, { + pphlo.return %result_branch1, %result_branch1 : tensor<2xi64>, tensor<2xi64> + }) : (tensor) -> (tensor<2xi64>, tensor<2xi64>) + %expected = 
pphlo.constant dense<[1, 1]> : tensor<2xi64> + pphlo.custom_call @expect_eq(%result0, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + pphlo.custom_call @expect_eq(%result1, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + func.return +} + +// ----- + +func.func @case_in_bound_index() { + %index = pphlo.constant dense<0> : tensor + %result_branch0 = pphlo.constant dense<0> : tensor<2xi64> + %result_branch1 = pphlo.constant dense<1> : tensor<2xi64> + %result0, %result1 = "pphlo.case"(%index) ({ + pphlo.return %result_branch0, %result_branch0 : tensor<2xi64>, tensor<2xi64> + }, { + pphlo.return %result_branch1, %result_branch1 : tensor<2xi64>, tensor<2xi64> + }) : (tensor) -> (tensor<2xi64>, tensor<2xi64>) + %expected = pphlo.constant dense<[0, 0]> : tensor<2xi64> + pphlo.custom_call @expect_eq(%result0, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + pphlo.custom_call @expect_eq(%result1, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + func.return +} + +// ----- + +func.func @case_out_of_bound_index_default() { + %index = pphlo.constant dense<2> : tensor + %result_branch0 = pphlo.constant dense<0> : tensor<2xi64> + %result_branch1 = pphlo.constant dense<1> : tensor<2xi64> + %result0, %result1 = "pphlo.case"(%index) ({ + pphlo.return %result_branch0, %result_branch0 : tensor<2xi64>, tensor<2xi64> + }, { + pphlo.return %result_branch1, %result_branch1 : tensor<2xi64>, tensor<2xi64> + }) : (tensor) -> (tensor<2xi64>, tensor<2xi64>) + %expected = pphlo.constant dense<[1, 1]> : tensor<2xi64> + pphlo.custom_call @expect_eq(%result0, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + pphlo.custom_call @expect_eq(%result1, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + func.return +} + +// ----- + +func.func @case_out_of_bound_index_default() { + %0 = pphlo.constant dense<2> : tensor + %index = pphlo.convert %0 : (tensor) -> tensor> + %1 = pphlo.constant dense<0> : tensor<2xi64> + %2 = pphlo.constant dense<1> : tensor<2xi64> + %result_branch0 = pphlo.convert %1 : 
(tensor<2xi64>) -> tensor<2x!pphlo.secret> + %result_branch1 = pphlo.convert %2 : (tensor<2xi64>) -> tensor<2x!pphlo.secret> + %result0, %result1 = "pphlo.case"(%index) ({ + pphlo.return %result_branch0, %result_branch0 : tensor<2x!pphlo.secret>, tensor<2x!pphlo.secret> + }, { + pphlo.return %result_branch1, %result_branch1 : tensor<2x!pphlo.secret>, tensor<2x!pphlo.secret> + }) : (tensor>) -> (tensor<2x!pphlo.secret>, tensor<2x!pphlo.secret>) + %expected = pphlo.constant dense<[1, 1]> : tensor<2xi64> + pphlo.custom_call @expect_eq(%result0, %expected) : (tensor<2x!pphlo.secret>,tensor<2xi64>)->() + pphlo.custom_call @expect_eq(%result1, %expected) : (tensor<2x!pphlo.secret>,tensor<2xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/ceil.mlir b/libspu/compiler/tests/interpret/ceil.mlir new file mode 100644 index 00000000..5a7d2e35 --- /dev/null +++ b/libspu/compiler/tests/interpret/ceil.mlir @@ -0,0 +1,66 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @ceil_op_test_f16_f16_p() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf16> + %1 = pphlo.ceil %0 : (tensor<3xf16>)->tensor<3xf16> + %2 = pphlo.constant dense<[-2.000000e+00, 0.000000e+00, 3.000000e+00]> : tensor<3xf16> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf16>, tensor<3xf16>)->() + func.return +} + +// ----- + +func.func @ceil_op_test_f16_f16_s() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf16> + %1 = pphlo.convert %0 : (tensor<3xf16>)->tensor<3x!pphlo.secret> + %2 = pphlo.ceil %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-2.000000e+00, 0.000000e+00, 3.000000e+00]> : tensor<3xf16> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf16> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf16>, tensor<3xf16>)->() + func.return +} + +// ----- + +func.func @ceil_op_test_f32_f32_p() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : 
tensor<3xf32> + %1 = pphlo.ceil %0 : (tensor<3xf32>)->tensor<3xf32> + %2 = pphlo.constant dense<[-2.000000e+00, 0.000000e+00, 3.000000e+00]> : tensor<3xf32> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf32>, tensor<3xf32>)->() + func.return +} + +// ----- + +func.func @ceil_op_test_f32_f32_s() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf32> + %1 = pphlo.convert %0 : (tensor<3xf32>)->tensor<3x!pphlo.secret> + %2 = pphlo.ceil %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-2.000000e+00, 0.000000e+00, 3.000000e+00]> : tensor<3xf32> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf32> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf32>, tensor<3xf32>)->() + func.return +} + +// ----- + +func.func @ceil_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf64> + %1 = pphlo.ceil %0 : (tensor<3xf64>)->tensor<3xf64> + %2 = pphlo.constant dense<[-2.000000e+00, 0.000000e+00, 3.000000e+00]> : tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} + +// ----- + +func.func @ceil_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf64> + %1 = pphlo.convert %0 : (tensor<3xf64>)->tensor<3x!pphlo.secret> + %2 = pphlo.ceil %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-2.000000e+00, 0.000000e+00, 3.000000e+00]> : tensor<3xf64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/clamp.mlir b/libspu/compiler/tests/interpret/clamp.mlir new file mode 100644 index 00000000..cf193911 --- /dev/null +++ b/libspu/compiler/tests/interpret/clamp.mlir @@ -0,0 +1,11 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @clamp_op_test_si64() { + %min = pphlo.constant dense<[1, 5, 
-5]> : tensor<3xi64> + %operand = pphlo.constant dense<[2, 3, -1]> : tensor<3xi64> + %max = pphlo.constant dense<[3, 7, -3]> : tensor<3xi64> + %result = pphlo.clamp %min, %operand, %max : (tensor<3xi64>, tensor<3xi64>, tensor<3xi64>) -> tensor<3xi64> + %expected = pphlo.constant dense<[2, 5, -3]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%result, %expected) : (tensor<3xi64>,tensor<3xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/concatenate.mlir b/libspu/compiler/tests/interpret/concatenate.mlir new file mode 100644 index 00000000..e16c5e1f --- /dev/null +++ b/libspu/compiler/tests/interpret/concatenate.mlir @@ -0,0 +1,10 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @concatenate() { + %input0 = pphlo.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi64> + %input1 = pphlo.constant dense<[[7, 8]]> : tensor<1x2xi64> + %result = pphlo.concatenate %input0, %input1 dim = 0 : (tensor<3x2xi64>, tensor<1x2xi64>) -> tensor<4x2xi64> + %expected = pphlo.constant dense<[[1, 2], [3, 4] , [5, 6], [7, 8]]> : tensor<4x2xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<4x2xi64>,tensor<4x2xi64>)->() + func.return +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/convert.mlir b/libspu/compiler/tests/interpret/convert.mlir new file mode 100644 index 00000000..64c6fa15 --- /dev/null +++ b/libspu/compiler/tests/interpret/convert.mlir @@ -0,0 +1,23 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @convert_op_test_1() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi32> + %1 = pphlo.convert %0 : (tensor<5xi32>) -> tensor<5xf32> + %2 = pphlo.convert %1 : (tensor<5xf32>) -> tensor<5xi32> + %expected = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi32> + pphlo.custom_call @expect_eq (%2, %expected) : (tensor<5xi32>, tensor<5xi32>) -> () + func.return +} + +// ----- + +func.func @convert_op_test_3() { + %0 = pphlo.constant() {value = dense<[0.0, 1.0, 
8.0, -9.0, 10.0]> : tensor<5xf32>} : () -> tensor<5xf32> + %1 = pphlo.convert %0 : (tensor<5xf32>) -> tensor<5x!pphlo.secret> + %2 = pphlo.convert %1 : (tensor<5x!pphlo.secret>) -> tensor<5xf32> + %3 = pphlo.convert %2 : (tensor<5xf32>) -> tensor<5xi32> + %expected = pphlo.constant() {value = dense<[0, 1, 8, -9, 10]> : tensor<5xi32>} : () -> tensor<5xi32> + pphlo.custom_call @expect_almost_eq (%2, %0) : (tensor<5xf32>, tensor<5xf32>) -> () + pphlo.custom_call @expect_eq (%3, %expected) : (tensor<5xi32>, tensor<5xi32>) -> () + func.return +} diff --git a/libspu/compiler/tests/interpret/convolution.mlir b/libspu/compiler/tests/interpret/convolution.mlir new file mode 100644 index 00000000..81a12c05 --- /dev/null +++ b/libspu/compiler/tests/interpret/convolution.mlir @@ -0,0 +1,108 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @main() { + %0 = pphlo.constant dense<[[[[ 1.0, 2.0, 3.0, 4.0], + [ 5.0, 6.0, 7.0, 8.0], + [ 9.0,10.0,11.0,12.0], + [13.0,14.0,15.0,16.0]]]]> : tensor<1x1x4x4xf32> + %1 = pphlo.constant dense<[[[[5.0,6.0], + [7.0,8.0]]]]> : tensor<1x1x2x2xf32> + %2 = pphlo.constant dense<0.000000e+00> : tensor + %3 = pphlo.pad %0, %2, low = [0, 0, 0, 0], high = [0, 0, 1, 1], interior = [0, 0, 0, 0] : (tensor<1x1x4x4xf32>, tensor) -> tensor<1x1x5x5xf32> + %4 = pphlo.convolution(%3, %1) + dim_numbers = [b, f, 0, 1]x[o, i, 0, 1]->[b, f, 0, 1], + window = {stride = [1, 1]} : (tensor<1x1x5x5xf32>, tensor<1x1x2x2xf32>) -> tensor<1x1x4x4xf32> + %expected = pphlo.constant dense<[[[[100.0, 126.0, 152.0, 76.0 ], + [204.0, 230.0, 256.0, 124.0], + [308.0, 334.0, 360.0, 172.0], + [149.0, 160.0, 171.0, 80.0 ]]]]> : tensor<1x1x4x4xf32> + pphlo.custom_call @expect_almost_eq (%4, %expected) : (tensor<1x1x4x4xf32>, tensor<1x1x4x4xf32>) -> () + return + } + +// ----- + +func.func @main() { + %0 = pphlo.constant dense<[[[[ 1.0, 2.0, 3.0, 4.0]], [[ 5.0, 6.0, 7.0, 8.0]], [[ 9.0, 10.0, 11.0, 12.0]]], + [[[13.0, 14.0, 15.0, 16.0]], [[17.0, 18.0, 19.0, 20.0]], 
[[21.0, 22.0, 23.0, 24.0]]]]> : tensor<2x3x1x4xf32> + %1 = pphlo.constant dense<[[[[1.0, 7.0, 13.0], [4.0, 10.0, 16.0]], + [[2.0, 8.0, 14.0], [5.0, 11.0, 17.0]], + [[3.0, 9.0, 15.0], [6.0, 12.0, 18.0]]]]> : tensor<1x3x2x3xf32> + %2 = pphlo.convolution(%0, %1) + dim_numbers = [f, 0, b, 1]x[o, 1, i, 0]->[f, 0, b, 1], + window = {stride = [1, 1]} : (tensor<2x3x1x4xf32>, tensor<1x3x2x3xf32>) -> tensor<1x1x1x2xf32> + %expected = pphlo.constant dense<[[[[2514.0, 2685.0]]]]> : tensor<1x1x1x2xf32> + pphlo.custom_call @expect_almost_eq (%2, %expected) : (tensor<1x1x1x2xf32>, tensor<1x1x1x2xf32>) -> () + return +} + +// ----- + +func.func @main() { + %0 = pphlo.constant dense<[[[[ 1.0, 2.0, 3.0, 4.0], + [ 5.0, 6.0, 7.0, 8.0], + [ 9.0, 10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0]]]]>: tensor<1x1x4x4xf32> + %1 = pphlo.constant dense<[[[[5.0, 6.0], + [7.0, 8.0]]]]>: tensor<1x1x2x2xf32> + %2 = pphlo.constant dense<0.000000e+00> : tensor + %3 = pphlo.pad %0, %2, low = [0, 0, 0, 0], high = [0, 0, 1, 1], interior = [0, 0, 1, 1] : (tensor<1x1x4x4xf32>, tensor) -> tensor<1x1x8x8xf32> + %4 = pphlo.convolution(%3, %1) + dim_numbers = [b, f, 0, 1]x[o, i, 0, 1]->[b, f, 0, 1], + window = {stride = [1, 1]} : (tensor<1x1x8x8xf32>, tensor<1x1x2x2xf32>) -> tensor<1x1x7x7xf32> + %expected = pphlo.constant dense<[[[[ 5.0, 12.0, 10.0, 18.0, 15.0, 24.0, 20.0], + [35.0, 48.0, 42.0, 56.0, 49.0, 64.0, 56.0], + [25.0, 36.0, 30.0, 42.0, 35.0, 48.0, 40.0], + [63.0, 80.0, 70.0, 88.0, 77.0, 96.0, 84.0], + [45.0, 60.0, 50.0, 66.0, 55.0, 72.0, 60.0], + [91.0, 112.0, 98.0, 120.0, 105.0, 128.0, 112.0], + [65.0, 84.0, 70.0, 90.0, 75.0, 96.0, 80.0]]]]> : tensor<1x1x7x7xf32> + pphlo.custom_call @expect_almost_eq (%4, %expected) : (tensor<1x1x7x7xf32>, tensor<1x1x7x7xf32>) -> () + return +} + + +// ----- + +func.func @main() { + %0 = pphlo.constant dense<[[[[ 1.0, 2.0, 3.0, 4.0], + [ 5.0, 6.0, 7.0, 8.0], + [ 9.0, 10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0]]]]>: tensor<1x1x4x4xf32> + %1 = pphlo.constant 
dense<[[[[5.0, 6.0], + [7.0, 8.0]]]]>: tensor<1x1x2x2xf32> + %2 = pphlo.constant dense<0.000000e+00> : tensor + %3 = pphlo.pad %0, %2, low = [0, 0, 1, 1], high = [0, 0, 1, 1], interior = [0, 0, 1, 1] : (tensor<1x1x4x4xf32>, tensor) -> tensor<1x1x9x9xf32> + %4 = pphlo.convolution(%3, %1) + dim_numbers = [b, f, 0, 1]x[o, i, 0, 1]->[b, f, 0, 1], + window = {stride = [1, 1]} : (tensor<1x1x9x9xf32>, tensor<1x1x2x2xf32>) -> tensor<1x1x8x8xf32> + %expected = pphlo.constant dense<[[[[ 8.0, 7.0, 16.0, 14.0, 24.0, 21.0, 32.0, 28.0], + [ 6.0, 5.0, 12.0, 10.0, 18.0, 15.0, 24.0, 20.0], + [ 40.0, 35.0, 48.0, 42.0, 56.0, 49.0, 64.0, 56.0], + [ 30.0, 25.0, 36.0, 30.0, 42.0, 35.0, 48.0, 40.0], + [ 72.0, 63.0, 80.0, 70.0, 88.0, 77.0, 96.0, 84.0], + [ 54.0, 45.0, 60.0, 50.0, 66.0, 55.0, 72.0, 60.0], + [104.0, 91.0, 112.0, 98.0, 120.0, 105.0, 128.0, 112.0], + [ 78.0, 65.0, 84.0, 70.0, 90.0, 75.0, 96.0, 80.0]]]]> : tensor<1x1x8x8xf32> + pphlo.custom_call @expect_almost_eq (%4, %expected) : (tensor<1x1x8x8xf32>, tensor<1x1x8x8xf32>) -> () + return +} + +// ----- + +func.func @main() { + %0 = pphlo.constant dense<[[[[ 0.0, 1.0, 2.0, 3.0, 4.0, 5.0], + [ 6.0, 7.0, 8.0, 9.0, 10.0, 11.0], + [12.0, 13.0, 14.0, 15.0, 16.0, 17.0], + [18.0, 19.0, 20.0, 21.0, 22.0, 23.0]]]]> : tensor<1x1x4x6xf32> + %1 = pphlo.constant dense<[[[[1.0, 10.0, 100.0], + [2.0, 20.0, 200.0]]]]> : tensor<1x1x2x3xf32> + %2 = pphlo.constant dense<0.000000e+00> : tensor + %3 = pphlo.pad %1, %2, low = [0, 0, 0, 0], high = [0, 0, 0, 0], interior = [0, 0, 1, 1] : (tensor<1x1x2x3xf32>, tensor) -> tensor<1x1x3x5xf32> + %4 = pphlo.convolution(%0, %3) + dim_numbers = [b, f, 0, 1]x[o, i, 0, 1]->[b, f, 0, 1], + window = {stride = [1, 1]} : (tensor<1x1x4x6xf32>, tensor<1x1x3x5xf32>) -> tensor<1x1x2x2xf32> + %expected = pphlo.constant dense<[[[[3924.0, 4257.0], [5922.0, 6255.0]]]]> : tensor<1x1x2x2xf32> + pphlo.custom_call @expect_almost_eq (%4, %expected) : (tensor<1x1x2x2xf32>, tensor<1x1x2x2xf32>) -> () + return +} diff --git 
a/libspu/compiler/tests/interpret/cosine.mlir b/libspu/compiler/tests/interpret/cosine.mlir new file mode 100644 index 00000000..2b89d421 --- /dev/null +++ b/libspu/compiler/tests/interpret/cosine.mlir @@ -0,0 +1,66 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @cosine_op_test_f16_f16_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.cosine %0 : (tensor<5xf16>)->tensor<5xf16> + %2 = pphlo.constant dense<[1.000000e+00, 0.540302277, 0.992197692, 0.995004177, -1.000000e+00]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @cosine_op_test_f16_f16_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %2 = pphlo.cosine %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[1.000000e+00, 0.540302277, 0.992197692, 0.995004177, -1.000000e+00]> : tensor<5xf16> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @cosine_op_test_f32_f32_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.cosine %0 : (tensor<5xf32>)->tensor<5xf32> + %2 = pphlo.constant dense<[1.000000e+00, 0.540302277, 0.992197692, 0.995004177, -1.000000e+00]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @cosine_op_test_f32_f32_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %2 = pphlo.cosine %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[1.000000e+00, 0.540302277, 
0.992197692, 0.995004177, -1.000000e+00]> : tensor<5xf32> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @cosine_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.cosine %0 : (tensor<5xf64>)->tensor<5xf64> + %2 = pphlo.constant dense<[1.000000e+00, 0.54030230586813977, 0.992197667229329, 0.99500416527802582, -1.000000e+00]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @cosine_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %2 = pphlo.cosine %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[1.000000e+00, 0.54030230586813977, 0.992197667229329, 0.99500416527802582, -1.000000e+00]> : tensor<5xf64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/divide.mlir b/libspu/compiler/tests/interpret/divide.mlir new file mode 100644 index 00000000..6b5a95e1 --- /dev/null +++ b/libspu/compiler/tests/interpret/divide.mlir @@ -0,0 +1,75 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @divide_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[17, -17, 17, -17]> : tensor<4xi64> + %1 = pphlo.constant dense<[3, 3, -3, -3]> : tensor<4xi64> + %2 = pphlo.divide %0,%1 : (tensor<4xi64>,tensor<4xi64>)->tensor<4xi64> + %3 = pphlo.constant dense<[5, -5, -5, 5]> : tensor<4xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi64>, tensor<4xi64>)->() + func.return +} + +// ----- + +func.func 
@divide_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[17, -17, 17, -17]> : tensor<4xi64> + %1 = pphlo.constant dense<[3, 3, -3, -3]> : tensor<4xi64> + %2 = pphlo.convert %0 : (tensor<4xi64>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi64>)->tensor<4x!pphlo.secret> + %4 = pphlo.divide %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[5, -5, -5, 5]> : tensor<4xi64> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi64>, tensor<4xi64>)->() + func.return +} + +// ----- + +func.func @divide_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[17, 18, 19, 20]> : tensor<4xui64> + %1 = pphlo.constant dense<[3, 4, 5, 7]> : tensor<4xui64> + %2 = pphlo.divide %0,%1 : (tensor<4xui64>,tensor<4xui64>)->tensor<4xui64> + %3 = pphlo.constant dense<[5, 4, 3, 2]> : tensor<4xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xui64>, tensor<4xui64>)->() + func.return +} + +// ----- + +func.func @divide_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[17, 18, 19, 20]> : tensor<4xui64> + %1 = pphlo.constant dense<[3, 4, 5, 7]> : tensor<4xui64> + %2 = pphlo.convert %0 : (tensor<4xui64>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xui64>)->tensor<4x!pphlo.secret> + %4 = pphlo.divide %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[5, 4, 3, 2]> : tensor<4xui64> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xui64>, tensor<4xui64>)->() + func.return +} + +// ----- + +func.func @divide_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[17.1, -17.1, 17.1, -17.1]> : tensor<4xf64> + %1 = pphlo.constant dense<[3.0, 3.0, -3.0, -3.0]> : tensor<4xf64> + %2 = pphlo.divide %0,%1 : (tensor<4xf64>,tensor<4xf64>)->tensor<4xf64> + %3 = pphlo.constant dense<[5.700000e+00, -5.700000e+00, 
-5.700000e+00, 5.700000e+00]> : tensor<4xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<4xf64>, tensor<4xf64>)->() + func.return +} + +// ----- + +func.func @divide_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[17.1, -17.1, 17.1, -17.1]> : tensor<4xf64> + %1 = pphlo.constant dense<[3.0, 3.0, -3.0, -3.0]> : tensor<4xf64> + %2 = pphlo.convert %0 : (tensor<4xf64>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xf64>)->tensor<4x!pphlo.secret> + %4 = pphlo.divide %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[5.700000e+00, -5.700000e+00, -5.700000e+00, 5.700000e+00]> : tensor<4xf64> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<4xf64>, tensor<4xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/dot_general.mlir b/libspu/compiler/tests/interpret/dot_general.mlir new file mode 100644 index 00000000..b97f06f7 --- /dev/null +++ b/libspu/compiler/tests/interpret/dot_general.mlir @@ -0,0 +1,51 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @dot_general_op_test_si64() { + %lhs = pphlo.constant dense<[[[1, 2], [3, 4]], + [[5, 6], [7, 8]]]> : tensor<2x2x2xi64> + %rhs = pphlo.constant dense<[[[1, 0], [0, 1]], + [[1, 0], [0, 1]]]> : tensor<2x2x2xi64> + %result = pphlo.dot_general %lhs, %rhs, + batching_dims = [0] x [0], + contracting_dims = [2] x [1] + : (tensor<2x2x2xi64>, tensor<2x2x2xi64>) -> tensor<2x2x2xi64> + %expected = pphlo.constant dense<[[[1, 2], [3, 4]], + [[5, 6], [7, 8]]]> : tensor<2x2x2xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<2x2x2xi64>, tensor<2x2x2xi64>)->() + func.return +} + +// ----- + +func.func @dot_general_op_test_empty_dims() { + // %lhs = pphlo.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi64> + // %rhs = pphlo.constant dense<[[1, 0], [0, 1]]> : tensor<2x2xi64> + // %result = pphlo.dot_general %lhs, %rhs, + // 
batching_dims = [] x [], + // contracting_dims = [] x [] + // : (tensor<2x2xi64>, tensor<2x2xi64>) -> tensor<2x2x2x2xi64> + // %expected = pphlo.constant dense<[[[[1, 0], [0, 1]], + // [[2, 0], [0, 2]]], + // [[[3, 0], [0, 3]], + // [[4, 0], [0, 4]]]]> : tensor<2x2x2x2xi64> + // pphlo.custom_call @expect_eq (%result, %expected) : (tensor<2x2x2x2xi64>,tensor<2x2x2x2xi64>)->() + func.return +} + +// ----- + +func.func @main() { + %0 = pphlo.iota dim = 0 : tensor<12xi32> + %1 = pphlo.reshape %0 : (tensor<12xi32>) -> tensor<3x1x4xi32> + %2 = pphlo.iota dim = 0 : tensor<60xi32> + %3 = pphlo.reshape %2 : (tensor<60xi32>) -> tensor<3x4x5xi32> + %4 = pphlo.dot_general %1, %3, + batching_dims = [0] x [0], + contracting_dims = [2] x [1] + : (tensor<3x1x4xi32>, tensor<3x4x5xi32>) -> tensor<3x5xi32> + %5 = pphlo.constant dense<[[ 70, 76, 82, 88, 94], + [630, 652, 674, 696, 718], + [1830, 1868, 1906, 1944, 1982]]> : tensor<3x5xi32> + pphlo.custom_call @expect_eq (%4, %5) : (tensor<3x5xi32>, tensor<3x5xi32>)->() + return +} diff --git a/libspu/compiler/tests/interpret/dynamic_slice.mlir b/libspu/compiler/tests/interpret/dynamic_slice.mlir new file mode 100644 index 00000000..a72577c6 --- /dev/null +++ b/libspu/compiler/tests/interpret/dynamic_slice.mlir @@ -0,0 +1,14 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @dynamic_slice() { + %operand = pphlo.constant dense<[[1, 1, 1], + [1, 1, 1], + [1, 1, 1]]> : tensor<3x3xi64> + %start_indices0 = pphlo.constant dense<3> : tensor + %start_indices1 = pphlo.constant dense<3> : tensor + %result = "pphlo.dynamic_slice"(%operand, %start_indices0, %start_indices1) { + slice_sizes = array + } : (tensor<3x3xi64>, tensor, tensor) -> tensor<3x3xi64> + pphlo.custom_call @expect_eq (%result, %operand) : (tensor<3x3xi64>,tensor<3x3xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/dynamic_update_slice.mlir b/libspu/compiler/tests/interpret/dynamic_update_slice.mlir new file mode 100644 index 
00000000..9177f938 --- /dev/null +++ b/libspu/compiler/tests/interpret/dynamic_update_slice.mlir @@ -0,0 +1,17 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @dynamic_update_slice() { + %operand = pphlo.constant dense<[[1, 1, 1, 1], + [1, 1, 1, 1], + [1, 2, 2, 2], + [1, 2, 2, 2]]> : tensor<4x4xi64> + %update = pphlo.constant dense<[[1, 1, 1], + [1, 1, 1]]> : tensor<2x3xi64> + %start_indices0 = pphlo.constant dense<4> : tensor + %start_indices1 = pphlo.constant dense<4> : tensor + %result = pphlo.dynamic_update_slice %operand, %update, %start_indices0, %start_indices1 : + (tensor<4x4xi64>, tensor<2x3xi64>, tensor, tensor) -> tensor<4x4xi64> + %expected = pphlo.constant dense<[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]> : tensor<4x4xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<4x4xi64>,tensor<4x4xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/equal.mlir b/libspu/compiler/tests/interpret/equal.mlir new file mode 100644 index 00000000..c29d51e4 --- /dev/null +++ b/libspu/compiler/tests/interpret/equal.mlir @@ -0,0 +1,175 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @equal_op_test_i64_i1_pp() { + %0 = pphlo.constant dense<-2> : tensor + %1 = pphlo.constant dense<-2> : tensor + %2 = pphlo.equal %0,%1 : (tensor,tensor)->tensor + %3 = pphlo.constant dense : tensor + pphlo.custom_call @expect_eq(%2, %3) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @equal_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<-2> : tensor + %1 = pphlo.constant dense<-2> : tensor + %2 = pphlo.convert %0 : (tensor)->tensor> + %3 = pphlo.convert %1 : (tensor)->tensor> + %4 = pphlo.equal %2, %3 : (tensor>,tensor>)->tensor> + %5 = pphlo.constant dense : tensor + %6 = pphlo.convert %4 : (tensor>)->tensor + pphlo.custom_call @expect_eq(%5, %6) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @equal_op_test_i64_i1_pp() { + %0 = 
pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.equal %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi1> + %3 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<0> : tensor + %1 = pphlo.constant dense<0> : tensor + %2 = pphlo.equal %0,%1 : (tensor,tensor)->tensor + %3 = pphlo.constant dense : tensor + pphlo.custom_call @expect_eq(%2, %3) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @equal_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<0> : tensor + %1 = pphlo.constant dense<0> : tensor + %2 = pphlo.convert %0 : (tensor)->tensor> + %3 = pphlo.convert %1 : (tensor)->tensor> + %4 = pphlo.equal %2, %3 : (tensor>,tensor>)->tensor> + %5 = pphlo.constant dense : tensor + %6 = pphlo.convert %4 : (tensor>)->tensor + pphlo.custom_call @expect_eq(%5, %6) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @equal_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.equal %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xi1> + %3 = 
pphlo.constant dense<[true, false]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.equal %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[true, false]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor + %1 = pphlo.constant dense : tensor + %2 = pphlo.equal %0,%1 : (tensor,tensor)->tensor + %3 = pphlo.constant dense : tensor + pphlo.custom_call @expect_eq(%2, %3) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @equal_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor + %1 = pphlo.constant dense : tensor + %2 = pphlo.convert %0 : (tensor)->tensor> + %3 = pphlo.convert %1 : (tensor)->tensor> + %4 = pphlo.equal %2, %3 : (tensor>,tensor>)->tensor> + %5 = pphlo.constant dense : tensor + %6 = pphlo.convert %4 : (tensor>)->tensor + pphlo.custom_call @expect_eq(%5, %6) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @equal_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.equal %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[true, false, false, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[true, 
true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.equal %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[true, false, false, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_f64_i1_pp() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.equal %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xi1> + %3 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @equal_op_test_f64_i1_ss() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/exponential.mlir b/libspu/compiler/tests/interpret/exponential.mlir new file mode 100644 index 00000000..4b505e63 --- /dev/null +++ b/libspu/compiler/tests/interpret/exponential.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO 
GENERATED, DO NOT EDIT + +func.func @exponential_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[[0.0, 1.0], [2.0, 3.0]]> : tensor<2x2xf64> + %1 = pphlo.exponential %0 : (tensor<2x2xf64>)->tensor<2x2xf64> + %2 = pphlo.constant dense<[[1.000000e+00, 2.7182818284590451], [7.3890560989306504, 20.085536923187668]]> : tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) { tol = 0.4 }: (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} + +// ----- + +func.func @exponential_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[[0.0, 1.0], [2.0, 3.0]]> : tensor<2x2xf64> + %1 = pphlo.convert %0 : (tensor<2x2xf64>)->tensor<2x2x!pphlo.secret> + %2 = pphlo.exponential %1 : (tensor<2x2x!pphlo.secret>)->tensor<2x2x!pphlo.secret> + %3 = pphlo.constant dense<[[1.000000e+00, 2.7182818284590451], [7.3890560989306504, 20.085536923187668]]> : tensor<2x2xf64> + %4 = pphlo.convert %2 : (tensor<2x2x!pphlo.secret>)->tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) { tol = 0.4 }: (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/exponential_minus_one.mlir b/libspu/compiler/tests/interpret/exponential_minus_one.mlir new file mode 100644 index 00000000..46cf91f1 --- /dev/null +++ b/libspu/compiler/tests/interpret/exponential_minus_one.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @exponential_minus_one_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[0.0, 1.0]> : tensor<2xf64> + %1 = pphlo.exponential_minus_one %0 : (tensor<2xf64>)->tensor<2xf64> + %2 = pphlo.constant dense<[0.0, 1.7182818284590451]> : tensor<2xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<2xf64>, tensor<2xf64>)->() + func.return +} + +// ----- + +func.func @exponential_minus_one_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[0.0, 1.0]> : tensor<2xf64> + %1 = pphlo.convert %0 : (tensor<2xf64>)->tensor<2x!pphlo.secret> + %2 = pphlo.exponential_minus_one 
%1 : (tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %3 = pphlo.constant dense<[0.0, 1.7182818284590451]> : tensor<2xf64> + %4 = pphlo.convert %2 : (tensor<2x!pphlo.secret>)->tensor<2xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<2xf64>, tensor<2xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/floor.mlir b/libspu/compiler/tests/interpret/floor.mlir new file mode 100644 index 00000000..734266b9 --- /dev/null +++ b/libspu/compiler/tests/interpret/floor.mlir @@ -0,0 +1,66 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @floor_op_test_f16_f16_p() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf16> + %1 = pphlo.floor %0 : (tensor<3xf16>)->tensor<3xf16> + %2 = pphlo.constant dense<[-3.000000e+00, 0.000000e+00, 2.000000e+00]> : tensor<3xf16> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf16>, tensor<3xf16>)->() + func.return +} + +// ----- + +func.func @floor_op_test_f16_f16_s() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf16> + %1 = pphlo.convert %0 : (tensor<3xf16>)->tensor<3x!pphlo.secret> + %2 = pphlo.floor %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-3.000000e+00, 0.000000e+00, 2.000000e+00]> : tensor<3xf16> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf16> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf16>, tensor<3xf16>)->() + func.return +} + +// ----- + +func.func @floor_op_test_f32_f32_p() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf32> + %1 = pphlo.floor %0 : (tensor<3xf32>)->tensor<3xf32> + %2 = pphlo.constant dense<[-3.000000e+00, 0.000000e+00, 2.000000e+00]> : tensor<3xf32> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf32>, tensor<3xf32>)->() + func.return +} + +// ----- + +func.func @floor_op_test_f32_f32_s() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf32> + %1 = pphlo.convert %0 : 
(tensor<3xf32>)->tensor<3x!pphlo.secret> + %2 = pphlo.floor %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-3.000000e+00, 0.000000e+00, 2.000000e+00]> : tensor<3xf32> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf32> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf32>, tensor<3xf32>)->() + func.return +} + +// ----- + +func.func @floor_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf64> + %1 = pphlo.floor %0 : (tensor<3xf64>)->tensor<3xf64> + %2 = pphlo.constant dense<[-3.000000e+00, 0.000000e+00, 2.000000e+00]> : tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} + +// ----- + +func.func @floor_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[-2.5, 0.0, 2.5]> : tensor<3xf64> + %1 = pphlo.convert %0 : (tensor<3xf64>)->tensor<3x!pphlo.secret> + %2 = pphlo.floor %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-3.000000e+00, 0.000000e+00, 2.000000e+00]> : tensor<3xf64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/generate_mlir_tests.py b/libspu/compiler/tests/interpret/generate_mlir_tests.py new file mode 100755 index 00000000..114e66a0 --- /dev/null +++ b/libspu/compiler/tests/interpret/generate_mlir_tests.py @@ -0,0 +1,124 @@ +#! /usr/bin/env python3 + +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import json + +Data = collections.namedtuple( + "Data", + ["data", "shape", "dtype"], +) + +Case = collections.namedtuple( + "TestCase", + ["inputs", "expected", "checker", "tol"], +) + +Record = collections.namedtuple( + "Record", + ["name", "template", "cases"], +) + + +def TestCase(inputs, expected, checker='expect_eq', tol=None): + return Case(inputs, expected, checker, tol) + + +TESTS = [ + "abs", + "add", + "and", + "atan2", + "ceil", + "cosine", + "divide", + "equal", + "not_equal", + "greater_equal", + "greater", + "less", + "less_equal", + "exponential_minus_one", + "exponential", + "floor", + "log_plus_one", + "log", + "logistic", + "max", + "min", + "multiply", + "negate", + "not", + "or", + # "popcnt", + "power", + "reshape", + "round_afz", + "rsqrt", + "arshift", + "rshift", + "sign", + "sine", + "sqrt", + "subtract", + "tanh", + "xor", +] + +for test in TESTS: + with open(f"test_json/{test}.json", "r") as f: + test_contents = json.loads(f.read()) + + test_name = test_contents["name"] + template_name = test_contents["template"] + + with open(f"template/{template_name}.template", "r") as f: + template = f.read() + + with open(f"{test_name}.mlir", "w+") as f: + # emit run command + f.write("// RUN: spu-translate --interpret -split-input-file %s\n") + f.write("// AUTO GENERATED, DO NOT EDIT\n\n") + + # Emit cases + cases = [] + for case in test_contents["testcases"]: + c = template.replace('%OP%', test_name) + for idx, input in enumerate(case["inputs"]): + c = c.replace(f'%INPUT{idx}%', input["data"]) + if not 
input["shape"]: + c = c.replace(f'%IN{idx}_SHAPE%x', '') + else: + c = c.replace(f'%IN{idx}_SHAPE%', input["shape"]) + c = c.replace(f'%IN{idx}_DTYPE%', input["dtype"]) + for idx, expect in enumerate(case["expected"]): + c = c.replace(f'%EXPECTED{idx}%', expect["data"]) + if not expect["shape"]: + c = c.replace(f'%OUT{idx}_SHAPE%x', '') + else: + c = c.replace(f'%OUT{idx}_SHAPE%', expect["shape"]) + c = c.replace(f'%OUT{idx}_DTYPE%', expect["dtype"]) + if "checker" in case: + c = c.replace('%CHECKER%', case["checker"]) + else: + c = c.replace('%CHECKER%', 'expect_eq') + if "tol" in case: + c = c.replace('%ATTR%', f'{{ tol = {case["tol"]} }}') + else: + c = c.replace('%ATTR%', '') + cases.append(c) + + f.write("\n// -----\n\n".join(cases)) diff --git a/libspu/compiler/tests/interpret/greater.mlir b/libspu/compiler/tests/interpret/greater.mlir new file mode 100644 index 00000000..e6b6b0d1 --- /dev/null +++ b/libspu/compiler/tests/interpret/greater.mlir @@ -0,0 +1,100 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @greater_op_test_i64_i1_pp() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.greater %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi1> + %3 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.greater %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + %6 = 
pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.greater %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.greater %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.greater %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, true, false, false]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.greater %2, %3 : 
(tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, false, false]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_f64_i1_pp() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.greater %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xi1> + %3 = pphlo.constant dense<[false, false, false, false, false]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @greater_op_test_f64_i1_ss() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.greater %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[false, false, false, false, false]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/greater_equal.mlir b/libspu/compiler/tests/interpret/greater_equal.mlir new file mode 100644 index 00000000..b1b32c14 --- /dev/null +++ b/libspu/compiler/tests/interpret/greater_equal.mlir @@ -0,0 +1,100 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @greater_equal_op_test_i64_i1_pp() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.greater_equal %0,%1 : 
(tensor<5xi64>,tensor<5xi64>)->tensor<5xi1> + %3 = pphlo.constant dense<[true, true, true, true, true]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.greater_equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[true, true, true, true, true]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.greater_equal %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xi1> + %3 = pphlo.constant dense<[true, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.greater_equal %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[true, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[true, 
true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.greater_equal %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[true, true, false, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.greater_equal %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[true, true, false, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_f64_i1_pp() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.greater_equal %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xi1> + %3 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @greater_equal_op_test_f64_i1_ss() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.greater_equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[true, false, true, false, true]> : 
tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/if.mlir b/libspu/compiler/tests/interpret/if.mlir new file mode 100644 index 00000000..7f98d8b0 --- /dev/null +++ b/libspu/compiler/tests/interpret/if.mlir @@ -0,0 +1,33 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @if_ops_true_branch() { + %pred = pphlo.constant dense : tensor + %result0, %result1 = "pphlo.if"(%pred) ({ + %0 = pphlo.constant dense<0> : tensor<2xi64> + pphlo.return %0, %0 : tensor<2xi64>, tensor<2xi64> + }, { + %1 = pphlo.constant dense<1> : tensor<2xi64> + pphlo.return %1, %1 : tensor<2xi64>, tensor<2xi64> + }) : (tensor) -> (tensor<2xi64>, tensor<2xi64>) + %expected = pphlo.constant dense<[0,0]> : tensor<2xi64> + pphlo.custom_call @expect_eq (%result0, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + pphlo.custom_call @expect_eq (%result1, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + func.return +} + +// ----- + +func.func @if_ops_false_branch() { + %pred = pphlo.constant dense : tensor + %result0, %result1 = "pphlo.if"(%pred) ({ + %0 = pphlo.constant dense<0> : tensor<2xi64> + pphlo.return %0, %0 : tensor<2xi64>, tensor<2xi64> + }, { + %1 = pphlo.constant dense<1> : tensor<2xi64> + pphlo.return %1, %1 : tensor<2xi64>, tensor<2xi64> + }) : (tensor) -> (tensor<2xi64>, tensor<2xi64>) + %expected = pphlo.constant dense<[1, 1]> : tensor<2xi64> + pphlo.custom_call @expect_eq (%result0, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + pphlo.custom_call @expect_eq (%result1, %expected) : (tensor<2xi64>,tensor<2xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/iota.mlir b/libspu/compiler/tests/interpret/iota.mlir new file mode 100644 index 00000000..40f7800d --- /dev/null +++ b/libspu/compiler/tests/interpret/iota.mlir @@ -0,0 +1,152 @@ +// RUN: spu-translate --interpret -split-input-file 
%s + +func.func @iota_op_test_si8_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xi8> + %expected = pphlo.constant dense<[[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]]> : tensor<3x4xi8> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi8>,tensor<3x4xi8>)->() + func.return +} + +// ----- + +func.func @iota_op_test_si8_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xi8> + %expected = pphlo.constant dense<[[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]> : tensor<3x4xi8> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi8>,tensor<3x4xi8>)->() + func.return +} + +// ----- + +func.func @iota_op_test_si16_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xi16> + %expected = pphlo.constant dense<[[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]]> : tensor<3x4xi16> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi16>,tensor<3x4xi16>)->() + func.return +} + +// ----- + +func.func @iota_op_test_si16_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xi16> + %expected = pphlo.constant dense<[[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]> : tensor<3x4xi16> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi16>,tensor<3x4xi16>)->() + func.return +} + +// ----- + +func.func @iota_op_test_si32_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xi32> + %expected = pphlo.constant dense<[[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]]> : tensor<3x4xi32> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi32>,tensor<3x4xi32>)->() + func.return +} + +// ----- + +func.func @iota_op_test_si32_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xi32> + %expected = pphlo.constant dense<[[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]> : tensor<3x4xi32> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi32>,tensor<3x4xi32>)->() + func.return +} + +// ----- + +func.func @iota_op_test_si64_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xi64> + %expected = pphlo.constant dense<[[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]]> : tensor<3x4xi64> + pphlo.custom_call 
@expect_eq (%0, %expected) : (tensor<3x4xi64>,tensor<3x4xi64>)->() + func.return +} +// ----- + + +func.func @iota_op_test_si64_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xi64> + %expected = pphlo.constant dense<[[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]> : tensor<3x4xi64> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<3x4xi64>,tensor<3x4xi64>)->() + func.return +} + +// ----- + +func.func @iota_op_test_ui64_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<2x3x2xui64> + %expected = pphlo.constant dense<[[[0, 0], [0, 0], [0, 0]], [[1, 1], [1, 1], [1, 1]]]> : tensor<2x3x2xui64> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<2x3x2xui64>,tensor<2x3x2xui64>)->() + func.return +} + +// ----- + +func.func @iota_op_test_ui64_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<2x3x2xui64> + %expected = pphlo.constant dense<[[[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]]]> : tensor<2x3x2xui64> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<2x3x2xui64>,tensor<2x3x2xui64>)->() + func.return +} + +// ----- + +func.func @iota_op_test_ui64_dim_2() { + %0 = pphlo.iota dim = 2 : tensor<2x3x2xui64> + %expected = pphlo.constant dense<[[[0, 1], [0, 1], [0, 1]], [[0, 1], [0, 1], [0, 1]]]> : tensor<2x3x2xui64> + pphlo.custom_call @expect_eq (%0, %expected) : (tensor<2x3x2xui64>,tensor<2x3x2xui64>)->() + func.return +} + +// ----- + +func.func @iota_op_test_f16_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xf16> + %expected = pphlo.constant dense<[[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00], [1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00], [2.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00]]> : tensor<3x4xf16> + pphlo.custom_call @expect_almost_eq (%0, %expected) : (tensor<3x4xf16>,tensor<3x4xf16>)->() + func.return +} + +// ----- + +func.func @iota_op_test_f16_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xf16> + %expected = pphlo.constant dense<[[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00], [0.000000e+00, 
1.000000e+00, 2.000000e+00, 3.000000e+00], [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00]]> : tensor<3x4xf16> + pphlo.custom_call @expect_almost_eq (%0, %expected) : (tensor<3x4xf16>,tensor<3x4xf16>)->() + func.return +} + +// ----- + +func.func @iota_op_test_f32_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xf32> + %expected = pphlo.constant dense<[[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00], [1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00], [2.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00]]> : tensor<3x4xf32> + pphlo.custom_call @expect_almost_eq (%0, %expected) : (tensor<3x4xf32>,tensor<3x4xf32>)->() + func.return +} + +// ----- + +func.func @iota_op_test_f32_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xf32> + %expected = pphlo.constant dense<[[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00], [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00], [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00]]> : tensor<3x4xf32> + pphlo.custom_call @expect_almost_eq (%0, %expected) : (tensor<3x4xf32>,tensor<3x4xf32>)->() + func.return +} + +// ----- + +func.func @iota_op_test_f64_dim_0() { + %0 = pphlo.iota dim = 0 : tensor<3x4xf64> + %expected = pphlo.constant dense<[[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00], [1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00], [2.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00]]> : tensor<3x4xf64> + pphlo.custom_call @expect_almost_eq (%0, %expected) : (tensor<3x4xf64>,tensor<3x4xf64>)->() + func.return +} + +// ----- + +func.func @iota_op_test_f64_dim_1() { + %0 = pphlo.iota dim = 1 : tensor<3x4xf64> + %expected = pphlo.constant dense<[[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00], [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00], [0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00]]> : tensor<3x4xf64> + pphlo.custom_call @expect_almost_eq (%0, %expected) : (tensor<3x4xf64>,tensor<3x4xf64>)->() + func.return +} diff --git 
a/libspu/compiler/tests/interpret/less.mlir b/libspu/compiler/tests/interpret/less.mlir new file mode 100644 index 00000000..21f12d45 --- /dev/null +++ b/libspu/compiler/tests/interpret/less.mlir @@ -0,0 +1,100 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @less_op_test_i64_i1_pp() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.less %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi1> + %3 = pphlo.constant dense<[false, false, false, false, false]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.less %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[false, false, false, false, false]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.less %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, false]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : 
(tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.less %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, false]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.less %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, false, true, false]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.less %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, false, true, false]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_f64_i1_pp() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.less %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xi1> + %3 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @less_op_test_f64_i1_ss() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : 
tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.less %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/less_equal.mlir b/libspu/compiler/tests/interpret/less_equal.mlir new file mode 100644 index 00000000..2715e3c6 --- /dev/null +++ b/libspu/compiler/tests/interpret/less_equal.mlir @@ -0,0 +1,100 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @less_equal_op_test_i64_i1_pp() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.less_equal %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi1> + %3 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @less_equal_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.less_equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[true, false, true, false, true]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func 
@less_equal_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.less_equal %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xi1> + %3 = pphlo.constant dense<[true, false]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @less_equal_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.less_equal %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[true, false]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @less_equal_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.less_equal %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[true, false, true, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @less_equal_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.less_equal %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[true, false, true, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : 
(tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @less_equal_op_test_f64_i1_pp() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.less_equal %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xi1> + %3 = pphlo.constant dense<[true, true, true, true, true]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @less_equal_op_test_f64_i1_ss() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.less_equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[true, true, true, true, true]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/log.mlir b/libspu/compiler/tests/interpret/log.mlir new file mode 100644 index 00000000..56c51db7 --- /dev/null +++ b/libspu/compiler/tests/interpret/log.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @log_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf64> + %1 = pphlo.log %0 : (tensor<2x2xf64>)->tensor<2x2xf64> + %2 = pphlo.constant dense<[[0.000000e+00, 0.69314718055994529], [1.0986122886681098, 1.3862943611198906]]> : tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} + +// ----- + +func.func 
@log_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf64> + %1 = pphlo.convert %0 : (tensor<2x2xf64>)->tensor<2x2x!pphlo.secret> + %2 = pphlo.log %1 : (tensor<2x2x!pphlo.secret>)->tensor<2x2x!pphlo.secret> + %3 = pphlo.constant dense<[[0.000000e+00, 0.69314718055994529], [1.0986122886681098, 1.3862943611198906]]> : tensor<2x2xf64> + %4 = pphlo.convert %2 : (tensor<2x2x!pphlo.secret>)->tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/log_plus_one.mlir b/libspu/compiler/tests/interpret/log_plus_one.mlir new file mode 100644 index 00000000..721f72b9 --- /dev/null +++ b/libspu/compiler/tests/interpret/log_plus_one.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @log_plus_one_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[0.0, -0.999, 7.0, 6.38905621, 15.0]> : tensor<5xf64> + %1 = pphlo.log_plus_one %0 : (tensor<5xf64>)->tensor<5xf64> + %2 = pphlo.constant dense<[0.0, -6.90776825, 2.07944155, 2.0, 2.77258873]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @log_plus_one_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[0.0, -0.999, 7.0, 6.38905621, 15.0]> : tensor<5xf64> + %1 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %2 = pphlo.log_plus_one %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.0, -6.90776825, 2.07944155, 2.0, 2.77258873]> : tensor<5xf64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/logistic.mlir b/libspu/compiler/tests/interpret/logistic.mlir new file mode 100644 index 00000000..886ef64a --- /dev/null +++ 
b/libspu/compiler/tests/interpret/logistic.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @logistic_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf64> + %1 = pphlo.logistic %0 : (tensor<2x2xf64>)->tensor<2x2xf64> + %2 = pphlo.constant dense<[[0.73105857863000488, 0.88079707797788244],[0.95257412682243322, 0.98201379003790844]]> : tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} + +// ----- + +func.func @logistic_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf64> + %1 = pphlo.convert %0 : (tensor<2x2xf64>)->tensor<2x2x!pphlo.secret> + %2 = pphlo.logistic %1 : (tensor<2x2x!pphlo.secret>)->tensor<2x2x!pphlo.secret> + %3 = pphlo.constant dense<[[0.73105857863000488, 0.88079707797788244],[0.95257412682243322, 0.98201379003790844]]> : tensor<2x2xf64> + %4 = pphlo.convert %2 : (tensor<2x2x!pphlo.secret>)->tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/maximum.mlir b/libspu/compiler/tests/interpret/maximum.mlir new file mode 100644 index 00000000..f05a0048 --- /dev/null +++ b/libspu/compiler/tests/interpret/maximum.mlir @@ -0,0 +1,300 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @maximum_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.maximum %0,%1 : (tensor<5xi8>,tensor<5xi8>)->tensor<5xi8> + %3 = pphlo.constant dense<[0, 1, 8, -9, 127]> : tensor<5xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + 
%1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.convert %0 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, 1, 8, -9, 127]> : tensor<5xi8> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.maximum %0,%1 : (tensor<2xui8>,tensor<2xui8>)->tensor<2xui8> + %3 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.convert %0 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.maximum %0,%1 : (tensor<5xi16>,tensor<5xi16>)->tensor<5xi16> + %3 = pphlo.constant dense<[0, 1, 128, -129, 32767]> : tensor<5xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func 
@maximum_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.convert %0 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, 1, 128, -129, 32767]> : tensor<5xi16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.maximum %0,%1 : (tensor<2xui16>,tensor<2xui16>)->tensor<2xui16> + %3 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.convert %0 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.maximum %0,%1 : (tensor<5xi32>,tensor<5xi32>)->tensor<5xi32> + %3 = pphlo.constant 
dense<[0, 1, 32768, -32769, 2147483647]> : tensor<5xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.convert %0 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, 1, 32768, -32769, 2147483647]> : tensor<5xi32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.maximum %0,%1 : (tensor<2xui32>,tensor<2xui32>)->tensor<2xui32> + %3 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.convert %0 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i64_i64_pp() { + %0 = pphlo.constant 
dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.maximum %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi64> + %3 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.maximum %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xui64> + %3 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = 
pphlo.maximum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.maximum %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, true, true, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, true, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_f16_f16_pp() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf16> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf16> + %2 = pphlo.maximum %0,%1 : (tensor<5xf16>,tensor<5xf16>)->tensor<5xf16> + %3 = pphlo.constant dense<[1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_f16_f16_ss() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : 
tensor<5xf16> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf16> + %2 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_f32_f32_pp() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf32> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf32> + %2 = pphlo.maximum %0,%1 : (tensor<5xf32>,tensor<5xf32>)->tensor<5xf32> + %3 = pphlo.constant dense<[1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_f32_f32_ss() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf32> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf32> + %2 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf64> + %2 = pphlo.maximum %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xf64> + %3 = pphlo.constant 
dense<[1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @maximum_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.maximum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/minimum.mlir b/libspu/compiler/tests/interpret/minimum.mlir new file mode 100644 index 00000000..763aa66b --- /dev/null +++ b/libspu/compiler/tests/interpret/minimum.mlir @@ -0,0 +1,300 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @minimum_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.minimum %0,%1 : (tensor<5xi8>,tensor<5xi8>)->tensor<5xi8> + %3 = pphlo.constant dense<[-128, -1, 8, -9, 0]> : tensor<5xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.convert %0 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant 
dense<[-128, -1, 8, -9, 0]> : tensor<5xi8> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.minimum %0,%1 : (tensor<2xui8>,tensor<2xui8>)->tensor<2xui8> + %3 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.convert %0 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.minimum %0,%1 : (tensor<5xi16>,tensor<5xi16>)->tensor<5xi16> + %3 = pphlo.constant dense<[-32768, -1, 128, -129, 0]> : tensor<5xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.convert %0 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, 
%3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-32768, -1, 128, -129, 0]> : tensor<5xi16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.minimum %0,%1 : (tensor<2xui16>,tensor<2xui16>)->tensor<2xui16> + %3 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.convert %0 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.minimum %0,%1 : (tensor<5xi32>,tensor<5xi32>)->tensor<5xi32> + %3 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 0]> : tensor<5xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 
2147483647]> : tensor<5xi32> + %2 = pphlo.convert %0 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 0]> : tensor<5xi32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.minimum %0,%1 : (tensor<2xui32>,tensor<2xui32>)->tensor<2xui32> + %3 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.convert %0 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.minimum %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi64> + %3 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 0]> : tensor<5xi64> + 
pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.minimum %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xui64> + %3 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i1_i1_pp() { 
+ %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.minimum %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, false, false, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, false, false, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_f16_f16_pp() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf16> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf16> + %2 = pphlo.minimum %0,%1 : (tensor<5xf16>,tensor<5xf16>)->tensor<5xf16> + %3 = pphlo.constant dense<[-1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_f16_f16_ss() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf16> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf16> + %2 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-1.0, -1.0, 0.0, -1.0, 1.0]> : 
tensor<5xf16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_f32_f32_pp() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf32> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf32> + %2 = pphlo.minimum %0,%1 : (tensor<5xf32>,tensor<5xf32>)->tensor<5xf32> + %3 = pphlo.constant dense<[-1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_f32_f32_ss() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf32> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf32> + %2 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf64> + %2 = pphlo.minimum %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xf64> + %3 = pphlo.constant dense<[-1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @minimum_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[-1.0, -1.0, 0.0, 1.0, 1.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[ 1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : 
(tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.minimum %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-1.0, -1.0, 0.0, -1.0, 1.0]> : tensor<5xf64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/multiply.mlir b/libspu/compiler/tests/interpret/multiply.mlir new file mode 100644 index 00000000..930b6458 --- /dev/null +++ b/libspu/compiler/tests/interpret/multiply.mlir @@ -0,0 +1,300 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @multiply_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.multiply %0,%1 : (tensor<5xi8>,tensor<5xi8>)->tensor<5xi8> + %3 = pphlo.constant dense<[0, -1, 64, 81, 0]> : tensor<5xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.convert %0 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, -1, 64, 81, 0]> : tensor<5xi8> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 16, 16]> : tensor<3xui8> + %1 = pphlo.constant dense<[255, 16, 17]> : tensor<3xui8> + %2 = 
pphlo.multiply %0,%1 : (tensor<3xui8>,tensor<3xui8>)->tensor<3xui8> + %3 = pphlo.constant dense<[0, 0, 16]> : tensor<3xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 16, 16]> : tensor<3xui8> + %1 = pphlo.constant dense<[255, 16, 17]> : tensor<3xui8> + %2 = pphlo.convert %0 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0, 16]> : tensor<3xui8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.multiply %0,%1 : (tensor<5xi16>,tensor<5xi16>)->tensor<5xi16> + %3 = pphlo.constant dense<[0, -1, 16384, 16641, 0]> : tensor<5xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.convert %0 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, -1, 16384, 16641, 0]> : tensor<5xi16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func 
@multiply_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.multiply %0,%1 : (tensor<2xui16>,tensor<2xui16>)->tensor<2xui16> + %3 = pphlo.constant dense<[0, 0]> : tensor<2xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.convert %0 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0]> : tensor<2xui16> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.multiply %0,%1 : (tensor<5xi32>,tensor<5xi32>)->tensor<5xi32> + %3 = pphlo.constant dense<[0, -1, 1073741824, 1073807361, 0]> : tensor<5xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.convert %0 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, -1, 1073741824, 1073807361, 0]> : 
tensor<5xi32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.multiply %0,%1 : (tensor<2xui32>,tensor<2xui32>)->tensor<2xui32> + %3 = pphlo.constant dense<[0, 0]> : tensor<2xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.convert %0 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0]> : tensor<2xui32> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.multiply %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi64> + %3 = pphlo.constant dense<[0, -1, 4611686018427387904, 4611686022722355201, 0]> : tensor<5xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : 
tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0, -1, 4611686018427387904, 4611686022722355201, 0]> : tensor<5xi64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.multiply %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xui64> + %3 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[8446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.multiply %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, false, false, true]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, 
tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, false, false, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_f16_f16_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.141]> : tensor<5xf16> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.141]> : tensor<5xf16> + %2 = pphlo.multiply %0,%1 : (tensor<5xf16>,tensor<5xf16>)->tensor<5xf16> + %3 = pphlo.constant dense<[0.000000e+00, 7.000000e+00, 9.375000e-02, 2.999880e-02, 9.867180e+00]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_f16_f16_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.141]> : tensor<5xf16> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.141]> : tensor<5xf16> + %2 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, 7.000000e+00, 9.375000e-02, 2.999880e-02, 9.867180e+00]> : tensor<5xf16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_f32_f32_pp() { + %0 = 
pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265]> : tensor<5xf32> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265]> : tensor<5xf32> + %2 = pphlo.multiply %0,%1 : (tensor<5xf32>,tensor<5xf32>)->tensor<5xf32> + %3 = pphlo.constant dense<[0.000000e+00, 7.000000e+00, 9.375000e-02, 0.0300000012, 9.86960506]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_f32_f32_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265]> : tensor<5xf32> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265]> : tensor<5xf32> + %2 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, 7.000000e+00, 9.375000e-02, 0.0300000012, 9.86960506]> : tensor<5xf32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]> : tensor<5xf64> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265358979323846]> : tensor<5xf64> + %2 = pphlo.multiply %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xf64> + %3 = pphlo.constant dense<[0.000000e+00, 7.000000e+00, 9.375000e-02, 3.000000e-02, 9.869604401089358]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @multiply_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]> : tensor<5xf64> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75, 0.3, 3.14159265358979323846]> : tensor<5xf64> + %2 = pphlo.convert %0 : 
(tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.multiply %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, 7.000000e+00, 9.375000e-02, 3.000000e-02, 9.869604401089358]> : tensor<5xf64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/negate.mlir b/libspu/compiler/tests/interpret/negate.mlir new file mode 100644 index 00000000..fa5fe4c3 --- /dev/null +++ b/libspu/compiler/tests/interpret/negate.mlir @@ -0,0 +1,242 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @negate_op_test_i8_i8_p() { + %0 = pphlo.constant dense<[-128, -9, 0, 8, 127]> : tensor<5xi8> + %1 = pphlo.negate %0 : (tensor<5xi8>)->tensor<5xi8> + %2 = pphlo.constant dense<[-128, 9, 0, -8, -127]> : tensor<5xi8> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i8_i8_s() { + %0 = pphlo.constant dense<[-128, -9, 0, 8, 127]> : tensor<5xi8> + %1 = pphlo.convert %0 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[-128, 9, 0, -8, -127]> : tensor<5xi8> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xi8> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui8_ui8_p() { + %0 = pphlo.constant dense<[0, 16, 255]> : tensor<3xui8> + %1 = pphlo.negate %0 : (tensor<3xui8>)->tensor<3xui8> + %2 = pphlo.constant dense<[0, 240, 1]> : tensor<3xui8> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui8_ui8_s() { 
+ %0 = pphlo.constant dense<[0, 16, 255]> : tensor<3xui8> + %1 = pphlo.convert %0 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[0, 240, 1]> : tensor<3xui8> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui8> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i16_i16_p() { + %0 = pphlo.constant dense<[-32768, -129, 0, 128, 32767]> : tensor<5xi16> + %1 = pphlo.negate %0 : (tensor<5xi16>)->tensor<5xi16> + %2 = pphlo.constant dense<[-32768, 129, 0, -128, -32767]> : tensor<5xi16> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i16_i16_s() { + %0 = pphlo.constant dense<[-32768, -129, 0, 128, 32767]> : tensor<5xi16> + %1 = pphlo.convert %0 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[-32768, 129, 0, -128, -32767]> : tensor<5xi16> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xi16> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui16_ui16_p() { + %0 = pphlo.constant dense<[0, 256, 65535]> : tensor<3xui16> + %1 = pphlo.negate %0 : (tensor<3xui16>)->tensor<3xui16> + %2 = pphlo.constant dense<[0, 65280, 1]> : tensor<3xui16> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui16_ui16_s() { + %0 = pphlo.constant dense<[0, 256, 65535]> : tensor<3xui16> + %1 = pphlo.convert %0 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[0, 65280, 1]> : tensor<3xui16> + %4 = pphlo.convert %2 : 
(tensor<3x!pphlo.secret>)->tensor<3xui16> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i32_i32_p() { + %0 = pphlo.constant dense<[-2147483648, -65537, 0, 65536, 2147483647]> : tensor<5xi32> + %1 = pphlo.negate %0 : (tensor<5xi32>)->tensor<5xi32> + %2 = pphlo.constant dense<[-2147483648, 65537, 0, -65536, -2147483647]> : tensor<5xi32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i32_i32_s() { + %0 = pphlo.constant dense<[-2147483648, -65537, 0, 65536, 2147483647]> : tensor<5xi32> + %1 = pphlo.convert %0 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[-2147483648, 65537, 0, -65536, -2147483647]> : tensor<5xi32> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xi32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui32_ui32_p() { + %0 = pphlo.constant dense<[0, 65536, 4294967295]> : tensor<3xui32> + %1 = pphlo.negate %0 : (tensor<3xui32>)->tensor<3xui32> + %2 = pphlo.constant dense<[0, 4294901760, 1]> : tensor<3xui32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui32_ui32_s() { + %0 = pphlo.constant dense<[0, 65536, 4294967295]> : tensor<3xui32> + %1 = pphlo.convert %0 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[0, 4294901760, 1]> : tensor<3xui32> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i64_i64_p() { + %0 = pphlo.constant 
dense<[-9223372036854775808, -2147483649, 0, 2147483648, 9223372036854775807]> : tensor<5xi64> + %1 = pphlo.negate %0 : (tensor<5xi64>)->tensor<5xi64> + %2 = pphlo.constant dense<[-9223372036854775808, 2147483649, 0, -2147483648, -9223372036854775807]> : tensor<5xi64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @negate_op_test_i64_i64_s() { + %0 = pphlo.constant dense<[-9223372036854775808, -2147483649, 0, 2147483648, 9223372036854775807]> : tensor<5xi64> + %1 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[-9223372036854775808, 2147483649, 0, -2147483648, -9223372036854775807]> : tensor<5xi64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xi64> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui64_ui64_p() { + %0 = pphlo.constant dense<[0, 4294967296, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.negate %0 : (tensor<3xui64>)->tensor<3xui64> + %2 = pphlo.constant dense<[0, 18446744069414584320, 1]> : tensor<3xui64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @negate_op_test_ui64_ui64_s() { + %0 = pphlo.constant dense<[0, 4294967296, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.convert %0 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[0, 18446744069414584320, 1]> : tensor<3xui64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui64> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @negate_op_test_f16_f16_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.negate 
%0 : (tensor<5xf16>)->tensor<5xf16> + %2 = pphlo.constant dense<[0.000000e+00, -1.000000e+00, -1.250000e-01, -9.997550e-02, -3.140630e+00]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @negate_op_test_f16_f16_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, -1.000000e+00, -1.250000e-01, -9.997550e-02, -3.140630e+00]> : tensor<5xf16> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @negate_op_test_f32_f32_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.negate %0 : (tensor<5xf32>)->tensor<5xf32> + %2 = pphlo.constant dense<[0.000000e+00, -1.000000e+00, -1.250000e-01, -1.000000e-01, -3.14159274]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @negate_op_test_f32_f32_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, -1.000000e+00, -1.250000e-01, -1.000000e-01, -3.14159274]> : tensor<5xf32> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @negate_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.negate %0 : (tensor<5xf64>)->tensor<5xf64> + %2 = 
pphlo.constant dense<[0.000000e+00, -1.000000e+00, -1.250000e-01, -1.000000e-01, -3.1415926535897931]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @negate_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %2 = pphlo.negate %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, -1.000000e+00, -1.250000e-01, -1.000000e-01, -3.1415926535897931]> : tensor<5xf64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/not.mlir b/libspu/compiler/tests/interpret/not.mlir new file mode 100644 index 00000000..77c91d25 --- /dev/null +++ b/libspu/compiler/tests/interpret/not.mlir @@ -0,0 +1,242 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @not_op_test_i8_i8_p() { + %0 = pphlo.constant dense<[127, -128, 0]> : tensor<3xi8> + %1 = pphlo.not %0 : (tensor<3xi8>)->tensor<3xi8> + %2 = pphlo.constant dense<[-128, 127, -1]> : tensor<3xi8> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @not_op_test_i8_i8_s() { + %0 = pphlo.constant dense<[127, -128, 0]> : tensor<3xi8> + %1 = pphlo.convert %0 : (tensor<3xi8>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-128, 127, -1]> : tensor<3xi8> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xi8> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui8_ui8_p() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.not 
%0 : (tensor<3xui8>)->tensor<3xui8> + %2 = pphlo.constant dense<[255, 128, 0]> : tensor<3xui8> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui8_ui8_s() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.convert %0 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[255, 128, 0]> : tensor<3xui8> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui8> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @not_op_test_i16_i16_p() { + %0 = pphlo.constant dense<[32767, -32768, 0]> : tensor<3xi16> + %1 = pphlo.not %0 : (tensor<3xi16>)->tensor<3xi16> + %2 = pphlo.constant dense<[-32768, 32767, -1]> : tensor<3xi16> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @not_op_test_i16_i16_s() { + %0 = pphlo.constant dense<[32767, -32768, 0]> : tensor<3xi16> + %1 = pphlo.convert %0 : (tensor<3xi16>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-32768, 32767, -1]> : tensor<3xi16> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xi16> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui16_ui16_p() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.not %0 : (tensor<3xui16>)->tensor<3xui16> + %2 = pphlo.constant dense<[65535, 32768, 0]> : tensor<3xui16> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui16_ui16_s() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.convert %0 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %2 = 
pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[65535, 32768, 0]> : tensor<3xui16> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui16> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @not_op_test_i32_i32_p() { + %0 = pphlo.constant dense<[2147483647, -2147483648, 0]> : tensor<3xi32> + %1 = pphlo.not %0 : (tensor<3xi32>)->tensor<3xi32> + %2 = pphlo.constant dense<[-2147483648, 2147483647, -1]> : tensor<3xi32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @not_op_test_i32_i32_s() { + %0 = pphlo.constant dense<[2147483647, -2147483648, 0]> : tensor<3xi32> + %1 = pphlo.convert %0 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-2147483648, 2147483647, -1]> : tensor<3xi32> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xi32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui32_ui32_p() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.not %0 : (tensor<3xui32>)->tensor<3xui32> + %2 = pphlo.constant dense<[4294967295, 2147483648, 0]> : tensor<3xui32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui32_ui32_s() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.convert %0 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[4294967295, 2147483648, 0]> : tensor<3xui32> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} 
+ +// ----- + +func.func @not_op_test_i64_i64_p() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, 0]> : tensor<3xi64> + %1 = pphlo.not %0 : (tensor<3xi64>)->tensor<3xi64> + %2 = pphlo.constant dense<[-9223372036854775808, 9223372036854775807, -1]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @not_op_test_i64_i64_s() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, 0]> : tensor<3xi64> + %1 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-9223372036854775808, 9223372036854775807, -1]> : tensor<3xi64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui64_ui64_p() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.not %0 : (tensor<3xui64>)->tensor<3xui64> + %2 = pphlo.constant dense<[18446744073709551615, 9223372036854775808, 0]> : tensor<3xui64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @not_op_test_ui64_ui64_s() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.convert %0 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[18446744073709551615, 9223372036854775808, 0]> : tensor<3xui64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xui64> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @not_op_test_i1_i1_p() { + %0 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %1 = pphlo.not %0 : 
(tensor<2xi1>)->tensor<2xi1> + %2 = pphlo.constant dense<[true, false]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @not_op_test_i1_i1_s() { + %0 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %1 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %2 = pphlo.not %1 : (tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %3 = pphlo.constant dense<[true, false]> : tensor<2xi1> + %4 = pphlo.convert %2 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @not_op_test_i1_i1_p() { + %0 = pphlo.constant dense : tensor + %1 = pphlo.not %0 : (tensor)->tensor + %2 = pphlo.constant dense : tensor + pphlo.custom_call @expect_eq(%1, %2) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @not_op_test_i1_i1_s() { + %0 = pphlo.constant dense : tensor + %1 = pphlo.convert %0 : (tensor)->tensor> + %2 = pphlo.not %1 : (tensor>)->tensor> + %3 = pphlo.constant dense : tensor + %4 = pphlo.convert %2 : (tensor>)->tensor + pphlo.custom_call @expect_eq(%3, %4) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @not_op_test_i1_i1_p() { + %0 = pphlo.constant dense : tensor + %1 = pphlo.not %0 : (tensor)->tensor + %2 = pphlo.constant dense : tensor + pphlo.custom_call @expect_eq(%1, %2) : (tensor, tensor)->() + func.return +} + +// ----- + +func.func @not_op_test_i1_i1_s() { + %0 = pphlo.constant dense : tensor + %1 = pphlo.convert %0 : (tensor)->tensor> + %2 = pphlo.not %1 : (tensor>)->tensor> + %3 = pphlo.constant dense : tensor + %4 = pphlo.convert %2 : (tensor>)->tensor + pphlo.custom_call @expect_eq(%3, %4) : (tensor, tensor)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/not_equal.mlir b/libspu/compiler/tests/interpret/not_equal.mlir new file mode 100644 index 00000000..aabacc0b --- /dev/null +++ 
b/libspu/compiler/tests/interpret/not_equal.mlir @@ -0,0 +1,100 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @not_equal_op_test_i64_i1_pp() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.not_equal %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi1> + %3 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_i64_i1_ss() { + %0 = pphlo.constant dense<[-2, -1, 0, 2, 2]> : tensor<5xi64> + %1 = pphlo.constant dense<[-2, -2, 0, 1, 2]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.not_equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_ui64_i1_pp() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.not_equal %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_ui64_i1_ss() { + %0 = pphlo.constant dense<[0, 1]> : tensor<2xui64> + %1 = pphlo.constant dense<[0, 0]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.not_equal %2, %3 : 
(tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.not_equal %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, true, true, false]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[true, true, false, false]> : tensor<4xi1> + %1 = pphlo.constant dense<[true, false, true, false]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.not_equal %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, true, false]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_f64_i1_pp() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.not_equal %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xi1> + %3 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} + +// ----- + +func.func @not_equal_op_test_f64_i1_ss() { + %0 = pphlo.constant dense<[-2.0, -2.0, 0.0, 1.0, 2.0]> : tensor<5xf64> + %1 = pphlo.constant 
dense<[-2.0, -1.0, 0.0, 2.0, 2.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.not_equal %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, false, true, false]> : tensor<5xi1> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi1>, tensor<5xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/or.mlir b/libspu/compiler/tests/interpret/or.mlir new file mode 100644 index 00000000..2856bba6 --- /dev/null +++ b/libspu/compiler/tests/interpret/or.mlir @@ -0,0 +1,275 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @or_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[127, -128, -128]> : tensor<3xi8> + %1 = pphlo.constant dense<[0, 127, -128]> : tensor<3xi8> + %2 = pphlo.or %0,%1 : (tensor<3xi8>,tensor<3xi8>)->tensor<3xi8> + %3 = pphlo.constant dense<[127, -1, -128]> : tensor<3xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @or_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[127, -128, -128]> : tensor<3xi8> + %1 = pphlo.constant dense<[0, 127, -128]> : tensor<3xi8> + %2 = pphlo.convert %0 : (tensor<3xi8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi8>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[127, -1, -128]> : tensor<3xi8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.constant dense<255> : tensor<3xui8> + %2 = 
pphlo.or %0,%1 : (tensor<3xui8>,tensor<3xui8>)->tensor<3xui8> + %3 = pphlo.constant dense<[255, 255, 255]> : tensor<3xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.constant dense<255> : tensor<3xui8> + %2 = pphlo.convert %0 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[255, 255, 255]> : tensor<3xui8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @or_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[32767, -32768, -32768]> : tensor<3xi16> + %1 = pphlo.constant dense<[0, 32767, -32768]> : tensor<3xi16> + %2 = pphlo.or %0,%1 : (tensor<3xi16>,tensor<3xi16>)->tensor<3xi16> + %3 = pphlo.constant dense<[32767, -1, -32768]> : tensor<3xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @or_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[32767, -32768, -32768]> : tensor<3xi16> + %1 = pphlo.constant dense<[0, 32767, -32768]> : tensor<3xi16> + %2 = pphlo.convert %0 : (tensor<3xi16>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi16>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[32767, -1, -32768]> : tensor<3xi16> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : 
tensor<3xui16> + %1 = pphlo.constant dense<65535> : tensor<3xui16> + %2 = pphlo.or %0,%1 : (tensor<3xui16>,tensor<3xui16>)->tensor<3xui16> + %3 = pphlo.constant dense<[65535, 65535, 65535]> : tensor<3xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.constant dense<65535> : tensor<3xui16> + %2 = pphlo.convert %0 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[65535, 65535, 65535]> : tensor<3xui16> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @or_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[2147483647, -2147483648, -2147483648]> : tensor<3xi32> + %1 = pphlo.constant dense<[0, 2147483647, -2147483648]> : tensor<3xi32> + %2 = pphlo.or %0,%1 : (tensor<3xi32>,tensor<3xi32>)->tensor<3xi32> + %3 = pphlo.constant dense<[2147483647, -1, -2147483648]> : tensor<3xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @or_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[2147483647, -2147483648, -2147483648]> : tensor<3xi32> + %1 = pphlo.constant dense<[0, 2147483647, -2147483648]> : tensor<3xi32> + %2 = pphlo.convert %0 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[2147483647, -1, -2147483648]> : tensor<3xi32> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi32> + pphlo.custom_call 
@expect_eq(%5, %6) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.constant dense<4294967295> : tensor<3xui32> + %2 = pphlo.or %0,%1 : (tensor<3xui32>,tensor<3xui32>)->tensor<3xui32> + %3 = pphlo.constant dense<[4294967295, 4294967295, 4294967295]> : tensor<3xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.constant dense<4294967295> : tensor<3xui32> + %2 = pphlo.convert %0 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[4294967295, 4294967295, 4294967295]> : tensor<3xui32> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @or_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, -9223372036854775808]> : tensor<3xi64> + %1 = pphlo.constant dense<[0, 9223372036854775807, -9223372036854775808]> : tensor<3xi64> + %2 = pphlo.or %0,%1 : (tensor<3xi64>,tensor<3xi64>)->tensor<3xi64> + %3 = pphlo.constant dense<[9223372036854775807, -1, -9223372036854775808]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @or_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, -9223372036854775808]> : tensor<3xi64> + %1 = pphlo.constant dense<[0, 9223372036854775807, -9223372036854775808]> : tensor<3xi64> + %2 = pphlo.convert %0 : 
(tensor<3xi64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[9223372036854775807, -1, -9223372036854775808]> : tensor<3xi64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.constant dense<18446744073709551615> : tensor<3xui64> + %2 = pphlo.or %0,%1 : (tensor<3xui64>,tensor<3xui64>)->tensor<3xui64> + %3 = pphlo.constant dense<[18446744073709551615, 18446744073709551615, 18446744073709551615]> : tensor<3xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @or_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.constant dense<18446744073709551615> : tensor<3xui64> + %2 = pphlo.convert %0 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[18446744073709551615, 18446744073709551615, 18446744073709551615]> : tensor<3xui64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @or_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.or %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, true, true, true]> : 
tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @or_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, true, true]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @or_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.or %0,%1 : (tensor<2xi1>,tensor<2xi1>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @or_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @or_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.or %0,%1 : (tensor<2xi1>,tensor<2xi1>)->tensor<2xi1> + %3 = pphlo.constant dense<[true, true]> : tensor<2xi1> + 
pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @or_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %4 = pphlo.or %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[true, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/pad.mlir b/libspu/compiler/tests/interpret/pad.mlir new file mode 100644 index 00000000..85436480 --- /dev/null +++ b/libspu/compiler/tests/interpret/pad.mlir @@ -0,0 +1,17 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @pad() { + %operand = pphlo.constant dense<[[0, 0, 0, 0], + [0, 1, 2, 0], + [0, 3, 4, 0], + [0, 5, 6, 0], + [0, 0, 0, 0]]> : tensor<5x4xi64> + %padding_value = pphlo.constant dense<-1> : tensor + %result = pphlo.pad %operand, %padding_value, low = [1, -1], high = [1, -1], interior = [0, 1] + : (tensor<5x4xi64>, tensor) -> tensor<7x5xi64> + %expected = pphlo.constant dense<[[-1, -1, -1, -1, -1], [-1, 0, -1, 0, -1], [-1, 1, -1, 2, -1], + [-1, 3, -1, 4, -1], [-1, 5, -1, 6, -1], [-1, 0, -1, 0, -1], + [-1, -1, -1, -1, -1]]> : tensor<7x5xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<7x5xi64>,tensor<7x5xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/popcnt.mlir b/libspu/compiler/tests/interpret/popcnt.mlir new file mode 100644 index 00000000..8d741134 --- /dev/null +++ b/libspu/compiler/tests/interpret/popcnt.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @popcnt_op_test_i64_i64_p() { + %0 = 
pphlo.constant dense<[0, 1, 2, 127]> : tensor<4xi64> + %1 = pphlo.popcnt %0 : (tensor<4xi64>)->tensor<4xi64> + %2 = pphlo.constant dense<[0, 1, 1, 7]> : tensor<4xi64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<4xi64>, tensor<4xi64>)->() + func.return +} + +// ----- + +func.func @popcnt_op_test_i64_i64_s() { + %0 = pphlo.constant dense<[0, 1, 2, 127]> : tensor<4xi64> + %1 = pphlo.convert %0 : (tensor<4xi64>)->tensor<4x!pphlo.secret> + %2 = pphlo.popcnt %1 : (tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %3 = pphlo.constant dense<[0, 1, 1, 7]> : tensor<4xi64> + %4 = pphlo.convert %2 : (tensor<4x!pphlo.secret>)->tensor<4xi64> + // pphlo.custom_call @expect_eq(%3, %4) : (tensor<4xi64>, tensor<4xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/power.mlir b/libspu/compiler/tests/interpret/power.mlir new file mode 100644 index 00000000..8041418a --- /dev/null +++ b/libspu/compiler/tests/interpret/power.mlir @@ -0,0 +1,75 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @power_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[-1, -1, -3, 1, -3, 0]> : tensor<6xi64> + %1 = pphlo.constant dense<[1, 0, -3, -3, 3, 2]> : tensor<6xi64> + %2 = pphlo.power %0,%1 : (tensor<6xi64>,tensor<6xi64>)->tensor<6xi64> + %3 = pphlo.constant dense<[-1, 1, 0, 1, -27, 0]> : tensor<6xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<6xi64>, tensor<6xi64>)->() + func.return +} + +// ----- + +func.func @power_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[-1, -1, -3, 1, -3, 0]> : tensor<6xi64> + %1 = pphlo.constant dense<[1, 0, -3, -3, 3, 2]> : tensor<6xi64> + %2 = pphlo.convert %0 : (tensor<6xi64>)->tensor<6x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<6xi64>)->tensor<6x!pphlo.secret> + %4 = pphlo.power %2, %3 : (tensor<6x!pphlo.secret>,tensor<6x!pphlo.secret>)->tensor<6x!pphlo.secret> + %5 = pphlo.constant dense<[-1, 1, 0, 1, -27, 0]> : tensor<6xi64> + %6 = pphlo.convert %4 : 
(tensor<6x!pphlo.secret>)->tensor<6xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<6xi64>, tensor<6xi64>)->() + func.return +} + +// ----- + +func.func @power_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 0, 1, 1, 5]> : tensor<5xui64> + %1 = pphlo.constant dense<[0, 1, 0, 2, 5]> : tensor<5xui64> + %2 = pphlo.power %0,%1 : (tensor<5xui64>,tensor<5xui64>)->tensor<5xui64> + %3 = pphlo.constant dense<[1, 0, 1, 1, 3125]> : tensor<5xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xui64>, tensor<5xui64>)->() + func.return +} + +// ----- + +func.func @power_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 0, 1, 1, 5]> : tensor<5xui64> + %1 = pphlo.constant dense<[0, 1, 0, 2, 5]> : tensor<5xui64> + %2 = pphlo.convert %0 : (tensor<5xui64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xui64>)->tensor<5x!pphlo.secret> + %4 = pphlo.power %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[1, 0, 1, 1, 3125]> : tensor<5xui64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xui64>, tensor<5xui64>)->() + func.return +} + +// ----- + +func.func @power_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[-2.0, -0.0, 5.0, 3.0, 10000.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[2.0, 2.0, 2.0, -1.0, 1.0]> : tensor<5xf64> + %2 = pphlo.power %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xf64> + %3 = pphlo.constant dense<[4.000000e+00, 0.000000e+00, 2.500000e+01, 0.33333333333333331, 10000.0]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) { tol = 0.5 }: (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @power_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[-2.0, -0.0, 5.0, 3.0, 10000.0]> : tensor<5xf64> + %1 = pphlo.constant dense<[2.0, 2.0, 2.0, -1.0, 1.0]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 
: (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.power %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[4.000000e+00, 0.000000e+00, 2.500000e+01, 0.33333333333333331, 10000.0]> : tensor<5xf64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) { tol = 0.5 }: (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/reduce.mlir b/libspu/compiler/tests/interpret/reduce.mlir new file mode 100644 index 00000000..8fc4ced7 --- /dev/null +++ b/libspu/compiler/tests/interpret/reduce.mlir @@ -0,0 +1,16 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @reduce() { + %input = pphlo.constant dense<[[0, 1, 2, 3, 4, 5]]> : tensor<1x6xi64> + %init_value = pphlo.constant dense<0> : tensor + %result = "pphlo.reduce"(%input, %init_value) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = pphlo.add %arg0, %arg1 : tensor + pphlo.return %0 : tensor + }) { + dimensions = array + } : (tensor<1x6xi64>, tensor) -> tensor<1xi64> + %expected = pphlo.constant dense<[15]> : tensor<1xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<1xi64>,tensor<1xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/reduce_window.mlir b/libspu/compiler/tests/interpret/reduce_window.mlir new file mode 100644 index 00000000..1b28aef1 --- /dev/null +++ b/libspu/compiler/tests/interpret/reduce_window.mlir @@ -0,0 +1,39 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @reduce_window() { + %input = pphlo.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi64> + %init_value = pphlo.constant dense<0> : tensor + %input_pad = pphlo.pad %input, %init_value, low = [2, 0], high = [1, 0], interior = [1, 0] : (tensor<3x2xi64>, tensor) -> tensor<8x2xi64> + %result = "pphlo.reduce_window"(%input_pad, %init_value) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = pphlo.add %arg0, 
%arg1 : tensor + pphlo.return %0 : tensor + }) { + window_dilations = array, + window_dimensions = array, + window_strides = array + } : (tensor<8x2xi64>, tensor) -> tensor<2x2xi64> + %expected = pphlo.constant dense<[[0, 0], [3, 4]]> : tensor<2x2xi64> + pphlo.custom_call @expect_eq(%result, %expected) : (tensor<2x2xi64>, tensor<2x2xi64>)->() + func.return +} + +// ----- + +func.func @reduce_window_issue_1662() { + %input = pphlo.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi64> + %init_value = pphlo.constant dense<0> : tensor + %input_pad = pphlo.pad %input, %init_value, low = [2, 0], high = [1, 0], interior = [1, 0] : (tensor<3x2xi64>, tensor) -> tensor<8x2xi64> + %result = "pphlo.reduce_window"(%input_pad, %init_value) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = pphlo.add %arg0, %arg1 : tensor + pphlo.return %0 : tensor + }) { + window_dilations = array, + window_dimensions = array, + window_strides = array + } : (tensor<8x2xi64>, tensor) -> tensor<1x2xi64> + %expected = pphlo.constant dense<[[5, 6]]> : tensor<1x2xi64> + pphlo.custom_call @expect_eq(%result, %expected) : (tensor<1x2xi64>, tensor<1x2xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/reshape.mlir b/libspu/compiler/tests/interpret/reshape.mlir new file mode 100644 index 00000000..fa3551c6 --- /dev/null +++ b/libspu/compiler/tests/interpret/reshape.mlir @@ -0,0 +1,88 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @reshape_op_test_i32_i32_p() { + %0 = pphlo.constant dense<[[1,2,3,4,5,6]]> : tensor<1x6xi32> + %1 = pphlo.reshape %0 : (tensor<1x6xi32>)->tensor<6xi32> + %2 = pphlo.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<6xi32>, tensor<6xi32>)->() + func.return +} + +// ----- + +func.func @reshape_op_test_i32_i32_s() { + %0 = pphlo.constant dense<[[1,2,3,4,5,6]]> : tensor<1x6xi32> + %1 = pphlo.convert %0 : (tensor<1x6xi32>)->tensor<1x6x!pphlo.secret> + %2 
= pphlo.reshape %1 : (tensor<1x6x!pphlo.secret>)->tensor<6x!pphlo.secret> + %3 = pphlo.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> + %4 = pphlo.convert %2 : (tensor<6x!pphlo.secret>)->tensor<6xi32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<6xi32>, tensor<6xi32>)->() + func.return +} + +// ----- + +func.func @reshape_op_test_i32_i32_p() { + %0 = pphlo.constant dense<[1,2,3,4,5,6]> : tensor<6xi32> + %1 = pphlo.reshape %0 : (tensor<6xi32>)->tensor<2x3xi32> + %2 = pphlo.constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<2x3xi32>, tensor<2x3xi32>)->() + func.return +} + +// ----- + +func.func @reshape_op_test_i32_i32_s() { + %0 = pphlo.constant dense<[1,2,3,4,5,6]> : tensor<6xi32> + %1 = pphlo.convert %0 : (tensor<6xi32>)->tensor<6x!pphlo.secret> + %2 = pphlo.reshape %1 : (tensor<6x!pphlo.secret>)->tensor<2x3x!pphlo.secret> + %3 = pphlo.constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> + %4 = pphlo.convert %2 : (tensor<2x3x!pphlo.secret>)->tensor<2x3xi32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<2x3xi32>, tensor<2x3xi32>)->() + func.return +} + +// ----- + +func.func @reshape_op_test_i32_i32_p() { + %0 = pphlo.constant dense<[[1,2,3],[4,5,6]]> : tensor<2x3xi32> + %1 = pphlo.reshape %0 : (tensor<2x3xi32>)->tensor<3x2xi32> + %2 = pphlo.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3x2xi32>, tensor<3x2xi32>)->() + func.return +} + +// ----- + +func.func @reshape_op_test_i32_i32_s() { + %0 = pphlo.constant dense<[[1,2,3],[4,5,6]]> : tensor<2x3xi32> + %1 = pphlo.convert %0 : (tensor<2x3xi32>)->tensor<2x3x!pphlo.secret> + %2 = pphlo.reshape %1 : (tensor<2x3x!pphlo.secret>)->tensor<3x2x!pphlo.secret> + %3 = pphlo.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> + %4 = pphlo.convert %2 : (tensor<3x2x!pphlo.secret>)->tensor<3x2xi32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3x2xi32>, tensor<3x2xi32>)->() + func.return 
+} + +// ----- + +func.func @reshape_op_test_i32_i32_p() { + %0 = pphlo.constant dense<[[1,2],[3,4],[5,6]]> : tensor<3x2xi32> + %1 = pphlo.reshape %0 : (tensor<3x2xi32>)->tensor<6xi32> + %2 = pphlo.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<6xi32>, tensor<6xi32>)->() + func.return +} + +// ----- + +func.func @reshape_op_test_i32_i32_s() { + %0 = pphlo.constant dense<[[1,2],[3,4],[5,6]]> : tensor<3x2xi32> + %1 = pphlo.convert %0 : (tensor<3x2xi32>)->tensor<3x2x!pphlo.secret> + %2 = pphlo.reshape %1 : (tensor<3x2x!pphlo.secret>)->tensor<6x!pphlo.secret> + %3 = pphlo.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> + %4 = pphlo.convert %2 : (tensor<6x!pphlo.secret>)->tensor<6xi32> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<6xi32>, tensor<6xi32>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/reverse.mlir b/libspu/compiler/tests/interpret/reverse.mlir new file mode 100644 index 00000000..0489311b --- /dev/null +++ b/libspu/compiler/tests/interpret/reverse.mlir @@ -0,0 +1,11 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @reverse() { + %operand = pphlo.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi64> + %result = "pphlo.reverse"(%operand) { + dimensions = array + } : (tensor<3x2xi64>) -> tensor<3x2xi64> + %expected = pphlo.constant dense<[[6, 5], [4, 3], [2, 1]]> : tensor<3x2xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<3x2xi64>,tensor<3x2xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/round_nearest_afz.mlir b/libspu/compiler/tests/interpret/round_nearest_afz.mlir new file mode 100644 index 00000000..79eba66d --- /dev/null +++ b/libspu/compiler/tests/interpret/round_nearest_afz.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @round_nearest_afz_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[-2.5, 0.4, 0.5, 0.6, 2.5]> : 
tensor<5xf64> + %1 = pphlo.round_nearest_afz %0 : (tensor<5xf64>)->tensor<5xf64> + %2 = pphlo.constant dense<[-3.0, 0.0, 1.0, 1.0, 3.0]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @round_nearest_afz_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[-2.5, 0.4, 0.5, 0.6, 2.5]> : tensor<5xf64> + %1 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %2 = pphlo.round_nearest_afz %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[-3.0, 0.0, 1.0, 1.0, 3.0]> : tensor<5xf64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/rsqrt.mlir b/libspu/compiler/tests/interpret/rsqrt.mlir new file mode 100644 index 00000000..0ef05654 --- /dev/null +++ b/libspu/compiler/tests/interpret/rsqrt.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @rsqrt_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[[1.0, 4.0], [9.0, 25.0]]> : tensor<2x2xf64> + %1 = pphlo.rsqrt %0 : (tensor<2x2xf64>)->tensor<2x2xf64> + %2 = pphlo.constant dense<[[1.000000e+00, 5.000000e-01], [0.33333333333333331, 2.000000e-01]]> : tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} + +// ----- + +func.func @rsqrt_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[[1.0, 4.0], [9.0, 25.0]]> : tensor<2x2xf64> + %1 = pphlo.convert %0 : (tensor<2x2xf64>)->tensor<2x2x!pphlo.secret> + %2 = pphlo.rsqrt %1 : (tensor<2x2x!pphlo.secret>)->tensor<2x2x!pphlo.secret> + %3 = pphlo.constant dense<[[1.000000e+00, 5.000000e-01], [0.33333333333333331, 2.000000e-01]]> : tensor<2x2xf64> + %4 = pphlo.convert %2 : (tensor<2x2x!pphlo.secret>)->tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%3, 
%4) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/select.mlir b/libspu/compiler/tests/interpret/select.mlir new file mode 100644 index 00000000..eb15e544 --- /dev/null +++ b/libspu/compiler/tests/interpret/select.mlir @@ -0,0 +1,23 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @select_op_test_si64() { + %pred = pphlo.constant dense<[true, false, true]> : tensor<3xi1> + %on_true = pphlo.constant dense<[2, 3, -1]> : tensor<3xi64> + %on_false = pphlo.constant dense<[3, 7, -3]> : tensor<3xi64> + %result = pphlo.select %pred, %on_true, %on_false : (tensor<3xi1>, tensor<3xi64>, tensor<3xi64>) -> tensor<3xi64> + %expected = pphlo.constant dense<[2, 7, -1]> : tensor<3xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<3xi64>,tensor<3xi64>)->() + func.return +} + +// ----- +// FIXME +func.func @select_op_test_si64_scalar() { + %pred = pphlo.constant dense : tensor + %on_true = pphlo.constant dense<[2, 3, -1]> : tensor<3xi64> + %on_false = pphlo.constant dense<[3, 7, -3]> : tensor<3xi64> +// %result = pphlo.select %pred, %on_true, %on_false : (tensor, tensor<3xi64>, tensor<3xi64>) -> tensor<3xi64> +// %expected = pphlo.constant dense<[3, 7, -3]> : tensor<3xi64> +// pphlo.custom_call @expect_eq %result, %expected : tensor<3xi64> + func.return +} diff --git a/libspu/compiler/tests/interpret/select_and_scatter.mlir b/libspu/compiler/tests/interpret/select_and_scatter.mlir new file mode 100644 index 00000000..6ae7fc19 --- /dev/null +++ b/libspu/compiler/tests/interpret/select_and_scatter.mlir @@ -0,0 +1,31 @@ +// RUN: spu-translate --interpret -split-input-file %s + +// FIXME +func.func @select_and_scatter_op_test() { + %operand = pphlo.constant dense<[[1, 5], + [2, 5], + [3, 6], + [4, 4]]> : tensor<4x2xi64> + %source = pphlo.constant dense<[[5, 6], + [7, 8]]> : tensor<2x2xi64> + %init_value = pphlo.constant dense<0> : tensor +// %result = "pphlo.select_and_scatter"(%operand, 
%source, %init_value) ({ +// ^bb0(%arg0: tensor, %arg1: tensor): +// %0 = pphlo.greater_equal %arg0, %arg1 : (tensor, tensor) -> tensor +// pphlo.return %0 : tensor +// }, { +// ^bb0(%arg0: tensor, %arg1: tensor): +// %0 = pphlo.add %arg0, %arg1 : tensor +// pphlo.return %0 : tensor +// }) { +// window_dimensions = array, +// window_strides = array, +// padding = dense<[[0, 1], [0, 0]]> : tensor<2x2xi64> +// } : (tensor<4x2xi64>, tensor<2x2xi64>, tensor) -> tensor<4x2xi64> +// %expected = pphlo.constant dense<[[0, 0], +// [0, 0], +// [5, 14], +// [7, 0]]> : tensor<4x2xi64> +// pphlo.custom_call @expect_eq %result, %expected : tensor<4x2xi64> + func.return +} diff --git a/libspu/compiler/tests/interpret/shift_right_arithmetic.mlir b/libspu/compiler/tests/interpret/shift_right_arithmetic.mlir new file mode 100644 index 00000000..7cc71a93 --- /dev/null +++ b/libspu/compiler/tests/interpret/shift_right_arithmetic.mlir @@ -0,0 +1,25 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @shift_right_arithmetic_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[-1, 0, 8]> : tensor<3xi64> + %1 = pphlo.constant dense<[1, 2, 3]> : tensor<3xi64> + %2 = pphlo.shift_right_arithmetic %0,%1 : (tensor<3xi64>,tensor<3xi64>)->tensor<3xi64> + %3 = pphlo.constant dense<[-1, 0, 1]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @shift_right_arithmetic_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[-1, 0, 8]> : tensor<3xi64> + %1 = pphlo.constant dense<[1, 2, 3]> : tensor<3xi64> + %2 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %4 = pphlo.shift_right_arithmetic %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[-1, 0, 1]> : tensor<3xi64> + %6 = pphlo.convert %4 : 
(tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/shift_right_logical.mlir b/libspu/compiler/tests/interpret/shift_right_logical.mlir new file mode 100644 index 00000000..f7331033 --- /dev/null +++ b/libspu/compiler/tests/interpret/shift_right_logical.mlir @@ -0,0 +1,25 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @shift_right_logical_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[-1, 0, 8]> : tensor<3xi64> + %1 = pphlo.constant dense<[1, 2, 3]> : tensor<3xi64> + %2 = pphlo.shift_right_logical %0,%1 : (tensor<3xi64>,tensor<3xi64>)->tensor<3xi64> + %3 = pphlo.constant dense<[9223372036854775807, 0, 1]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @shift_right_logical_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[-1, 0, 8]> : tensor<3xi64> + %1 = pphlo.constant dense<[1, 2, 3]> : tensor<3xi64> + %2 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %4 = pphlo.shift_right_logical %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[9223372036854775807, 0, 1]> : tensor<3xi64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/sign.mlir b/libspu/compiler/tests/interpret/sign.mlir new file mode 100644 index 00000000..3c1b2bed --- /dev/null +++ b/libspu/compiler/tests/interpret/sign.mlir @@ -0,0 +1,44 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @sign_op_test_i64_i64_p() { + %0 = pphlo.constant dense<[-1, 0, 1]> : tensor<3xi64> + %1 = 
pphlo.sign %0 : (tensor<3xi64>)->tensor<3xi64> + %2 = pphlo.constant dense<[-1, 0, 1]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%1, %2) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @sign_op_test_i64_i64_s() { + %0 = pphlo.constant dense<[-1, 0, 1]> : tensor<3xi64> + %1 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %2 = pphlo.sign %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-1, 0, 1]> : tensor<3xi64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%3, %4) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @sign_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[-1.0, 0.0, 1.0]> : tensor<3xf64> + %1 = pphlo.sign %0 : (tensor<3xf64>)->tensor<3xf64> + %2 = pphlo.constant dense<[-1.0, 0.0, 1.0]> : tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} + +// ----- + +func.func @sign_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[-1.0, 0.0, 1.0]> : tensor<3xf64> + %1 = pphlo.convert %0 : (tensor<3xf64>)->tensor<3x!pphlo.secret> + %2 = pphlo.sign %1 : (tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %3 = pphlo.constant dense<[-1.0, 0.0, 1.0]> : tensor<3xf64> + %4 = pphlo.convert %2 : (tensor<3x!pphlo.secret>)->tensor<3xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<3xf64>, tensor<3xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/sine.mlir b/libspu/compiler/tests/interpret/sine.mlir new file mode 100644 index 00000000..52825da5 --- /dev/null +++ b/libspu/compiler/tests/interpret/sine.mlir @@ -0,0 +1,66 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @sine_op_test_f16_f16_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.sine %0 : (tensor<5xf16>)->tensor<5xf16> + %2 = pphlo.constant 
dense<[0.000000e+00, 8.413080e-01, 1.246950e-01, 9.979240e-02, 9.675020e-04]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @sine_op_test_f16_f16_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %2 = pphlo.sine %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, 8.413080e-01, 1.246950e-01, 9.979240e-02, 9.675020e-04]> : tensor<5xf16> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @sine_op_test_f32_f32_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.sine %0 : (tensor<5xf32>)->tensor<5xf32> + %2 = pphlo.constant dense<[0.000000e+00, 0.841470957, 0.12467473, 0.0998334214, -8.74227765E-8]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @sine_op_test_f32_f32_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %2 = pphlo.sine %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, 0.841470957, 0.12467473, 0.0998334214, -8.74227765E-8]> : tensor<5xf32> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @sine_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.sine %0 : (tensor<5xf64>)->tensor<5xf64> + %2 = pphlo.constant dense<[0.000000e+00, 0.8414709848078965, 0.12467473338522769, 
0.099833416646828154, 1.2246467991473532E-16]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @sine_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %2 = pphlo.sine %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, 0.8414709848078965, 0.12467473338522769, 0.099833416646828154, 1.2246467991473532E-16]> : tensor<5xf64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/slice.mlir b/libspu/compiler/tests/interpret/slice.mlir new file mode 100644 index 00000000..e6ce68b2 --- /dev/null +++ b/libspu/compiler/tests/interpret/slice.mlir @@ -0,0 +1,15 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @slice_op() { + %operand = pphlo.constant dense<[[0, 0, 1, 0, 0, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 1]]> : tensor<3x6xi64> + %result = "pphlo.slice"(%operand) { + start_indices = array, + limit_indices = array, + strides = array + } : (tensor<3x6xi64>) -> tensor<2x2xi64> + %expected = pphlo.constant dense<[[1, 1], [1, 1]]> : tensor<2x2xi64> + pphlo.custom_call @expect_eq (%result, %expected) : (tensor<2x2xi64>,tensor<2x2xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/sort.mlir b/libspu/compiler/tests/interpret/sort.mlir new file mode 100644 index 00000000..c0def6c4 --- /dev/null +++ b/libspu/compiler/tests/interpret/sort.mlir @@ -0,0 +1,19 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @sort_stable() { + %input0 = pphlo.constant dense<[[1, 2, 3], [3, 2, 1]]> : tensor<2x3xi64> + %input1 = pphlo.constant dense<[[3, 2, 1], [1, 2, 3]]> : tensor<2x3xi64> + %result0, %result1 = 
"pphlo.sort"(%input0, %input1) ({ + ^bb0(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor): + %predicate = pphlo.greater %arg0, %arg1 : (tensor, tensor) -> tensor + pphlo.return %predicate : tensor + }) { + dimension = 0 : i64, + is_stable = true + } : (tensor<2x3xi64>, tensor<2x3xi64>) -> (tensor<2x3xi64>, tensor<2x3xi64>) + %expected0 = pphlo.constant dense<[[3, 2, 3], [1, 2, 1]]> : tensor<2x3xi64> + %expected1 = pphlo.constant dense<[[1, 2, 1], [3, 2, 3]]> : tensor<2x3xi64> + pphlo.custom_call @expect_eq (%result0, %expected0) : (tensor<2x3xi64>,tensor<2x3xi64>)->() + pphlo.custom_call @expect_eq (%result1, %expected1) : (tensor<2x3xi64>,tensor<2x3xi64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/sqrt.mlir b/libspu/compiler/tests/interpret/sqrt.mlir new file mode 100644 index 00000000..249355fa --- /dev/null +++ b/libspu/compiler/tests/interpret/sqrt.mlir @@ -0,0 +1,22 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @sqrt_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[[0.0, 1.0], [4.0, 9.0]]> : tensor<2x2xf64> + %1 = pphlo.sqrt %0 : (tensor<2x2xf64>)->tensor<2x2xf64> + %2 = pphlo.constant dense<[[0.000000e+00, 1.000000e+00], [2.000000e+00, 3.000000e+00]]> : tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} + +// ----- + +func.func @sqrt_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[[0.0, 1.0], [4.0, 9.0]]> : tensor<2x2xf64> + %1 = pphlo.convert %0 : (tensor<2x2xf64>)->tensor<2x2x!pphlo.secret> + %2 = pphlo.sqrt %1 : (tensor<2x2x!pphlo.secret>)->tensor<2x2x!pphlo.secret> + %3 = pphlo.constant dense<[[0.000000e+00, 1.000000e+00], [2.000000e+00, 3.000000e+00]]> : tensor<2x2xf64> + %4 = pphlo.convert %2 : (tensor<2x2x!pphlo.secret>)->tensor<2x2xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<2x2xf64>, tensor<2x2xf64>)->() + func.return +} diff --git 
a/libspu/compiler/tests/interpret/subtract.mlir b/libspu/compiler/tests/interpret/subtract.mlir new file mode 100644 index 00000000..3cab82eb --- /dev/null +++ b/libspu/compiler/tests/interpret/subtract.mlir @@ -0,0 +1,275 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @subtract_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.subtract %0,%1 : (tensor<5xi8>,tensor<5xi8>)->tensor<5xi8> + %3 = pphlo.constant dense<[-128, 2, 0, 0, -127]> : tensor<5xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[0, 1, 8, -9, 0]> : tensor<5xi8> + %1 = pphlo.constant dense<[-128, -1, 8, -9, 127]> : tensor<5xi8> + %2 = pphlo.convert %0 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi8>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-128, 2, 0, 0, -127]> : tensor<5xi8> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi8>, tensor<5xi8>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.subtract %0,%1 : (tensor<2xui8>,tensor<2xui8>)->tensor<2xui8> + %3 = pphlo.constant dense<[1, 0]> : tensor<2xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 16]> : tensor<2xui8> + %1 = pphlo.constant dense<[255, 16]> : tensor<2xui8> + %2 = pphlo.convert %0 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert 
%1 : (tensor<2xui8>)->tensor<2x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[1, 0]> : tensor<2xui8> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui8>, tensor<2xui8>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.subtract %0,%1 : (tensor<5xi16>,tensor<5xi16>)->tensor<5xi16> + %3 = pphlo.constant dense<[-32768, 2, 0, 0, -32767]> : tensor<5xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i16_i16_ss() { + %0 = pphlo.constant dense<[0, 1, 128, -129, 0]> : tensor<5xi16> + %1 = pphlo.constant dense<[-32768, -1, 128, -129, 32767]> : tensor<5xi16> + %2 = pphlo.convert %0 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi16>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-32768, 2, 0, 0, -32767]> : tensor<5xi16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi16>, tensor<5xi16>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.subtract %0,%1 : (tensor<2xui16>,tensor<2xui16>)->tensor<2xui16> + %3 = pphlo.constant dense<[1, 0]> : tensor<2xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 256]> : tensor<2xui16> + %1 = 
pphlo.constant dense<[65535, 256]> : tensor<2xui16> + %2 = pphlo.convert %0 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui16>)->tensor<2x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[1, 0]> : tensor<2xui16> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui16>, tensor<2xui16>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.subtract %0,%1 : (tensor<5xi32>,tensor<5xi32>)->tensor<5xi32> + %3 = pphlo.constant dense<[-2147483648, 2, 0, 0, -2147483647]> : tensor<5xi32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[0, 1, 32768, -32769, 0]> : tensor<5xi32> + %1 = pphlo.constant dense<[-2147483648, -1, 32768, -32769, 2147483647]> : tensor<5xi32> + %2 = pphlo.convert %0 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi32>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-2147483648, 2, 0, 0, -2147483647]> : tensor<5xi32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi32>, tensor<5xi32>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.subtract %0,%1 : (tensor<2xui32>,tensor<2xui32>)->tensor<2xui32> + %3 = pphlo.constant dense<[1, 0]> : tensor<2xui32> + 
pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 65536]> : tensor<2xui32> + %1 = pphlo.constant dense<[4294967295, 65536]> : tensor<2xui32> + %2 = pphlo.convert %0 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui32>)->tensor<2x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[1, 0]> : tensor<2xui32> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui32>, tensor<2xui32>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.subtract %0,%1 : (tensor<5xi64>,tensor<5xi64>)->tensor<5xi64> + %3 = pphlo.constant dense<[-9223372036854775808, 2, 0, 0, -9223372036854775807]> : tensor<5xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<5xi64>, tensor<5xi64>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[0, 1, 2147483648, -2147483649, 0]> : tensor<5xi64> + %1 = pphlo.constant dense<[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]> : tensor<5xi64> + %2 = pphlo.convert %0 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xi64>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[-9223372036854775808, 2, 0, 0, -9223372036854775807]> : tensor<5xi64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<5xi64>, tensor<5xi64>)->() + 
func.return +} + +// ----- + +func.func @subtract_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[18446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.subtract %0,%1 : (tensor<2xui64>,tensor<2xui64>)->tensor<2xui64> + %3 = pphlo.constant dense<[1, 0]> : tensor<2xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 4294967296]> : tensor<2xui64> + %1 = pphlo.constant dense<[18446744073709551615, 4294967296]> : tensor<2xui64> + %2 = pphlo.convert %0 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xui64>)->tensor<2x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[1, 0]> : tensor<2xui64> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xui64>, tensor<2xui64>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_f16_f16_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.141]> : tensor<5xf16> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75 , 0.3, 3.141]> : tensor<5xf16> + %2 = pphlo.subtract %0,%1 : (tensor<5xf16>,tensor<5xf16>)->tensor<5xf16> + %3 = pphlo.constant dense<[0.000000e+00, -6.000000e+00, -6.250000e-01, -2.000730e-01, 0.000000e+00]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_f16_f16_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.141]> : tensor<5xf16> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75 , 0.3, 3.141]> : tensor<5xf16> + %2 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : 
(tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, -6.000000e+00, -6.250000e-01, -2.000730e-01, 0.000000e+00]> : tensor<5xf16> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_f32_f32_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265]> : tensor<5xf32> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75 , 0.3, 3.14159265]> : tensor<5xf32> + %2 = pphlo.subtract %0,%1 : (tensor<5xf32>,tensor<5xf32>)->tensor<5xf32> + %3 = pphlo.constant dense<[0.000000e+00, -6.000000e+00, -6.250000e-01, -0.200000018, 0.000000e+0]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_f32_f32_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265]> : tensor<5xf32> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75 , 0.3, 3.14159265]> : tensor<5xf32> + %2 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, -6.000000e+00, -6.250000e-01, -0.200000018, 0.000000e+0]> : tensor<5xf32> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_f64_f64_pp() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]> : tensor<5xf64> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75 , 0.3, 3.14159265358979323846]> : tensor<5xf64> + %2 = pphlo.subtract %0,%1 : (tensor<5xf64>,tensor<5xf64>)->tensor<5xf64> + %3 = pphlo.constant dense<[0.000000e+00, -6.000000e+00, 
-6.250000e-01, -0.19999999999999998, 0.000000e+00]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%2, %3) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @subtract_op_test_f64_f64_ss() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]> : tensor<5xf64> + %1 = pphlo.constant dense<[0.0, 7.0, 0.75 , 0.3, 3.14159265358979323846]> : tensor<5xf64> + %2 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %4 = pphlo.subtract %2, %3 : (tensor<5x!pphlo.secret>,tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %5 = pphlo.constant dense<[0.000000e+00, -6.000000e+00, -6.250000e-01, -0.19999999999999998, 0.000000e+00]> : tensor<5xf64> + %6 = pphlo.convert %4 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%5, %6) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/tanh.mlir b/libspu/compiler/tests/interpret/tanh.mlir new file mode 100644 index 00000000..4f42bbf7 --- /dev/null +++ b/libspu/compiler/tests/interpret/tanh.mlir @@ -0,0 +1,66 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @tanh_op_test_f16_f16_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.tanh %0 : (tensor<5xf16>)->tensor<5xf16> + %2 = pphlo.constant dense<[0.000000e+00, 7.617180e-01, 1.243290e-01, 9.967040e-02, 9.960930e-01]> : tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @tanh_op_test_f16_f16_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.140630]> : tensor<5xf16> + %1 = pphlo.convert %0 : (tensor<5xf16>)->tensor<5x!pphlo.secret> + %2 = pphlo.tanh %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, 7.617180e-01, 1.243290e-01, 
9.967040e-02, 9.960930e-01]> : tensor<5xf16> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf16> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf16>, tensor<5xf16>)->() + func.return +} + +// ----- + +func.func @tanh_op_test_f32_f32_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.tanh %0 : (tensor<5xf32>)->tensor<5xf32> + %2 = pphlo.constant dense<[0.000000e+00, 0.761594176, 1.243530e-01, 0.0996679961, 0.996272087]> : tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @tanh_op_test_f32_f32_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.14159274]> : tensor<5xf32> + %1 = pphlo.convert %0 : (tensor<5xf32>)->tensor<5x!pphlo.secret> + %2 = pphlo.tanh %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, 0.761594176, 1.243530e-01, 0.0996679961, 0.996272087]> : tensor<5xf32> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf32> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf32>, tensor<5xf32>)->() + func.return +} + +// ----- + +func.func @tanh_op_test_f64_f64_p() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.tanh %0 : (tensor<5xf64>)->tensor<5xf64> + %2 = pphlo.constant dense<[0.000000e+00, 0.76159415595576485, 0.12435300177159619, 0.099667994624955819, 0.99627207622074998]> : tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%1, %2) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} + +// ----- + +func.func @tanh_op_test_f64_f64_s() { + %0 = pphlo.constant dense<[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]> : tensor<5xf64> + %1 = pphlo.convert %0 : (tensor<5xf64>)->tensor<5x!pphlo.secret> + %2 = pphlo.tanh %1 : (tensor<5x!pphlo.secret>)->tensor<5x!pphlo.secret> + %3 = pphlo.constant dense<[0.000000e+00, 0.76159415595576485, 0.12435300177159619, 0.099667994624955819, 
0.99627207622074998]> : tensor<5xf64> + %4 = pphlo.convert %2 : (tensor<5x!pphlo.secret>)->tensor<5xf64> + pphlo.custom_call @expect_almost_eq(%3, %4) : (tensor<5xf64>, tensor<5xf64>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/template/basic_binary.template b/libspu/compiler/tests/interpret/template/basic_binary.template new file mode 100644 index 00000000..b9ce1d02 --- /dev/null +++ b/libspu/compiler/tests/interpret/template/basic_binary.template @@ -0,0 +1,22 @@ +func.func @%OP%_op_test_%IN0_DTYPE%_%OUT0_DTYPE%_pp() { + %0 = pphlo.constant dense<%INPUT0%> : tensor<%IN0_SHAPE%x%IN0_DTYPE%> + %1 = pphlo.constant dense<%INPUT1%> : tensor<%IN1_SHAPE%x%IN1_DTYPE%> + %2 = pphlo.%OP% %0,%1 : (tensor<%IN0_SHAPE%x%IN0_DTYPE%>,tensor<%IN1_SHAPE%x%IN1_DTYPE%>)->tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + %3 = pphlo.constant dense<%EXPECTED0%> : tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + pphlo.custom_call @%CHECKER%(%2, %3) %ATTR%: (tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>, tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>)->() + func.return +} + +// ----- + +func.func @%OP%_op_test_%IN0_DTYPE%_%OUT0_DTYPE%_ss() { + %0 = pphlo.constant dense<%INPUT0%> : tensor<%IN0_SHAPE%x%IN0_DTYPE%> + %1 = pphlo.constant dense<%INPUT1%> : tensor<%IN1_SHAPE%x%IN1_DTYPE%> + %2 = pphlo.convert %0 : (tensor<%IN0_SHAPE%x%IN0_DTYPE%>)->tensor<%IN0_SHAPE%x!pphlo.secret<%IN0_DTYPE%>> + %3 = pphlo.convert %1 : (tensor<%IN1_SHAPE%x%IN1_DTYPE%>)->tensor<%IN1_SHAPE%x!pphlo.secret<%IN1_DTYPE%>> + %4 = pphlo.%OP% %2, %3 : (tensor<%IN0_SHAPE%x!pphlo.secret<%IN0_DTYPE%>>,tensor<%IN1_SHAPE%x!pphlo.secret<%IN1_DTYPE%>>)->tensor<%OUT0_SHAPE%x!pphlo.secret<%OUT0_DTYPE%>> + %5 = pphlo.constant dense<%EXPECTED0%> : tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + %6 = pphlo.convert %4 : (tensor<%OUT0_SHAPE%x!pphlo.secret<%OUT0_DTYPE%>>)->tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + pphlo.custom_call @%CHECKER%(%5, %6) %ATTR%: (tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>, tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>)->() + func.return +} diff --git 
a/libspu/compiler/tests/interpret/template/basic_unary.template b/libspu/compiler/tests/interpret/template/basic_unary.template new file mode 100644 index 00000000..4dc230b9 --- /dev/null +++ b/libspu/compiler/tests/interpret/template/basic_unary.template @@ -0,0 +1,19 @@ +func.func @%OP%_op_test_%IN0_DTYPE%_%OUT0_DTYPE%_p() { + %0 = pphlo.constant dense<%INPUT0%> : tensor<%IN0_SHAPE%x%IN0_DTYPE%> + %1 = pphlo.%OP% %0 : (tensor<%IN0_SHAPE%x%IN0_DTYPE%>)->tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + %2 = pphlo.constant dense<%EXPECTED0%> : tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + pphlo.custom_call @%CHECKER%(%1, %2) %ATTR%: (tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>, tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>)->() + func.return +} + +// ----- + +func.func @%OP%_op_test_%IN0_DTYPE%_%OUT0_DTYPE%_s() { + %0 = pphlo.constant dense<%INPUT0%> : tensor<%IN0_SHAPE%x%IN0_DTYPE%> + %1 = pphlo.convert %0 : (tensor<%IN0_SHAPE%x%IN0_DTYPE%>)->tensor<%IN0_SHAPE%x!pphlo.secret<%IN0_DTYPE%>> + %2 = pphlo.%OP% %1 : (tensor<%IN0_SHAPE%x!pphlo.secret<%IN0_DTYPE%>>)->tensor<%OUT0_SHAPE%x!pphlo.secret<%OUT0_DTYPE%>> + %3 = pphlo.constant dense<%EXPECTED0%> : tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + %4 = pphlo.convert %2 : (tensor<%OUT0_SHAPE%x!pphlo.secret<%OUT0_DTYPE%>>)->tensor<%OUT0_SHAPE%x%OUT0_DTYPE%> + pphlo.custom_call @%CHECKER%(%3, %4) %ATTR%: (tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>, tensor<%OUT0_SHAPE%x%OUT0_DTYPE%>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/test_json/abs.json b/libspu/compiler/tests/interpret/test_json/abs.json new file mode 100644 index 00000000..d8b52265 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/abs.json @@ -0,0 +1,39 @@ +{ + "name": "abs", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2, 0, 2]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[ 2, 0, 2]", + "shape": "3", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[23.1, -23.1, 0.0]", + "shape": "3", + "dtype": "f64" + } + ], 
+ "expected": [ + { + "data": "[23.1, 23.1, 0.0]", + "shape": "3", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/add.json b/libspu/compiler/tests/interpret/test_json/add.json new file mode 100644 index 00000000..2b404eb9 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/add.json @@ -0,0 +1,303 @@ +{ + "name": "add", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[0, 1, 8, -9, 0]", + "shape": "5", + "dtype": "i8" + }, + { + "data": "[-128, -1, 8, -9, 127]", + "shape": "5", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[-128, 0, 16, -18, 127]", + "shape": "5", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 16]", + "shape": "2", + "dtype": "ui8" + }, + { + "data": "[255, 16]", + "shape": "2", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[255, 32]", + "shape": "2", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 128, -129, 0]", + "shape": "5", + "dtype": "i16" + }, + { + "data": "[-32768, -1, 128, -129, 32767]", + "shape": "5", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[-32768, 0, 256, -258, 32767]", + "shape": "5", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 256]", + "shape": "2", + "dtype": "ui16" + }, + { + "data": "[65535, 256]", + "shape": "2", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[65535, 512]", + "shape": "2", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 32768, -32769, 0]", + "shape": "5", + "dtype": "i32" + }, + { + "data": "[-2147483648, -1, 32768, -32769, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[-2147483648, 0, 65536, -65538, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 65536]", + "shape": "2", + "dtype": "ui32" + }, + { + "data": "[4294967295, 65536]", + "shape": "2", 
+ "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[4294967295, 131072]", + "shape": "2", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 2147483648, -2147483649, 0]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-9223372036854775808, 0, 4294967296, -4294967298, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 4294967296]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[18446744073709551615, 4294967296]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[18446744073709551615, 8589934592]", + "shape": "2", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true, true, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.141]", + "shape": "5", + "dtype": "f16" + }, + { + "data": "[0.0, 7.0, 0.75, 0.3, 3.141]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 8.000000e+00, 8.750000e-01, 3.999020e-01, 6.281250e+00]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159265]", + "shape": "5", + "dtype": "f32" + }, + { + "data": "[0.0, 7.0, 0.75, 0.3, 3.14159265]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 8.000000e+00, 8.750000e-01, 4.000000e-01, 6.28318548]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]", + "shape": "5", + "dtype": "f64" + }, + { + "data": 
"[0.0, 7.0, 0.75, 0.3, 3.14159265358979323846]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 8.000000e+00, 8.750000e-01, 4.000000e-01, 6.2831853071795862]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "2", + "shape": "", + "dtype": "i8" + }, + { + "data": "3", + "shape": "", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "5", + "shape": "", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "2", + "shape": "2x0x3", + "dtype": "i8" + }, + { + "data": "3", + "shape": "2x0x3", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "", + "shape": "2x0x3", + "dtype": "i8" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/and.json b/libspu/compiler/tests/interpret/test_json/and.json new file mode 100644 index 00000000..ee71fc55 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/and.json @@ -0,0 +1,237 @@ +{ + "name": "and", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[127, -128, -128]", + "shape": "3", + "dtype": "i8" + }, + { + "data": "[0, 127, -128]", + "shape": "3", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[0, 0, -128]", + "shape": "3", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 127, 255]", + "shape": "3", + "dtype": "ui8" + }, + { + "data": "255", + "shape": "3", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[0, 127, 255]", + "shape": "3", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[32767, -32768, -32768]", + "shape": "3", + "dtype": "i16" + }, + { + "data": "[0, 32767, -32768]", + "shape": "3", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[0, 0, -32768]", + "shape": "3", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 32767, 65535]", + "shape": "3", + "dtype": "ui16" + }, + { + "data": "65535", + "shape": "3", + "dtype": "ui16" + } + ], + 
"expected": [ + { + "data": "[0, 32767, 65535]", + "shape": "3", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[2147483647, -2147483648, -2147483648]", + "shape": "3", + "dtype": "i32" + }, + { + "data": "[0, 2147483647, -2147483648]", + "shape": "3", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[0, 0, -2147483648]", + "shape": "3", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 2147483647, 4294967295]", + "shape": "3", + "dtype": "ui32" + }, + { + "data": "4294967295", + "shape": "3", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[0, 2147483647, 4294967295]", + "shape": "3", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[9223372036854775807, -9223372036854775808, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + }, + { + "data": "[0, 9223372036854775807, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[0, 0, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 9223372036854775807, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + }, + { + "data": "18446744073709551615", + "shape": "3", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[0, 9223372036854775807, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, false, false, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "false", + "shape": "2", + "dtype": "i1" + }, + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, false]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "true", + "shape": "2", + "dtype": "i1" + }, + { + "data": "[false, true]", 
+ "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/arshift.json b/libspu/compiler/tests/interpret/test_json/arshift.json new file mode 100644 index 00000000..58a93c6c --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/arshift.json @@ -0,0 +1,27 @@ +{ + "name": "shift_right_arithmetic", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-1, 0, 8]", + "shape": "3", + "dtype": "i64" + }, + { + "data": "[1, 2, 3]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-1, 0, 1]", + "shape": "3", + "dtype": "i64" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/atan2.json b/libspu/compiler/tests/interpret/test_json/atan2.json new file mode 100644 index 00000000..7183a7b3 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/atan2.json @@ -0,0 +1,28 @@ +{ + "name": "atan2", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[0.0, 1.0, -1.0]", + "shape": "3", + "dtype": "f64" + }, + { + "data": "[0.0, 0.0, 0.0]", + "shape": "3", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.0, 1.5707963267948966, -1.5707963267948966]", + "shape": "3", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/ceil.json b/libspu/compiler/tests/interpret/test_json/ceil.json new file mode 100644 index 00000000..39a6d10c --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/ceil.json @@ -0,0 +1,57 @@ +{ + "name": "ceil", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2.5, 0.0, 2.5]", + "shape": "3", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[-2.000000e+00, 0.000000e+00, 3.000000e+00]", + "shape": "3", + "dtype": 
"f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-2.5, 0.0, 2.5]", + "shape": "3", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[-2.000000e+00, 0.000000e+00, 3.000000e+00]", + "shape": "3", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-2.5, 0.0, 2.5]", + "shape": "3", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[-2.000000e+00, 0.000000e+00, 3.000000e+00]", + "shape": "3", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/cosine.json b/libspu/compiler/tests/interpret/test_json/cosine.json new file mode 100644 index 00000000..fe39c4aa --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/cosine.json @@ -0,0 +1,57 @@ +{ + "name": "cosine", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.140630]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[1.000000e+00, 0.540302277, 0.992197692, 0.995004177, -1.000000e+00]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159274]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[1.000000e+00, 0.540302277, 0.992197692, 0.995004177, -1.000000e+00]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[1.000000e+00, 0.54030230586813977, 0.992197667229329, 0.99500416527802582, -1.000000e+00]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/divide.json b/libspu/compiler/tests/interpret/test_json/divide.json new file mode 100644 
index 00000000..431eae69 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/divide.json @@ -0,0 +1,70 @@ +{ + "name": "divide", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[17, -17, 17, -17]", + "shape": "4", + "dtype": "i64" + }, + { + "data": "[3, 3, -3, -3]", + "shape": "4", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[5, -5, -5, 5]", + "shape": "4", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[17, 18, 19, 20]", + "shape": "4", + "dtype": "ui64" + }, + { + "data": "[3, 4, 5, 7]", + "shape": "4", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[5, 4, 3, 2]", + "shape": "4", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[17.1, -17.1, 17.1, -17.1]", + "shape": "4", + "dtype": "f64" + }, + { + "data": "[3.0, 3.0, -3.0, -3.0]", + "shape": "4", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[5.700000e+00, -5.700000e+00, -5.700000e+00, 5.700000e+00]", + "shape": "4", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/equal.json b/libspu/compiler/tests/interpret/test_json/equal.json new file mode 100644 index 00000000..92c06201 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/equal.json @@ -0,0 +1,153 @@ +{ + "name": "equal", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "-2", + "shape": "", + "dtype": "i64" + }, + { + "data": "-2", + "shape": "", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "true", + "shape": "", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2, -1, 0, 2, 2]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-2, -2, 0, 1, 2]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[true, false, true, false, true]", + "shape": "5", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "0", + "shape": "", + "dtype": "ui64" + }, + { + 
"data": "0", + "shape": "", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "true", + "shape": "", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[true, false]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "true", + "shape": "", + "dtype": "i1" + }, + { + "data": "true", + "shape": "", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "true", + "shape": "", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[true, true, false, false]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[true, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[true, false, false, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -2.0, 0.0, 1.0, 2.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[-2.0, -1.0, 0.0, 2.0, 2.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[true, false, true, false, true]", + "shape": "5", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/exponential.json b/libspu/compiler/tests/interpret/test_json/exponential.json new file mode 100644 index 00000000..5818f6b6 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/exponential.json @@ -0,0 +1,24 @@ +{ + "name": "exponential", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[[0.0, 1.0], [2.0, 3.0]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[[1.000000e+00, 2.7182818284590451], [7.3890560989306504, 20.085536923187668]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq", + "tol": 0.4 + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/exponential_minus_one.json 
b/libspu/compiler/tests/interpret/test_json/exponential_minus_one.json new file mode 100644 index 00000000..53e77cda --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/exponential_minus_one.json @@ -0,0 +1,23 @@ +{ + "name": "exponential_minus_one", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[0.0, 1.0]", + "shape": "2", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.0, 1.7182818284590451]", + "shape": "2", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/floor.json b/libspu/compiler/tests/interpret/test_json/floor.json new file mode 100644 index 00000000..0d20b974 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/floor.json @@ -0,0 +1,57 @@ +{ + "name": "floor", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2.5, 0.0, 2.5]", + "shape": "3", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[-3.000000e+00, 0.000000e+00, 2.000000e+00]", + "shape": "3", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-2.5, 0.0, 2.5]", + "shape": "3", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[-3.000000e+00, 0.000000e+00, 2.000000e+00]", + "shape": "3", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-2.5, 0.0, 2.5]", + "shape": "3", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[-3.000000e+00, 0.000000e+00, 2.000000e+00]", + "shape": "3", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/greater.json b/libspu/compiler/tests/interpret/test_json/greater.json new file mode 100644 index 00000000..3bd2cfed --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/greater.json @@ -0,0 +1,90 @@ +{ + "name": "greater", + "template": "basic_binary", + 
"testcases": [ + { + "inputs": [ + { + "data": "[-2, -1, 0, 2, 2]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-2, -2, 0, 1, 2]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[false, true, false, true, false]", + "shape": "5", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[true, true, false, false]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[true, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true, false, false]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -2.0, 0.0, 1.0, 2.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[-2.0, -1.0, 0.0, 2.0, 2.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[false, false, false, false, false]", + "shape": "5", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/greater_equal.json b/libspu/compiler/tests/interpret/test_json/greater_equal.json new file mode 100644 index 00000000..04e1036a --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/greater_equal.json @@ -0,0 +1,90 @@ +{ + "name": "greater_equal", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2, -1, 0, 2, 2]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-2, -2, 0, 1, 2]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[true, true, true, true, true]", + "shape": "5", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[true, true]", + 
"shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[true, true, false, false]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[true, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[true, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -2.0, 0.0, 1.0, 2.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[-2.0, -1.0, 0.0, 2.0, 2.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[true, false, true, false, true]", + "shape": "5", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/less.json b/libspu/compiler/tests/interpret/test_json/less.json new file mode 100644 index 00000000..1d225d8c --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/less.json @@ -0,0 +1,90 @@ +{ + "name": "less", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2, -1, 0, 2, 2]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-2, -2, 0, 1, 2]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[false, false, false, false, false]", + "shape": "5", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[false, false]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[true, true, false, false]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[true, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -2.0, 0.0, 1.0, 2.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[-2.0, -1.0, 0.0, 2.0, 2.0]", + "shape": "5", + "dtype": "f64" + } + ], + 
"expected": [ + { + "data": "[false, true, false, true, false]", + "shape": "5", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/less_equal.json b/libspu/compiler/tests/interpret/test_json/less_equal.json new file mode 100644 index 00000000..4b46f536 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/less_equal.json @@ -0,0 +1,90 @@ +{ + "name": "less_equal", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2, -1, 0, 2, 2]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-2, -2, 0, 1, 2]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[true, false, true, false, true]", + "shape": "5", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[true, false]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[true, true, false, false]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[true, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[true, false, true, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -2.0, 0.0, 1.0, 2.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[-2.0, -1.0, 0.0, 2.0, 2.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[true, true, true, true, true]", + "shape": "5", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/log.json b/libspu/compiler/tests/interpret/test_json/log.json new file mode 100644 index 00000000..332398f1 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/log.json @@ -0,0 +1,23 @@ +{ + "name": "log", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[[1.0, 2.0], [3.0, 
4.0]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[[0.000000e+00, 0.69314718055994529], [1.0986122886681098, 1.3862943611198906]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/log_plus_one.json b/libspu/compiler/tests/interpret/test_json/log_plus_one.json new file mode 100644 index 00000000..2bc6d5bd --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/log_plus_one.json @@ -0,0 +1,23 @@ +{ + "name": "log_plus_one", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[0.0, -0.999, 7.0, 6.38905621, 15.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.0, -6.90776825, 2.07944155, 2.0, 2.77258873]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/logistic.json b/libspu/compiler/tests/interpret/test_json/logistic.json new file mode 100644 index 00000000..31bd7536 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/logistic.json @@ -0,0 +1,23 @@ +{ + "name": "logistic", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[[1.0, 2.0], [3.0, 4.0]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[[0.73105857863000488, 0.88079707797788244],[0.95257412682243322, 0.98201379003790844]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/max.json b/libspu/compiler/tests/interpret/test_json/max.json new file mode 100644 index 00000000..5b34fd90 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/max.json @@ -0,0 +1,261 @@ +{ + "name": "maximum", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[0, 1, 8, -9, 0]", + 
"shape": "5", + "dtype": "i8" + }, + { + "data": "[-128, -1, 8, -9, 127]", + "shape": "5", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[0, 1, 8, -9, 127]", + "shape": "5", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 16]", + "shape": "2", + "dtype": "ui8" + }, + { + "data": "[255, 16]", + "shape": "2", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[255, 16]", + "shape": "2", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 128, -129, 0]", + "shape": "5", + "dtype": "i16" + }, + { + "data": "[-32768, -1, 128, -129, 32767]", + "shape": "5", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[0, 1, 128, -129, 32767]", + "shape": "5", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 256]", + "shape": "2", + "dtype": "ui16" + }, + { + "data": "[65535, 256]", + "shape": "2", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[65535, 256]", + "shape": "2", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 32768, -32769, 0]", + "shape": "5", + "dtype": "i32" + }, + { + "data": "[-2147483648, -1, 32768, -32769, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[0, 1, 32768, -32769, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 65536]", + "shape": "2", + "dtype": "ui32" + }, + { + "data": "[4294967295, 65536]", + "shape": "2", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[4294967295, 65536]", + "shape": "2", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 2147483648, -2147483649, 0]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[0, 1, 2147483648, -2147483649, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 4294967296]", + 
+ "shape": "2", + "dtype": "ui64" + }, + { + "data": "[18446744073709551615, 4294967296]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[18446744073709551615, 4294967296]", + "shape": "2", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true, true, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f16" + }, + { + "data": "[ 1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f32" + }, + { + "data": "[ 1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[ 1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/min.json b/libspu/compiler/tests/interpret/test_json/min.json new file mode 100644 index 00000000..40318140 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/min.json @@ -0,0 +1,261 @@ +{ + "name": "minimum", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[0, 1, 8, -9, 0]", + "shape": "5", + "dtype": "i8" + }, + { + "data": "[-128, -1, 8, -9, 
127]", + "shape": "5", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[-128, -1, 8, -9, 0]", + "shape": "5", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 16]", + "shape": "2", + "dtype": "ui8" + }, + { + "data": "[255, 16]", + "shape": "2", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[0, 16]", + "shape": "2", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 128, -129, 0]", + "shape": "5", + "dtype": "i16" + }, + { + "data": "[-32768, -1, 128, -129, 32767]", + "shape": "5", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[-32768, -1, 128, -129, 0]", + "shape": "5", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 256]", + "shape": "2", + "dtype": "ui16" + }, + { + "data": "[65535, 256]", + "shape": "2", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[0, 256]", + "shape": "2", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 32768, -32769, 0]", + "shape": "5", + "dtype": "i32" + }, + { + "data": "[-2147483648, -1, 32768, -32769, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[-2147483648, -1, 32768, -32769, 0]", + "shape": "5", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 65536]", + "shape": "2", + "dtype": "ui32" + }, + { + "data": "[4294967295, 65536]", + "shape": "2", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[0, 65536]", + "shape": "2", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 2147483648, -2147483649, 0]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-9223372036854775808, -1, 2147483648, -2147483649, 0]", + "shape": "5", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 4294967296]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[18446744073709551615, 
4294967296]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[0, 4294967296]", + "shape": "2", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, false, false, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f16" + }, + { + "data": "[ 1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[-1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f32" + }, + { + "data": "[ 1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[-1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[-1.0, -1.0, 0.0, 1.0, 1.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[ 1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[-1.0, -1.0, 0.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/multiply.json b/libspu/compiler/tests/interpret/test_json/multiply.json new file mode 100644 index 00000000..96ca6e1e --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/multiply.json @@ -0,0 +1,261 @@ +{ + "name": "multiply", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[0, 1, 8, -9, 0]", + "shape": "5", + "dtype": "i8" + }, + { + "data": "[-128, -1, 8, -9, 127]", + "shape": "5", + "dtype": "i8" + } + ], + "expected": [ + { + 
"data": "[0, -1, 64, 81, 0]", + "shape": "5", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 16, 16]", + "shape": "3", + "dtype": "ui8" + }, + { + "data": "[255, 16, 17]", + "shape": "3", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[0, 0, 16]", + "shape": "3", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 128, -129, 0]", + "shape": "5", + "dtype": "i16" + }, + { + "data": "[-32768, -1, 128, -129, 32767]", + "shape": "5", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[0, -1, 16384, 16641, 0]", + "shape": "5", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 256]", + "shape": "2", + "dtype": "ui16" + }, + { + "data": "[65535, 256]", + "shape": "2", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 32768, -32769, 0]", + "shape": "5", + "dtype": "i32" + }, + { + "data": "[-2147483648, -1, 32768, -32769, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[0, -1, 1073741824, 1073807361, 0]", + "shape": "5", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 65536]", + "shape": "2", + "dtype": "ui32" + }, + { + "data": "[4294967295, 65536]", + "shape": "2", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 2147483648, -2147483649, 0]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[0, -1, 4611686018427387904, 4611686022722355201, 0]", + "shape": "5", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 4294967296]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[18446744073709551615, 4294967296]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + 
"data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, false, false, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.141]", + "shape": "5", + "dtype": "f16" + }, + { + "data": "[0.0, 7.0, 0.75, 0.3, 3.141]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 7.000000e+00, 9.375000e-02, 2.999880e-02, 9.867180e+00]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159265]", + "shape": "5", + "dtype": "f32" + }, + { + "data": "[0.0, 7.0, 0.75, 0.3, 3.14159265]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 7.000000e+00, 9.375000e-02, 0.0300000012, 9.86960506]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[0.0, 7.0, 0.75, 0.3, 3.14159265358979323846]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 7.000000e+00, 9.375000e-02, 3.000000e-02, 9.869604401089358]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/negate.json b/libspu/compiler/tests/interpret/test_json/negate.json new file mode 100644 index 00000000..db8a4dfd --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/negate.json @@ -0,0 +1,185 @@ +{ + "name": "negate", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[-128, -9, 0, 8, 127]", + "shape": "5", + "dtype": "i8" + } + ], + "expected": 
[ + { + "data": "[-128, 9, 0, -8, -127]", + "shape": "5", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 16, 255]", + "shape": "3", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[0, 240, 1]", + "shape": "3", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[-32768, -129, 0, 128, 32767]", + "shape": "5", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[-32768, 129, 0, -128, -32767]", + "shape": "5", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 256, 65535]", + "shape": "3", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[0, 65280, 1]", + "shape": "3", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[-2147483648, -65537, 0, 65536, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[-2147483648, 65537, 0, -65536, -2147483647]", + "shape": "5", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 65536, 4294967295]", + "shape": "3", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[0, 4294901760, 1]", + "shape": "3", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[-9223372036854775808, -2147483649, 0, 2147483648, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-9223372036854775808, 2147483649, 0, -2147483648, -9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 4294967296, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[0, 18446744069414584320, 1]", + "shape": "3", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.140630]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[0.000000e+00, -1.000000e+00, -1.250000e-01, -9.997550e-02, -3.140630e+00]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 
3.14159274]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[0.000000e+00, -1.000000e+00, -1.250000e-01, -1.000000e-01, -3.14159274]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.000000e+00, -1.000000e+00, -1.250000e-01, -1.000000e-01, -3.1415926535897931]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/not.json b/libspu/compiler/tests/interpret/test_json/not.json new file mode 100644 index 00000000..bfb72d67 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/not.json @@ -0,0 +1,182 @@ +{ + "name": "not", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[127, -128, 0]", + "shape": "3", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[-128, 127, -1]", + "shape": "3", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 127, 255]", + "shape": "3", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[255, 128, 0]", + "shape": "3", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[32767, -32768, 0]", + "shape": "3", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[-32768, 32767, -1]", + "shape": "3", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 32767, 65535]", + "shape": "3", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[65535, 32768, 0]", + "shape": "3", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[2147483647, -2147483648, 0]", + "shape": "3", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[-2147483648, 2147483647, -1]", + "shape": "3", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 2147483647, 4294967295]", + "shape": "3", + "dtype": "ui32" + } + ], + "expected": 
[ + { + "data": "[4294967295, 2147483648, 0]", + "shape": "3", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[9223372036854775807, -9223372036854775808, 0]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-9223372036854775808, 9223372036854775807, -1]", + "shape": "3", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 9223372036854775807, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[18446744073709551615, 9223372036854775808, 0]", + "shape": "3", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[true, false]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "false", + "shape": "", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "true", + "shape": "", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "true", + "shape": "", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "false", + "shape": "", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/not_equal.json b/libspu/compiler/tests/interpret/test_json/not_equal.json new file mode 100644 index 00000000..94aabebd --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/not_equal.json @@ -0,0 +1,90 @@ +{ + "name": "not_equal", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2, -1, 0, 2, 2]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-2, -2, 0, 1, 2]", + "shape": "5", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[false, true, false, true, false]", + "shape": "5", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[0, 0]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ] + 
}, + { + "inputs": [ + { + "data": "[true, true, false, false]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[true, false, true, false]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true, true, false]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -2.0, 0.0, 1.0, 2.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[-2.0, -1.0, 0.0, 2.0, 2.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[false, true, false, true, false]", + "shape": "5", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/or.json b/libspu/compiler/tests/interpret/test_json/or.json new file mode 100644 index 00000000..4eb2df2b --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/or.json @@ -0,0 +1,237 @@ +{ + "name": "or", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[127, -128, -128]", + "shape": "3", + "dtype": "i8" + }, + { + "data": "[0, 127, -128]", + "shape": "3", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[127, -1, -128]", + "shape": "3", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 127, 255]", + "shape": "3", + "dtype": "ui8" + }, + { + "data": "255", + "shape": "3", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[255, 255, 255]", + "shape": "3", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[32767, -32768, -32768]", + "shape": "3", + "dtype": "i16" + }, + { + "data": "[0, 32767, -32768]", + "shape": "3", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[32767, -1, -32768]", + "shape": "3", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 32767, 65535]", + "shape": "3", + "dtype": "ui16" + }, + { + "data": "65535", + "shape": "3", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[65535, 65535, 65535]", + "shape": "3", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { 
+ "data": "[2147483647, -2147483648, -2147483648]", + "shape": "3", + "dtype": "i32" + }, + { + "data": "[0, 2147483647, -2147483648]", + "shape": "3", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[2147483647, -1, -2147483648]", + "shape": "3", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 2147483647, 4294967295]", + "shape": "3", + "dtype": "ui32" + }, + { + "data": "4294967295", + "shape": "3", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[4294967295, 4294967295, 4294967295]", + "shape": "3", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[9223372036854775807, -9223372036854775808, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + }, + { + "data": "[0, 9223372036854775807, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[9223372036854775807, -1, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 9223372036854775807, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + }, + { + "data": "18446744073709551615", + "shape": "3", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[18446744073709551615, 18446744073709551615, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true, true, true]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "false", + "shape": "2", + "dtype": "i1" + }, + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "true", + "shape": "2", + "dtype": "i1" + }, + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + 
"data": "[true, true]", + "shape": "2", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/popcnt.json b/libspu/compiler/tests/interpret/test_json/popcnt.json new file mode 100644 index 00000000..257b7bfe --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/popcnt.json @@ -0,0 +1,22 @@ +{ + "name": "popcnt", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[0, 1, 2, 127]", + "shape": "4", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[0, 1, 1, 7]", + "shape": "4", + "dtype": "i64" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/power.json b/libspu/compiler/tests/interpret/test_json/power.json new file mode 100644 index 00000000..e5e6f3cf --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/power.json @@ -0,0 +1,71 @@ +{ + "name": "power", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-1, -1, -3, 1, -3, 0]", + "shape": "6", + "dtype": "i64" + }, + { + "data": "[1, 0, -3, -3, 3, 2]", + "shape": "6", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-1, 1, 0, 1, -27, 0]", + "shape": "6", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 0, 1, 1, 5]", + "shape": "5", + "dtype": "ui64" + }, + { + "data": "[0, 1, 0, 2, 5]", + "shape": "5", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[1, 0, 1, 1, 3125]", + "shape": "5", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[-2.0, -0.0, 5.0, 3.0, 10000.0]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[2.0, 2.0, 2.0, -1.0, 1.0]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[4.000000e+00, 0.000000e+00, 2.500000e+01, 0.33333333333333331, 10000.0]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq", + "tol": 0.5 + } + ] +} \ No newline at end of file diff --git 
a/libspu/compiler/tests/interpret/test_json/reshape.json b/libspu/compiler/tests/interpret/test_json/reshape.json new file mode 100644 index 00000000..ddfd4c0e --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/reshape.json @@ -0,0 +1,70 @@ +{ + "name": "reshape", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[[1,2,3,4,5,6]]", + "shape": "1x6", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[1, 2, 3, 4, 5, 6]", + "shape": "6", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[1,2,3,4,5,6]", + "shape": "6", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[[1, 2, 3], [4, 5, 6]]", + "shape": "2x3", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[[1,2,3],[4,5,6]]", + "shape": "2x3", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[[1, 2], [3, 4], [5, 6]]", + "shape": "3x2", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[[1,2],[3,4],[5,6]]", + "shape": "3x2", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[1, 2, 3, 4, 5, 6]", + "shape": "6", + "dtype": "i32" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/round_afz.json b/libspu/compiler/tests/interpret/test_json/round_afz.json new file mode 100644 index 00000000..e9e0bd79 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/round_afz.json @@ -0,0 +1,23 @@ +{ + "name": "round_nearest_afz", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[-2.5, 0.4, 0.5, 0.6, 2.5]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[-3.0, 0.0, 1.0, 1.0, 3.0]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/rshift.json b/libspu/compiler/tests/interpret/test_json/rshift.json new file mode 100644 index 00000000..d3693ae6 --- /dev/null +++ 
b/libspu/compiler/tests/interpret/test_json/rshift.json @@ -0,0 +1,27 @@ +{ + "name": "shift_right_logical", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[-1, 0, 8]", + "shape": "3", + "dtype": "i64" + }, + { + "data": "[1, 2, 3]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[9223372036854775807, 0, 1]", + "shape": "3", + "dtype": "i64" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/rsqrt.json b/libspu/compiler/tests/interpret/test_json/rsqrt.json new file mode 100644 index 00000000..cb1c16fb --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/rsqrt.json @@ -0,0 +1,23 @@ +{ + "name": "rsqrt", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[[1.0, 4.0], [9.0, 25.0]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[[1.000000e+00, 5.000000e-01], [0.33333333333333331, 2.000000e-01]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/sign.json b/libspu/compiler/tests/interpret/test_json/sign.json new file mode 100644 index 00000000..b9c49ba0 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/sign.json @@ -0,0 +1,39 @@ +{ + "name": "sign", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[-1, 0, 1]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[-1, 0, 1]", + "shape": "3", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[-1.0, 0.0, 1.0]", + "shape": "3", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[-1.0, 0.0, 1.0]", + "shape": "3", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/sine.json b/libspu/compiler/tests/interpret/test_json/sine.json new file mode 100644 index 
00000000..a03391ec --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/sine.json @@ -0,0 +1,57 @@ +{ + "name": "sine", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.140630]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 8.413080e-01, 1.246950e-01, 9.979240e-02, 9.675020e-04]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159274]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 0.841470957, 0.12467473, 0.0998334214, -8.74227765E-8]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 0.8414709848078965, 0.12467473338522769, 0.099833416646828154, 1.2246467991473532E-16]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/sqrt.json b/libspu/compiler/tests/interpret/test_json/sqrt.json new file mode 100644 index 00000000..43d84afe --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/sqrt.json @@ -0,0 +1,23 @@ +{ + "name": "sqrt", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[[0.0, 1.0], [4.0, 9.0]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[[0.000000e+00, 1.000000e+00], [2.000000e+00, 3.000000e+00]]", + "shape": "2x2", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/subtract.json b/libspu/compiler/tests/interpret/test_json/subtract.json new file mode 100644 index 00000000..86274f46 --- /dev/null +++ 
b/libspu/compiler/tests/interpret/test_json/subtract.json @@ -0,0 +1,240 @@ +{ + "name": "subtract", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[0, 1, 8, -9, 0]", + "shape": "5", + "dtype": "i8" + }, + { + "data": "[-128, -1, 8, -9, 127]", + "shape": "5", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[-128, 2, 0, 0, -127]", + "shape": "5", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 16]", + "shape": "2", + "dtype": "ui8" + }, + { + "data": "[255, 16]", + "shape": "2", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[1, 0]", + "shape": "2", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 128, -129, 0]", + "shape": "5", + "dtype": "i16" + }, + { + "data": "[-32768, -1, 128, -129, 32767]", + "shape": "5", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[-32768, 2, 0, 0, -32767]", + "shape": "5", + "dtype": "i16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 256]", + "shape": "2", + "dtype": "ui16" + }, + { + "data": "[65535, 256]", + "shape": "2", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[1, 0]", + "shape": "2", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 32768, -32769, 0]", + "shape": "5", + "dtype": "i32" + }, + { + "data": "[-2147483648, -1, 32768, -32769, 2147483647]", + "shape": "5", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[-2147483648, 2, 0, 0, -2147483647]", + "shape": "5", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 65536]", + "shape": "2", + "dtype": "ui32" + }, + { + "data": "[4294967295, 65536]", + "shape": "2", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[1, 0]", + "shape": "2", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 1, 2147483648, -2147483649, 0]", + "shape": "5", + "dtype": "i64" + }, + { + "data": "[-9223372036854775808, -1, 2147483648, -2147483649, 9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ], + 
"expected": [ + { + "data": "[-9223372036854775808, 2, 0, 0, -9223372036854775807]", + "shape": "5", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 4294967296]", + "shape": "2", + "dtype": "ui64" + }, + { + "data": "[18446744073709551615, 4294967296]", + "shape": "2", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[1, 0]", + "shape": "2", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.141]", + "shape": "5", + "dtype": "f16" + }, + { + "data": "[0.0, 7.0, 0.75 , 0.3, 3.141]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[0.000000e+00, -6.000000e+00, -6.250000e-01, -2.000730e-01, 0.000000e+00]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159265]", + "shape": "5", + "dtype": "f32" + }, + { + "data": "[0.0, 7.0, 0.75 , 0.3, 3.14159265]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[0.000000e+00, -6.000000e+00, -6.250000e-01, -0.200000018, 0.000000e+0]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159265358979323846]", + "shape": "5", + "dtype": "f64" + }, + { + "data": "[0.0, 7.0, 0.75 , 0.3, 3.14159265358979323846]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.000000e+00, -6.000000e+00, -6.250000e-01, -0.19999999999999998, 0.000000e+00]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/tanh.json b/libspu/compiler/tests/interpret/test_json/tanh.json new file mode 100644 index 00000000..784c5750 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/tanh.json @@ -0,0 +1,57 @@ +{ + "name": "tanh", + "template": "basic_unary", + "testcases": [ + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 
3.140630]", + "shape": "5", + "dtype": "f16" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 7.617180e-01, 1.243290e-01, 9.967040e-02, 9.960930e-01]", + "shape": "5", + "dtype": "f16" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.14159274]", + "shape": "5", + "dtype": "f32" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 0.761594176, 1.243530e-01, 0.0996679961, 0.996272087]", + "shape": "5", + "dtype": "f32" + } + ], + "checker": "expect_almost_eq" + }, + { + "inputs": [ + { + "data": "[0.0, 1.0, 0.125, 0.1, 3.1415926535897931]", + "shape": "5", + "dtype": "f64" + } + ], + "expected": [ + { + "data": "[0.000000e+00, 0.76159415595576485, 0.12435300177159619, 0.099667994624955819, 0.99627207622074998]", + "shape": "5", + "dtype": "f64" + } + ], + "checker": "expect_almost_eq" + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/test_json/xor.json b/libspu/compiler/tests/interpret/test_json/xor.json new file mode 100644 index 00000000..85b24c76 --- /dev/null +++ b/libspu/compiler/tests/interpret/test_json/xor.json @@ -0,0 +1,237 @@ +{ + "name": "xor", + "template": "basic_binary", + "testcases": [ + { + "inputs": [ + { + "data": "[127, -128, -128]", + "shape": "3", + "dtype": "i8" + }, + { + "data": "[0, 127, -128]", + "shape": "3", + "dtype": "i8" + } + ], + "expected": [ + { + "data": "[127, -1, 0]", + "shape": "3", + "dtype": "i8" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 127, 255]", + "shape": "3", + "dtype": "ui8" + }, + { + "data": "255", + "shape": "3", + "dtype": "ui8" + } + ], + "expected": [ + { + "data": "[255, 128, 0]", + "shape": "3", + "dtype": "ui8" + } + ] + }, + { + "inputs": [ + { + "data": "[32767, -32768, -32768]", + "shape": "3", + "dtype": "i16" + }, + { + "data": "[0, 32767, -32768]", + "shape": "3", + "dtype": "i16" + } + ], + "expected": [ + { + "data": "[32767, -1, 0]", + "shape": "3", + "dtype": "i16" + } + ] + }, + { + "inputs": [ 
+ { + "data": "[0, 32767, 65535]", + "shape": "3", + "dtype": "ui16" + }, + { + "data": "65535", + "shape": "3", + "dtype": "ui16" + } + ], + "expected": [ + { + "data": "[65535, 32768, 0]", + "shape": "3", + "dtype": "ui16" + } + ] + }, + { + "inputs": [ + { + "data": "[2147483647, -2147483648, -2147483648]", + "shape": "3", + "dtype": "i32" + }, + { + "data": "[0, 2147483647, -2147483648]", + "shape": "3", + "dtype": "i32" + } + ], + "expected": [ + { + "data": "[2147483647, -1, 0]", + "shape": "3", + "dtype": "i32" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 2147483647, 4294967295]", + "shape": "3", + "dtype": "ui32" + }, + { + "data": "4294967295", + "shape": "3", + "dtype": "ui32" + } + ], + "expected": [ + { + "data": "[4294967295, 2147483648, 0]", + "shape": "3", + "dtype": "ui32" + } + ] + }, + { + "inputs": [ + { + "data": "[9223372036854775807, -9223372036854775808, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + }, + { + "data": "[0, 9223372036854775807, -9223372036854775808]", + "shape": "3", + "dtype": "i64" + } + ], + "expected": [ + { + "data": "[9223372036854775807, -1, 0]", + "shape": "3", + "dtype": "i64" + } + ] + }, + { + "inputs": [ + { + "data": "[0, 9223372036854775807, 18446744073709551615]", + "shape": "3", + "dtype": "ui64" + }, + { + "data": "18446744073709551615", + "shape": "3", + "dtype": "ui64" + } + ], + "expected": [ + { + "data": "[18446744073709551615, 9223372036854775808, 0]", + "shape": "3", + "dtype": "ui64" + } + ] + }, + { + "inputs": [ + { + "data": "[false, false, true, true]", + "shape": "4", + "dtype": "i1" + }, + { + "data": "[false, true, false, true]", + "shape": "4", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true, true, false]", + "shape": "4", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "false", + "shape": "2", + "dtype": "i1" + }, + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true]", + "shape": 
"2", + "dtype": "i1" + } + ] + }, + { + "inputs": [ + { + "data": "false", + "shape": "2", + "dtype": "i1" + }, + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ], + "expected": [ + { + "data": "[false, true]", + "shape": "2", + "dtype": "i1" + } + ] + } + ] +} \ No newline at end of file diff --git a/libspu/compiler/tests/interpret/transpose.mlir b/libspu/compiler/tests/interpret/transpose.mlir new file mode 100644 index 00000000..126660db --- /dev/null +++ b/libspu/compiler/tests/interpret/transpose.mlir @@ -0,0 +1,30 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @transpose_op_test_si32() { + %0 = pphlo.constant dense<[[[1,2],[3,4],[5,6]], [[7,8],[9,10],[11,12]]]> : tensor<2x3x2xi32> + %1 = "pphlo.transpose"(%0) {permutation = array} : (tensor<2x3x2xi32>) -> tensor<3x2x2xi32> + %expected = pphlo.constant dense<[[[1, 2], [7, 8]], [[3, 4], [9, 10]], [[5, 6], [11, 12]]]> : tensor<3x2x2xi32> + pphlo.custom_call @expect_eq (%1, %expected) : (tensor<3x2x2xi32>,tensor<3x2x2xi32>)->() + func.return +} + +// ----- + +func.func @transpose_op_test_si32() { + %0 = pphlo.constant dense<[[[1,2],[3,4],[5,6]], [[7,8],[9,10],[11,12]]]> : tensor<2x3x2xi32> + %1 = "pphlo.transpose"(%0) {permutation = array} : (tensor<2x3x2xi32>) -> tensor<2x3x2xi32> + %expected = pphlo.constant dense<[[[1, 7], [3, 9], [5, 11]], [[2, 8], [4, 10], [6, 12]]]> : tensor<2x3x2xi32> + pphlo.custom_call @expect_eq (%1, %expected) : (tensor<2x3x2xi32>,tensor<2x3x2xi32>)->() + func.return +} + +// ----- + +func.func @transpose_op_test_si32() { + %0 = pphlo.constant dense<[[[1,2],[3,4],[5,6]], [[7,8],[9,10],[11,12]]]> : tensor<2x3x2xi32> + %1 = "pphlo.transpose"(%0) {permutation = array} : (tensor<2x3x2xi32>) -> tensor<2x3x2xi32> + %2 = "pphlo.transpose"(%1) {permutation = array} : (tensor<2x3x2xi32>) -> tensor<2x3x2xi32> + %expected = pphlo.constant dense<[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]> : tensor<2x3x2xi32> + pphlo.custom_call @expect_eq 
(%2, %expected) : (tensor<2x3x2xi32>,tensor<2x3x2xi32>)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/while.mlir b/libspu/compiler/tests/interpret/while.mlir new file mode 100644 index 00000000..2bcbd3ff --- /dev/null +++ b/libspu/compiler/tests/interpret/while.mlir @@ -0,0 +1,27 @@ +// RUN: spu-translate --interpret -split-input-file %s + +func.func @while() { + // int i = 0; + // int sum = 0; + // while (i < 10) { + // sum += 1; + // i += 1; + // } + %init_i = pphlo.constant dense<0> : tensor + %init_sum = pphlo.constant dense<0> : tensor + %one = pphlo.constant dense<1> : tensor + %ten = pphlo.constant dense<10> : tensor + %results0, %results1 = pphlo.while(%arg0 = %init_i, %arg1 = %init_sum) : tensor, tensor + cond { + %cond = pphlo.less %arg0, %ten : (tensor, tensor) -> tensor + pphlo.return %cond : tensor + } do { + %new_sum = pphlo.add %arg1, %one : tensor + %new_i = pphlo.add %arg0, %one : tensor + pphlo.return %new_i, %new_sum : tensor, tensor + } + %expected = pphlo.constant dense<10> : tensor + pphlo.custom_call @expect_eq (%results0, %expected) : (tensor,tensor)->() + pphlo.custom_call @expect_eq (%results1, %expected) : (tensor,tensor)->() + func.return +} diff --git a/libspu/compiler/tests/interpret/xor.mlir b/libspu/compiler/tests/interpret/xor.mlir new file mode 100644 index 00000000..21d3484d --- /dev/null +++ b/libspu/compiler/tests/interpret/xor.mlir @@ -0,0 +1,275 @@ +// RUN: spu-translate --interpret -split-input-file %s +// AUTO GENERATED, DO NOT EDIT + +func.func @xor_op_test_i8_i8_pp() { + %0 = pphlo.constant dense<[127, -128, -128]> : tensor<3xi8> + %1 = pphlo.constant dense<[0, 127, -128]> : tensor<3xi8> + %2 = pphlo.xor %0,%1 : (tensor<3xi8>,tensor<3xi8>)->tensor<3xi8> + %3 = pphlo.constant dense<[127, -1, 0]> : tensor<3xi8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i8_i8_ss() { + %0 = pphlo.constant dense<[127, -128, -128]> : 
tensor<3xi8> + %1 = pphlo.constant dense<[0, 127, -128]> : tensor<3xi8> + %2 = pphlo.convert %0 : (tensor<3xi8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi8>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[127, -1, 0]> : tensor<3xi8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi8>, tensor<3xi8>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui8_ui8_pp() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.constant dense<255> : tensor<3xui8> + %2 = pphlo.xor %0,%1 : (tensor<3xui8>,tensor<3xui8>)->tensor<3xui8> + %3 = pphlo.constant dense<[255, 128, 0]> : tensor<3xui8> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui8_ui8_ss() { + %0 = pphlo.constant dense<[0, 127, 255]> : tensor<3xui8> + %1 = pphlo.constant dense<255> : tensor<3xui8> + %2 = pphlo.convert %0 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui8>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[255, 128, 0]> : tensor<3xui8> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui8> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui8>, tensor<3xui8>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i16_i16_pp() { + %0 = pphlo.constant dense<[32767, -32768, -32768]> : tensor<3xi16> + %1 = pphlo.constant dense<[0, 32767, -32768]> : tensor<3xi16> + %2 = pphlo.xor %0,%1 : (tensor<3xi16>,tensor<3xi16>)->tensor<3xi16> + %3 = pphlo.constant dense<[32767, -1, 0]> : tensor<3xi16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i16_i16_ss() { + %0 = 
pphlo.constant dense<[32767, -32768, -32768]> : tensor<3xi16> + %1 = pphlo.constant dense<[0, 32767, -32768]> : tensor<3xi16> + %2 = pphlo.convert %0 : (tensor<3xi16>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi16>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[32767, -1, 0]> : tensor<3xi16> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi16>, tensor<3xi16>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui16_ui16_pp() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.constant dense<65535> : tensor<3xui16> + %2 = pphlo.xor %0,%1 : (tensor<3xui16>,tensor<3xui16>)->tensor<3xui16> + %3 = pphlo.constant dense<[65535, 32768, 0]> : tensor<3xui16> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui16_ui16_ss() { + %0 = pphlo.constant dense<[0, 32767, 65535]> : tensor<3xui16> + %1 = pphlo.constant dense<65535> : tensor<3xui16> + %2 = pphlo.convert %0 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui16>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[65535, 32768, 0]> : tensor<3xui16> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui16> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui16>, tensor<3xui16>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i32_i32_pp() { + %0 = pphlo.constant dense<[2147483647, -2147483648, -2147483648]> : tensor<3xi32> + %1 = pphlo.constant dense<[0, 2147483647, -2147483648]> : tensor<3xi32> + %2 = pphlo.xor %0,%1 : (tensor<3xi32>,tensor<3xi32>)->tensor<3xi32> + %3 = pphlo.constant dense<[2147483647, -1, 0]> : tensor<3xi32> + pphlo.custom_call @expect_eq(%2, 
%3) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i32_i32_ss() { + %0 = pphlo.constant dense<[2147483647, -2147483648, -2147483648]> : tensor<3xi32> + %1 = pphlo.constant dense<[0, 2147483647, -2147483648]> : tensor<3xi32> + %2 = pphlo.convert %0 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi32>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[2147483647, -1, 0]> : tensor<3xi32> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi32>, tensor<3xi32>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui32_ui32_pp() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.constant dense<4294967295> : tensor<3xui32> + %2 = pphlo.xor %0,%1 : (tensor<3xui32>,tensor<3xui32>)->tensor<3xui32> + %3 = pphlo.constant dense<[4294967295, 2147483648, 0]> : tensor<3xui32> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui32_ui32_ss() { + %0 = pphlo.constant dense<[0, 2147483647, 4294967295]> : tensor<3xui32> + %1 = pphlo.constant dense<4294967295> : tensor<3xui32> + %2 = pphlo.convert %0 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui32>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[4294967295, 2147483648, 0]> : tensor<3xui32> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui32> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui32>, tensor<3xui32>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i64_i64_pp() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, -9223372036854775808]> : tensor<3xi64> + %1 = 
pphlo.constant dense<[0, 9223372036854775807, -9223372036854775808]> : tensor<3xi64> + %2 = pphlo.xor %0,%1 : (tensor<3xi64>,tensor<3xi64>)->tensor<3xi64> + %3 = pphlo.constant dense<[9223372036854775807, -1, 0]> : tensor<3xi64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i64_i64_ss() { + %0 = pphlo.constant dense<[9223372036854775807, -9223372036854775808, -9223372036854775808]> : tensor<3xi64> + %1 = pphlo.constant dense<[0, 9223372036854775807, -9223372036854775808]> : tensor<3xi64> + %2 = pphlo.convert %0 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xi64>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant dense<[9223372036854775807, -1, 0]> : tensor<3xi64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xi64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xi64>, tensor<3xi64>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui64_ui64_pp() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.constant dense<18446744073709551615> : tensor<3xui64> + %2 = pphlo.xor %0,%1 : (tensor<3xui64>,tensor<3xui64>)->tensor<3xui64> + %3 = pphlo.constant dense<[18446744073709551615, 9223372036854775808, 0]> : tensor<3xui64> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @xor_op_test_ui64_ui64_ss() { + %0 = pphlo.constant dense<[0, 9223372036854775807, 18446744073709551615]> : tensor<3xui64> + %1 = pphlo.constant dense<18446744073709551615> : tensor<3xui64> + %2 = pphlo.convert %0 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<3xui64>)->tensor<3x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<3x!pphlo.secret>,tensor<3x!pphlo.secret>)->tensor<3x!pphlo.secret> + %5 = pphlo.constant 
dense<[18446744073709551615, 9223372036854775808, 0]> : tensor<3xui64> + %6 = pphlo.convert %4 : (tensor<3x!pphlo.secret>)->tensor<3xui64> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<3xui64>, tensor<3xui64>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i1_i1_pp() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.xor %0,%1 : (tensor<4xi1>,tensor<4xi1>)->tensor<4xi1> + %3 = pphlo.constant dense<[false, true, true, false]> : tensor<4xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i1_i1_ss() { + %0 = pphlo.constant dense<[false, false, true, true]> : tensor<4xi1> + %1 = pphlo.constant dense<[false, true, false, true]> : tensor<4xi1> + %2 = pphlo.convert %0 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<4xi1>)->tensor<4x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<4x!pphlo.secret>,tensor<4x!pphlo.secret>)->tensor<4x!pphlo.secret> + %5 = pphlo.constant dense<[false, true, true, false]> : tensor<4xi1> + %6 = pphlo.convert %4 : (tensor<4x!pphlo.secret>)->tensor<4xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<4xi1>, tensor<4xi1>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.xor %0,%1 : (tensor<2xi1>,tensor<2xi1>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %4 = pphlo.xor %2, %3 : 
(tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i1_i1_pp() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.xor %0,%1 : (tensor<2xi1>,tensor<2xi1>)->tensor<2xi1> + %3 = pphlo.constant dense<[false, true]> : tensor<2xi1> + pphlo.custom_call @expect_eq(%2, %3) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} + +// ----- + +func.func @xor_op_test_i1_i1_ss() { + %0 = pphlo.constant dense : tensor<2xi1> + %1 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %2 = pphlo.convert %0 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %3 = pphlo.convert %1 : (tensor<2xi1>)->tensor<2x!pphlo.secret> + %4 = pphlo.xor %2, %3 : (tensor<2x!pphlo.secret>,tensor<2x!pphlo.secret>)->tensor<2x!pphlo.secret> + %5 = pphlo.constant dense<[false, true]> : tensor<2xi1> + %6 = pphlo.convert %4 : (tensor<2x!pphlo.secret>)->tensor<2xi1> + pphlo.custom_call @expect_eq(%5, %6) : (tensor<2xi1>, tensor<2xi1>)->() + func.return +} diff --git a/libspu/compiler/tests/lit.cfg.py b/libspu/compiler/tests/lit.cfg.py index 5b984307..325b2c60 100644 --- a/libspu/compiler/tests/lit.cfg.py +++ b/libspu/compiler/tests/lit.cfg.py @@ -27,10 +27,7 @@ config.suffixes = ['.mlir'] config.test_source_root = os.path.dirname(__file__) # Make LLVM and StableHLO tools available in RUN directives -tools = [ - 'FileCheck', - 'pphlo-opt', -] +tools = ['FileCheck', 'spu-opt', 'spu-translate'] tool_dirs = [ config.llvm_tools_dir, config.pphlo_tools_dir, diff --git a/libspu/compiler/tests/optimizations/decompose_minmax.mlir b/libspu/compiler/tests/optimizations/decompose_minmax.mlir deleted file mode 100644 index e81f0f06..00000000 --- 
a/libspu/compiler/tests/optimizations/decompose_minmax.mlir +++ /dev/null @@ -1,17 +0,0 @@ -// RUN: pphlo-opt --decompose-minmax --split-input-file %s | FileCheck %s - -func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> (tensor<2x2xf32>) { - //CHECK: %0 = pphlo.greater %arg0, %arg1 : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xi1> - //CHECK: %1 = pphlo.select %0, %arg0, %arg1 : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> - %0 = pphlo.maximum %arg0, %arg1 : tensor<2x2xf32> - return %0 : tensor<2x2xf32> -} - -// ----- - -func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> (tensor<2x2xf32>) { - //CHECK: %0 = pphlo.less %arg0, %arg1 : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xi1> - //CHECK: %1 = pphlo.select %0, %arg0, %arg1 : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> - %0 = pphlo.minimum %arg0, %arg1 : tensor<2x2xf32> - return %0 : tensor<2x2xf32> -} diff --git a/libspu/compiler/tests/optimizations/expand_secret_gather.mlir b/libspu/compiler/tests/optimizations/expand_secret_gather.mlir deleted file mode 100644 index 6970ff21..00000000 --- a/libspu/compiler/tests/optimizations/expand_secret_gather.mlir +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: pphlo-opt --expand-secret-gather --split-input-file %s | FileCheck %s -func.func @main(%arg0: tensor<2xi32>, %arg1: tensor<1x!pphlo.secret>) -> (tensor>) { - //CHECK-NOT: pphlo.gather - //CHECK : pphlo.while - %0 = pphlo.custom_call @pphlo.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 0 : i64, slice_sizes = array}} : (tensor<2xi32>, tensor<1x!pphlo.secret>) -> tensor> - return %0: tensor> -} -// ----- -func.func @main(%arg0: tensor<3x3xi32>, %arg1: tensor<2x!pphlo.secret>) -> (tensor<2x3x!pphlo.secret>) { - //CHECK-NOT: pphlo.gather - //CHECK : pphlo.while - %0 = pphlo.custom_call @pphlo.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = 
array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 1 : i64, slice_sizes = array}} : (tensor<3x3xi32>, tensor<2x!pphlo.secret>) -> tensor<2x3x!pphlo.secret> - return %0 : tensor<2x3x!pphlo.secret> -} \ No newline at end of file diff --git a/libspu/compiler/tests/optimizations/no_expand_secret_gather.mlir b/libspu/compiler/tests/optimizations/no_expand_secret_gather.mlir deleted file mode 100644 index 19634168..00000000 --- a/libspu/compiler/tests/optimizations/no_expand_secret_gather.mlir +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: pphlo-opt --expand-secret-gather --split-input-file %s | FileCheck %s -func.func @main(%arg0: tensor<2x!pphlo.secret>, %arg1: tensor<1xi32>) -> (tensor>) { - //CHECK-NOT: pphlo.while - //CHECK : pphlo.gather - %0 = pphlo.custom_call @pphlo.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 0 : i64, slice_sizes = array}} : (tensor<2x!pphlo.secret>, tensor<1xi32>) -> tensor> - return %0: tensor> -} - -// ----- -func.func @main(%arg0: tensor<3x3xi32>, %arg1: tensor<2xi32>) -> (tensor<2x3xi32>) { - //CHECK-NOT: pphlo.while - //CHECK : pphlo.gather - %0 = pphlo.custom_call @pphlo.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 1 : i64, slice_sizes = array}} : (tensor<3x3xi32>, tensor<2xi32>) -> tensor<2x3xi32> - return %0 : tensor<2x3xi32> -} \ No newline at end of file diff --git a/libspu/compiler/tests/hlo2pphlo/binary_ops_pp.mlir b/libspu/compiler/tests/passes/hlo2pphlo/binary_ops_pp.mlir similarity index 94% rename from libspu/compiler/tests/hlo2pphlo/binary_ops_pp.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/binary_ops_pp.mlir index d5b99077..49c123ac 100644 --- a/libspu/compiler/tests/hlo2pphlo/binary_ops_pp.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/binary_ops_pp.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt 
--hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi32>) { // CHECK: pphlo.subtract %arg0, %arg1 : tensor<2x2xi32> diff --git a/libspu/compiler/tests/hlo2pphlo/binary_ops_ps.mlir b/libspu/compiler/tests/passes/hlo2pphlo/binary_ops_ps.mlir similarity index 95% rename from libspu/compiler/tests/hlo2pphlo/binary_ops_ps.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/binary_ops_ps.mlir index 9067c80f..17037e40 100644 --- a/libspu/compiler/tests/hlo2pphlo/binary_ops_ps.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/binary_ops_ps.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi32>) { // CHECK: pphlo.subtract %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2x!pphlo.secret>) -> tensor<2x2x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/binary_ops_ss.mlir b/libspu/compiler/tests/passes/hlo2pphlo/binary_ops_ss.mlir similarity index 95% rename from libspu/compiler/tests/hlo2pphlo/binary_ops_ss.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/binary_ops_ss.mlir index 266e9643..753d8bcc 100644 --- a/libspu/compiler/tests/hlo2pphlo/binary_ops_ss.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/binary_ops_ss.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET 
--lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi32>) { // CHECK: pphlo.subtract %arg0, %arg1 : tensor<2x2x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/comparison_pp.mlir b/libspu/compiler/tests/passes/hlo2pphlo/comparison_pp.mlir similarity index 94% rename from libspu/compiler/tests/hlo2pphlo/comparison_pp.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/comparison_pp.mlir index 3e827f98..2fa07dd0 100644 --- a/libspu/compiler/tests/hlo2pphlo/comparison_pp.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/comparison_pp.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi1>) { // CHECK: pphlo.equal %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> diff --git a/libspu/compiler/tests/hlo2pphlo/comparison_ps.mlir b/libspu/compiler/tests/passes/hlo2pphlo/comparison_ps.mlir similarity index 95% rename from libspu/compiler/tests/hlo2pphlo/comparison_ps.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/comparison_ps.mlir index b950bb9c..b9a4ea31 100644 --- a/libspu/compiler/tests/hlo2pphlo/comparison_ps.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/comparison_ps.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi1>) { // CHECK: %[[COMP:.+]] = pphlo.equal %arg0, %arg1 : (tensor<2x2xi32>, 
tensor<2x2x!pphlo.secret>) -> tensor<2x2x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/comparison_ss.mlir b/libspu/compiler/tests/passes/hlo2pphlo/comparison_ss.mlir similarity index 95% rename from libspu/compiler/tests/hlo2pphlo/comparison_ss.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/comparison_ss.mlir index 28a1a7dd..afbd9928 100644 --- a/libspu/compiler/tests/hlo2pphlo/comparison_ss.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/comparison_ss.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi1>) { // CHECK: %[[COMP:.+]] = pphlo.equal %arg0, %arg1 : (tensor<2x2x!pphlo.secret>, tensor<2x2x!pphlo.secret>) -> tensor<2x2x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/complex_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/complex_p.mlir similarity index 82% rename from libspu/compiler/tests/hlo2pphlo/complex_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/complex_p.mlir index e8b96359..abcbe68a 100644 --- a/libspu/compiler/tests/hlo2pphlo/complex_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/complex_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<3xcomplex>) -> tensor<3xcomplex> { // CHECK: %[[REAL:.+]] = pphlo.real %[[ARG0:.+]] : (tensor<3xcomplex>) -> tensor<3xf32> diff --git a/libspu/compiler/tests/hlo2pphlo/complex_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/complex_s.mlir similarity index 85% rename from 
libspu/compiler/tests/hlo2pphlo/complex_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/complex_s.mlir index 10936db9..b2f58576 100644 --- a/libspu/compiler/tests/hlo2pphlo/complex_s.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/complex_s.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<3xcomplex>) -> tensor<3xcomplex> { // CHECK: %[[REAL:.+]] = pphlo.real %[[ARG0:.+]] : (tensor<3x!pphlo.secret>>) -> tensor<3x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/conditional_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/conditional_p.mlir similarity index 91% rename from libspu/compiler/tests/hlo2pphlo/conditional_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/conditional_p.mlir index a7e1d66e..79c025b9 100644 --- a/libspu/compiler/tests/hlo2pphlo/conditional_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/conditional_p.mlir @@ -1,5 +1,5 @@ -// RUN: pphlo-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor, %arg1: tensor) -> tensor { //CHECK: %2 = "pphlo.if"(%1) ({ diff --git a/libspu/compiler/tests/hlo2pphlo/conditional_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/conditional_s.mlir similarity index 93% rename from libspu/compiler/tests/hlo2pphlo/conditional_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/conditional_s.mlir index 0b883492..b54d4dc2 100644 --- a/libspu/compiler/tests/hlo2pphlo/conditional_s.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/conditional_s.mlir @@ -1,5 +1,5 @@ -// RUN: pphlo-opt 
-hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor, %arg1: tensor) -> tensor { //CHECK: %2 = "pphlo.if"(%1) ({ diff --git a/libspu/compiler/tests/hlo2pphlo/dynamic_slice.mlir b/libspu/compiler/tests/passes/hlo2pphlo/dynamic_slice.mlir similarity index 73% rename from libspu/compiler/tests/hlo2pphlo/dynamic_slice.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/dynamic_slice.mlir index 16695333..9f0feb22 100644 --- a/libspu/compiler/tests/hlo2pphlo/dynamic_slice.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/dynamic_slice.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<15xi32>,%arg1: tensor) -> (tensor<1xi32>) { // CHECK: %0 = pphlo.dynamic_slice %arg0, %arg1 sizes = [1] : (tensor<15xi32>, tensor>) -> tensor<1x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/empty_function.mlir b/libspu/compiler/tests/passes/hlo2pphlo/empty_function.mlir similarity index 64% rename from libspu/compiler/tests/hlo2pphlo/empty_function.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/empty_function.mlir index 4f084d0d..bc8e4bf8 100644 --- a/libspu/compiler/tests/hlo2pphlo/empty_function.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/empty_function.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s 
func.func @main(%arg0: tensor<10xf64>, %arg1: tensor<10xf64>) -> (tensor<10xf64>, tensor<10xf64>) { // CHECK: return %arg0, %arg1 : tensor<10x!pphlo.secret>, tensor<10x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/gather_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/gather_p.mlir similarity index 66% rename from libspu/compiler/tests/hlo2pphlo/gather_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/gather_p.mlir index a8e6e8ff..f40272cd 100644 --- a/libspu/compiler/tests/hlo2pphlo/gather_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/gather_p.mlir @@ -1,7 +1,7 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main() -> tensor<2x3x2x2xi64> { - // CHECK: pphlo.custom_call @pphlo.gather(%0, %1) {pphlo.attributes = {collapsed_slice_dims = array, index_vector_dim = 2 : i64, offset_dims = array, slice_sizes = array, start_index_map = array}} : (tensor<3x4x2xi64>, tensor<2x3x2xi64>) -> tensor<2x3x2x2xi64> + // CHECK: pphlo.custom_call @spu.gather(%0, %1) {pphlo.attributes = {collapsed_slice_dims = array, index_vector_dim = 2 : i64, offset_dims = array, slice_sizes = array, start_index_map = array}} : (tensor<3x4x2xi64>, tensor<2x3x2xi64>) -> tensor<2x3x2x2xi64> %operand = stablehlo.constant dense<[[[1, 2], [3, 4], [5, 6], [7, 8]], [[9, 10], [11, 12], [13, 14], [15, 16]], [[17, 18], [19, 20], [21, 22], [23, 24]]]> : tensor<3x4x2xi64> diff --git a/libspu/compiler/tests/passes/hlo2pphlo/gather_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/gather_s.mlir new file mode 100644 index 00000000..3d4050c1 --- /dev/null +++ b/libspu/compiler/tests/passes/hlo2pphlo/gather_s.mlir @@ -0,0 +1,15 @@ +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s + 
+func.func @main(%arg0 : tensor<3x4x2xi64>, %arg1 : tensor<2x3x2xi64>) -> tensor<2x3x2x2xi64> { + // CHECK: pphlo.custom_call @spu.gather(%arg0, %arg1) {pphlo.attributes = {collapsed_slice_dims = array, index_vector_dim = 2 : i64, offset_dims = array, slice_sizes = array, start_index_map = array}} : (tensor<3x4x2x!pphlo.secret>, tensor<2x3x2x!pphlo.secret>) -> tensor<2x3x2x2x!pphlo.secret> + %result = "stablehlo.gather"(%arg0, %arg1) { + dimension_numbers = #stablehlo.gather< + offset_dims = [2, 3], + collapsed_slice_dims = [0], + start_index_map = [1, 0], + index_vector_dim = 2>, + slice_sizes = array, + indices_are_sorted = false + } : (tensor<3x4x2xi64>, tensor<2x3x2xi64>) -> tensor<2x3x2x2xi64> + return %result : tensor<2x3x2x2xi64> +} diff --git a/libspu/compiler/tests/hlo2pphlo/nullary_ops.mlir b/libspu/compiler/tests/passes/hlo2pphlo/nullary_ops.mlir similarity index 94% rename from libspu/compiler/tests/hlo2pphlo/nullary_ops.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/nullary_ops.mlir index fe0df5ce..3730fab0 100644 --- a/libspu/compiler/tests/hlo2pphlo/nullary_ops.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/nullary_ops.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo --split-input-file %s | FileCheck %s func.func @main() -> (tensor<2x2xi1>) { // CHECK: pphlo.constant dense : tensor<2x2xi1> diff --git a/libspu/compiler/tests/hlo2pphlo/reduce_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/reduce_p.mlir similarity index 93% rename from libspu/compiler/tests/hlo2pphlo/reduce_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/reduce_p.mlir index b6309fc4..4126b8ce 100644 --- a/libspu/compiler/tests/hlo2pphlo/reduce_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/reduce_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt 
--hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s // CHECK: func @main(%arg0: tensor<1024x1xf32>) -> tensor<1024xf32> { func.func @main(%arg1: tensor<1024x1xf32>) -> (tensor<1024xf32>) { diff --git a/libspu/compiler/tests/hlo2pphlo/reduce_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/reduce_s.mlir similarity index 93% rename from libspu/compiler/tests/hlo2pphlo/reduce_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/reduce_s.mlir index c9b1f2d0..57444b14 100644 --- a/libspu/compiler/tests/hlo2pphlo/reduce_s.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/reduce_s.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg1: tensor<1024x1xf32>) -> (tensor<1024xf32>) { %0 = "stablehlo.constant"() {value = dense<0.000000e+00> : tensor} : () -> tensor diff --git a/libspu/compiler/tests/hlo2pphlo/select_and_scatter.mlir b/libspu/compiler/tests/passes/hlo2pphlo/select_and_scatter.mlir similarity index 94% rename from libspu/compiler/tests/hlo2pphlo/select_and_scatter.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/select_and_scatter.mlir index 01e04f67..ac7492bc 100644 --- a/libspu/compiler/tests/hlo2pphlo/select_and_scatter.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/select_and_scatter.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<128x5x5x32xf32>, %arg1: tensor<128x4x4x32xf32>, %arg2: tensor) -> tensor<128x5x5x32xf32> { // CHECK: %1 = 
"pphlo.select_and_scatter"(%arg0, %arg1, %0) ({ diff --git a/libspu/compiler/tests/hlo2pphlo/shape_ops.mlir b/libspu/compiler/tests/passes/hlo2pphlo/shape_ops.mlir similarity index 92% rename from libspu/compiler/tests/hlo2pphlo/shape_ops.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/shape_ops.mlir index 1a6aa553..8bd36dcc 100644 --- a/libspu/compiler/tests/hlo2pphlo/shape_ops.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/shape_ops.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<16xf32>) -> (tensor<1024x16xf32>) { // CHECK: %0 = pphlo.broadcast %arg0, dims = [1] : (tensor<16xf32>) -> tensor<1024x16xf32> diff --git a/libspu/compiler/tests/hlo2pphlo/sort_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/sort_p.mlir similarity index 87% rename from libspu/compiler/tests/hlo2pphlo/sort_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/sort_p.mlir index e365843f..2224ee7d 100644 --- a/libspu/compiler/tests/hlo2pphlo/sort_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/sort_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<20xi32>) -> (tensor<20xi32>) { %0 = stablehlo.iota dim = 0 : tensor<20xi32> diff --git a/libspu/compiler/tests/hlo2pphlo/sort_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/sort_s.mlir similarity index 89% rename from libspu/compiler/tests/hlo2pphlo/sort_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/sort_s.mlir index a783ca2e..c4d4bd41 100644 --- a/libspu/compiler/tests/hlo2pphlo/sort_s.mlir +++ 
b/libspu/compiler/tests/passes/hlo2pphlo/sort_s.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<20xi32>) -> (tensor<20xi32>) { %0 = stablehlo.iota dim = 0 : tensor<20xi32> diff --git a/libspu/compiler/tests/hlo2pphlo/tenary_ops_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_p.mlir similarity index 84% rename from libspu/compiler/tests/hlo2pphlo/tenary_ops_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_p.mlir index eb7baba1..2622cf84 100644 --- a/libspu/compiler/tests/hlo2pphlo/tenary_ops_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<1024x1xi1>, %arg1: tensor<1024x1xf32>, %arg2: tensor<1024x1xf32>) -> (tensor<1024x1xf32>) { // CHECK: pphlo.select %arg0, %arg1, %arg2 : (tensor<1024x1xi1>, tensor<1024x1xf32>, tensor<1024x1xf32>) -> tensor<1024x1xf32> diff --git a/libspu/compiler/tests/hlo2pphlo/tenary_ops_s_1.mlir b/libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_s_1.mlir similarity index 85% rename from libspu/compiler/tests/hlo2pphlo/tenary_ops_s_1.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_s_1.mlir index ad8757cd..1160bda6 100644 --- a/libspu/compiler/tests/hlo2pphlo/tenary_ops_s_1.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_s_1.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast 
--split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<1024x1xi1>, %arg1: tensor<1024x1xf32>, %arg2: tensor<1024x1xf32>) -> (tensor<1024x1xf32>) { // CHECK:pphlo.select %arg0, %arg1, %arg2 : (tensor<1024x1x!pphlo.secret>, tensor<1024x1xf32>, tensor<1024x1xf32>) -> tensor<1024x1x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/tenary_ops_s_2.mlir b/libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_s_2.mlir similarity index 85% rename from libspu/compiler/tests/hlo2pphlo/tenary_ops_s_2.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_s_2.mlir index 479aa3fc..a123022b 100644 --- a/libspu/compiler/tests/hlo2pphlo/tenary_ops_s_2.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/tenary_ops_s_2.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_SECRET,VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<1024x1xi1>, %arg1: tensor<1024x1xf32>, %arg2: tensor<1024x1xf32>) -> (tensor<1024x1xf32>) { // CHECK:pphlo.select %arg0, %arg1, %arg2 : (tensor<1024x1xi1>, tensor<1024x1x!pphlo.secret>, tensor<1024x1xf32>) -> tensor<1024x1x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/unary_ops_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/unary_ops_p.mlir similarity index 95% rename from libspu/compiler/tests/hlo2pphlo/unary_ops_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/unary_ops_p.mlir index 259fcae8..f93fed67 100644 --- a/libspu/compiler/tests/hlo2pphlo/unary_ops_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/unary_ops_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC 
--lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>) -> (tensor<2x2xf32>) { // CHECK: pphlo.convert %arg0 : (tensor<2x2xi32>) -> tensor<2x2xf32> diff --git a/libspu/compiler/tests/hlo2pphlo/unary_ops_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/unary_ops_s.mlir similarity index 95% rename from libspu/compiler/tests/hlo2pphlo/unary_ops_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/unary_ops_s.mlir index 457ca25d..128c31ed 100644 --- a/libspu/compiler/tests/hlo2pphlo/unary_ops_s.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/unary_ops_s.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>) -> (tensor<2x2xf32>) { // CHECK: pphlo.convert %arg0 : (tensor<2x2x!pphlo.secret>) -> tensor<2x2x!pphlo.secret> diff --git a/libspu/compiler/tests/hlo2pphlo/vreduce_mixed.mlir b/libspu/compiler/tests/passes/hlo2pphlo/vreduce_mixed.mlir similarity index 91% rename from libspu/compiler/tests/hlo2pphlo/vreduce_mixed.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/vreduce_mixed.mlir index de0e8f77..8873e3c1 100644 --- a/libspu/compiler/tests/hlo2pphlo/vreduce_mixed.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/vreduce_mixed.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor<1024x1xf32>, %arg1: tensor<1024x1xf32>) -> (tensor<1024xf32>, tensor<1024xf32>) 
{ %0 = "stablehlo.constant"() {value = dense<0.000000e+00> : tensor} : () -> tensor diff --git a/libspu/compiler/tests/hlo2pphlo/vreduce_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/vreduce_p.mlir similarity index 90% rename from libspu/compiler/tests/hlo2pphlo/vreduce_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/vreduce_p.mlir index d957a380..28da8bc4 100644 --- a/libspu/compiler/tests/hlo2pphlo/vreduce_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/vreduce_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC,VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor<1024x1xf32>, %arg1: tensor<1024x1xf32>) -> (tensor<1024xf32>, tensor<1024xf32>) { %0 = "stablehlo.constant"() {value = dense<0.000000e+00> : tensor} : () -> tensor diff --git a/libspu/compiler/tests/hlo2pphlo/vreduce_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/vreduce_s.mlir similarity index 92% rename from libspu/compiler/tests/hlo2pphlo/vreduce_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/vreduce_s.mlir index 0d74589e..5c48d647 100644 --- a/libspu/compiler/tests/hlo2pphlo/vreduce_s.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/vreduce_s.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt --hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET,VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor<1024x1xf32>, %arg1: tensor<1024x1xf32>) -> (tensor<1024xf32>, tensor<1024xf32>) { %0 = "stablehlo.constant"() {value = dense<0.000000e+00> : tensor} : () -> tensor diff --git a/libspu/compiler/tests/hlo2pphlo/while_p.mlir b/libspu/compiler/tests/passes/hlo2pphlo/while_p.mlir similarity 
index 87% rename from libspu/compiler/tests/hlo2pphlo/while_p.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/while_p.mlir index 8ca9c1ee..29259de0 100644 --- a/libspu/compiler/tests/hlo2pphlo/while_p.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/while_p.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_PUBLIC --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor) -> tensor { //CHECK: %0 = pphlo.while(%arg1 = %arg0) : tensor diff --git a/libspu/compiler/tests/hlo2pphlo/while_s.mlir b/libspu/compiler/tests/passes/hlo2pphlo/while_s.mlir similarity index 88% rename from libspu/compiler/tests/hlo2pphlo/while_s.mlir rename to libspu/compiler/tests/passes/hlo2pphlo/while_s.mlir index d5ba277a..166db0fa 100644 --- a/libspu/compiler/tests/hlo2pphlo/while_s.mlir +++ b/libspu/compiler/tests/passes/hlo2pphlo/while_s.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s +// RUN: spu-opt -hlo-legalize-to-pphlo=input_vis_list=VIS_SECRET --lower-conversion-cast %s --split-input-file | FileCheck %s func.func @main(%arg0: tensor) -> tensor { //CHECK: %0 = pphlo.while(%arg1 = %arg0) : tensor> diff --git a/libspu/compiler/tests/passes/optimizations/canonicalize.mlir b/libspu/compiler/tests/passes/optimizations/canonicalize.mlir new file mode 100644 index 00000000..416b354c --- /dev/null +++ b/libspu/compiler/tests/passes/optimizations/canonicalize.mlir @@ -0,0 +1,98 @@ +// RUN: spu-opt --canonicalize --split-input-file %s | FileCheck %s + +// ----- + +func.func @mul_fp_cf32(%arg0: tensor<2xf32>) -> (tensor<2xf32>) { + // CHECK: return %arg0 : tensor<2xf32> + %0 = pphlo.constant() {value = dense<1.000000e+00> : tensor<2xf32>} : () -> tensor<2xf32> + %1 = pphlo.multiply %arg0, %0 : 
(tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32> + return %1 : tensor<2xf32> +} + +// ----- + +func.func @mul_fp_ci32(%arg0: tensor<2xf32>) -> (tensor<2xf32>) { + // CHECK: return %arg0 : tensor<2xf32> + %0 = pphlo.constant() {value = dense<1> : tensor<2xi32>} : () -> tensor<2xi32> + %1 = pphlo.multiply %arg0, %0 : (tensor<2xf32>, tensor<2xi32>) -> tensor<2xf32> + return %1 : tensor<2xf32> +} + +// ----- + +func.func @mul_fp_ci32_ci32(%arg0: tensor<2xf32>) -> (tensor<2xf32>) { + // CHECK: %0 = pphlo.constant dense<3> : tensor<2xi32> + // CHECK: %1 = pphlo.multiply %arg0, %0 : (tensor<2xf32>, tensor<2xi32>) -> tensor<2xf32> + // CHECK: return %1 : tensor<2xf32> + %0 = pphlo.constant() {value = dense<1> : tensor<2xi32>} : () -> tensor<2xi32> + %1 = pphlo.constant() {value = dense<3> : tensor<2xi32>} : () -> tensor<2xi32> + %2 = pphlo.multiply %0, %1 : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %3 = pphlo.multiply %arg0, %2 : (tensor<2xf32>, tensor<2xi32>) -> tensor<2xf32> + return %3 : tensor<2xf32> +} + +// ----- + +func.func @mul_fp_ci32_cf32(%arg0: tensor<2xf32>) -> (tensor<2xf32>) { + // CHECK: %0 = pphlo.constant dense<6.000000e+00> : tensor<2xf32> + // CHECK: %1 = pphlo.multiply %arg0, %0 : tensor<2xf32> + // CHECK: return %1 : tensor<2xf32> + %0 = pphlo.constant() {value = dense<2> : tensor<2xi32>} : () -> tensor<2xi32> + %1 = pphlo.constant() {value = dense<3.0> : tensor<2xf32>} : () -> tensor<2xf32> + %2 = pphlo.multiply %0, %1 : (tensor<2xi32>, tensor<2xf32>) -> tensor<2xf32> + %3 = pphlo.multiply %arg0, %2 : (tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32> + return %3 : tensor<2xf32> +} + +// ----- + +func.func @slice_full(%arg0: tensor<2xf32>) -> (tensor<2xf32>) { + //CHECK: return %arg0 + %0 = pphlo.slice %arg0 [0:1:2] : (tensor<2xf32>) -> tensor<2xf32> + return %0 : tensor<2xf32> +} + +// ----- + +func.func @slice_full_1d(%arg0: tensor<2x2xf32>) -> (tensor<2x1xf32>) { + //CHECK: %0 = pphlo.slice + //CHECK: return %0 + %0 = pphlo.slice %arg0 [0:1:2, 
0:1:1] : (tensor<2x2xf32>) -> tensor<2x1xf32> + return %0 : tensor<2x1xf32> +} + +// ----- + +func.func @dot_vv(%arg0: tensor<2xf32>, %arg1: tensor<2xf32>) -> (tensor) { + //CHECK: %0 = pphlo.reshape %arg0 : (tensor<2xf32>) -> tensor<1x2xf32> + //CHECK: %1 = pphlo.reshape %arg1 : (tensor<2xf32>) -> tensor<2x1xf32> + //CHECK: %2 = pphlo.dot %0, %1 : (tensor<1x2xf32>, tensor<2x1xf32>) -> tensor<1x1xf32> + //CHECK: %3 = pphlo.reshape %2 : (tensor<1x1xf32>) -> tensor + %0 = pphlo.dot %arg0,%arg1 : (tensor<2xf32>, tensor<2xf32>) -> tensor + return %0 : tensor +} + +// ----- + +func.func @dot_mv(%arg0: tensor<2x2xf32>, %arg1: tensor<2xf32>) -> (tensor<2xf32>) { + //CHECK: %0 = pphlo.reshape %arg1 : (tensor<2xf32>) -> tensor<2x1xf32> + //CHECK: %1 = pphlo.dot %arg0, %0 : (tensor<2x2xf32>, tensor<2x1xf32>) -> tensor<2x1xf32> + //CHECK: %2 = pphlo.reshape %1 : (tensor<2x1xf32>) -> tensor<2xf32> + %0 = pphlo.dot %arg0,%arg1 : (tensor<2x2xf32>, tensor<2xf32>) -> tensor<2xf32> + return %0 : tensor<2xf32> +} + +// ----- + +func.func @main(%arg0: tensor<1x1x4xf32>, %arg1: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> { + //CHECK: %4 = pphlo.reshape %2 : (tensor<1x4x1xf32>) -> tensor<1x1x4x1xf32> + //CHECK: %5 = pphlo.reshape %3 : (tensor<3x1x1xf32>) -> tensor<1x3x1x1xf32> + //CHECK: %6 = pphlo.convolution(%4, %5) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [1, 1]} : (tensor<1x1x4x1xf32>, tensor<1x3x1x1xf32>) -> tensor<1x1x2x1xf32> + //CHECK: %7 = pphlo.reshape %6 : (tensor<1x1x2x1xf32>) -> tensor<1x2x1xf32> + %0 = pphlo.constant dense<0.000000e+00> : tensor + %1 = pphlo.pad %arg1, %0, low = [0, 0, 0], high = [0, 0, 0], interior = [0, 0, 1] : (tensor<1x1x2xf32>, tensor) -> tensor<1x1x3xf32> + %2 = pphlo.convolution(%arg0, %1) + dim_numbers = [b, f, 0]x[o, i, 0]->[b, f, 0], + window = {stride = [1]} : (tensor<1x1x4xf32>, tensor<1x1x3xf32>) -> tensor<1x1x2xf32> + return %2 : tensor<1x1x2xf32> +} diff --git 
a/libspu/compiler/tests/optimizations/convert_push_down.mlir b/libspu/compiler/tests/passes/optimizations/convert_push_down.mlir similarity index 91% rename from libspu/compiler/tests/optimizations/convert_push_down.mlir rename to libspu/compiler/tests/passes/optimizations/convert_push_down.mlir index 6c74942b..070768ee 100644 --- a/libspu/compiler/tests/optimizations/convert_push_down.mlir +++ b/libspu/compiler/tests/passes/optimizations/convert_push_down.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --convert-push-down --cse --split-input-file %s | FileCheck %s +// RUN: spu-opt --convert-push-down --cse --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<4xi32>, %arg1: tensor<2x2xf32>) -> (tensor<2x2xf32>) { // CHECK: %0 = pphlo.reshape %arg0 : (tensor<4xi32>) -> tensor<2x2xi32> diff --git a/libspu/compiler/tests/optimizations/decompose_comparison.mlir b/libspu/compiler/tests/passes/optimizations/decompose_ops.mlir similarity index 52% rename from libspu/compiler/tests/optimizations/decompose_comparison.mlir rename to libspu/compiler/tests/passes/optimizations/decompose_ops.mlir index dddea3f6..aab75100 100644 --- a/libspu/compiler/tests/optimizations/decompose_comparison.mlir +++ b/libspu/compiler/tests/passes/optimizations/decompose_ops.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --decompose-comparison --cse --split-input-file %s | FileCheck %s +// RUN: spu-opt --decompose-ops --canonicalize --cse --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi1>) { // CHECK: %0 = pphlo.equal %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> @@ -10,7 +10,7 @@ func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi // ----- func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi1>) { - // CHECK: %0 = pphlo.greater %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> + // CHECK: %0 = pphlo.less %arg1, %arg0 : 
(tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> // CHECK: %1 = pphlo.not %0 : tensor<2x2xi1> %0 = pphlo.less_equal %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> return %0 : tensor<2x2xi1> @@ -31,7 +31,7 @@ func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi //CHECK: %0 = pphlo.equal %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> //CHECK: %1 = pphlo.not %0 : tensor<2x2xi1> //CHECK: %2 = pphlo.less %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> - //CHECK: %3 = pphlo.greater %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> + //CHECK: %3 = pphlo.less %arg1, %arg0 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> //CHECK: %4 = pphlo.not %3 : tensor<2x2xi1> //CHECK: %5 = pphlo.not %2 : tensor<2x2xi1> %0 = pphlo.equal %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> @@ -42,3 +42,41 @@ func.func @main(%arg0: tensor<2x2xi32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xi %5 = pphlo.greater_equal %arg0, %arg1 : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi1> return %0, %1, %2, %3, %4, %5 : tensor<2x2xi1>, tensor<2x2xi1>, tensor<2x2xi1>, tensor<2x2xi1>, tensor<2x2xi1>, tensor<2x2xi1> } + +// ----- + +func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> (tensor<2x2xf32>) { + //CHECK: %0 = pphlo.less %arg1, %arg0 : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xi1> + //CHECK: %1 = pphlo.select %0, %arg0, %arg1 : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %0 = pphlo.maximum %arg0, %arg1 : tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + +func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> (tensor<2x2xf32>) { + //CHECK: %0 = pphlo.less %arg0, %arg1 : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xi1> + //CHECK: %1 = pphlo.select %0, %arg0, %arg1 : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %0 = pphlo.minimum %arg0, %arg1 : tensor<2x2xf32> 
+ return %0 : tensor<2x2xf32> +} + +// ----- + +func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> (tensor<2x2xf32>) { + //CHECK: %0 = pphlo.negate %arg1 : tensor<2x2xf32> + //CHECK: %1 = pphlo.add %arg0, %0 : tensor<2x2xf32> + %0 = pphlo.subtract %arg0, %arg1 : tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + +func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>, %arg2: tensor<2x2xf32>) -> (tensor<2x2xf32>) { + //CHECK: %0 = pphlo.less %arg1, %arg0 : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xi1> + //CHECK: %1 = pphlo.select %0, %arg0, %arg1 : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + //CHECK: %2 = pphlo.less %1, %arg2 : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xi1> + //CHECK: %3 = pphlo.select %2, %1, %arg2 : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %0 = pphlo.clamp %arg0, %arg1, %arg2 : tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} diff --git a/libspu/compiler/tests/passes/optimizations/expand_secret_gather.mlir b/libspu/compiler/tests/passes/optimizations/expand_secret_gather.mlir new file mode 100644 index 00000000..d46fa9c2 --- /dev/null +++ b/libspu/compiler/tests/passes/optimizations/expand_secret_gather.mlir @@ -0,0 +1,14 @@ +// RUN: spu-opt --expand-secret-gather --split-input-file %s | FileCheck %s +func.func @main(%arg0: tensor<2xi32>, %arg1: tensor<1x!pphlo.secret>) -> (tensor>) { + //CHECK-NOT: spu.gather + //CHECK : pphlo.while + %0 = pphlo.custom_call @spu.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 0 : i64, slice_sizes = array}} : (tensor<2xi32>, tensor<1x!pphlo.secret>) -> tensor> + return %0: tensor> +} +// ----- +func.func @main(%arg0: tensor<3x3xi32>, %arg1: tensor<2x!pphlo.secret>) -> (tensor<2x3x!pphlo.secret>) { + //CHECK-NOT: spu.gather + //CHECK : pphlo.while + %0 = pphlo.custom_call @spu.gather(%arg0, %arg1) {pphlo.attributes = 
{offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 1 : i64, slice_sizes = array}} : (tensor<3x3xi32>, tensor<2x!pphlo.secret>) -> tensor<2x3x!pphlo.secret> + return %0 : tensor<2x3x!pphlo.secret> +} \ No newline at end of file diff --git a/libspu/compiler/tests/optimizations/lower_mixed_type_op.mlir b/libspu/compiler/tests/passes/optimizations/lower_mixed_type_op.mlir similarity index 96% rename from libspu/compiler/tests/optimizations/lower_mixed_type_op.mlir rename to libspu/compiler/tests/passes/optimizations/lower_mixed_type_op.mlir index 4b25fd51..1363272e 100644 --- a/libspu/compiler/tests/optimizations/lower_mixed_type_op.mlir +++ b/libspu/compiler/tests/passes/optimizations/lower_mixed_type_op.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --lower-mixed-type-op --split-input-file %s | FileCheck %s +// RUN: spu-opt --lower-mixed-type-op --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xi32>) -> (tensor<2x2xf32>) { //CHECK: %0 = pphlo.multiply %arg0, %arg1 : (tensor<2x2xf32>, tensor<2x2xi32>) -> tensor<2x2xf32> diff --git a/libspu/compiler/tests/passes/optimizations/no_expand_secret_gather.mlir b/libspu/compiler/tests/passes/optimizations/no_expand_secret_gather.mlir new file mode 100644 index 00000000..5f07808f --- /dev/null +++ b/libspu/compiler/tests/passes/optimizations/no_expand_secret_gather.mlir @@ -0,0 +1,15 @@ +// RUN: spu-opt --expand-secret-gather --split-input-file %s | FileCheck %s +func.func @main(%arg0: tensor<2x!pphlo.secret>, %arg1: tensor<1xi32>) -> (tensor>) { + //CHECK-NOT: pphlo.while + //CHECK : spu.gather + %0 = pphlo.custom_call @spu.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 0 : i64, slice_sizes = array}} : (tensor<2x!pphlo.secret>, tensor<1xi32>) -> tensor> + return %0: tensor> +} + +// ----- +func.func @main(%arg0: tensor<3x3xi32>, %arg1: tensor<2xi32>) -> 
(tensor<2x3xi32>) { + //CHECK-NOT: pphlo.while + //CHECK : spu.gather + %0 = pphlo.custom_call @spu.gather(%arg0, %arg1) {pphlo.attributes = {offset_dims = array, collapsed_slice_dims = array, start_index_map = array, index_vector_dim = 1 : i64, slice_sizes = array}} : (tensor<3x3xi32>, tensor<2xi32>) -> tensor<2x3xi32> + return %0 : tensor<2x3xi32> +} \ No newline at end of file diff --git a/libspu/compiler/tests/optimizations/ops_negative.mlir b/libspu/compiler/tests/passes/optimizations/ops_negative.mlir similarity index 98% rename from libspu/compiler/tests/optimizations/ops_negative.mlir rename to libspu/compiler/tests/passes/optimizations/ops_negative.mlir index 2be57b73..926a5de5 100644 --- a/libspu/compiler/tests/optimizations/ops_negative.mlir +++ b/libspu/compiler/tests/passes/optimizations/ops_negative.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt %s -verify-diagnostics -split-input-file +// RUN: spu-opt %s -verify-diagnostics -split-input-file func.func @main() -> tensor { %0 = pphlo.constant dense<1.3347515E+38> : tensor diff --git a/libspu/compiler/tests/optimizations/optimize_denominator_with_bcst.mlir b/libspu/compiler/tests/passes/optimizations/optimize_denominator_with_bcst.mlir similarity index 84% rename from libspu/compiler/tests/optimizations/optimize_denominator_with_bcst.mlir rename to libspu/compiler/tests/passes/optimizations/optimize_denominator_with_bcst.mlir index b362affd..d7772f02 100644 --- a/libspu/compiler/tests/optimizations/optimize_denominator_with_bcst.mlir +++ b/libspu/compiler/tests/passes/optimizations/optimize_denominator_with_bcst.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --optimize-denominator-with-broadcast --split-input-file %s | FileCheck %s +// RUN: spu-opt --optimize-denominator-with-broadcast --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<16x!pphlo.secret>, %arg1: tensor<16x10000x!pphlo.secret>) -> (tensor<16x10000x!pphlo.secret>) { //CHECK: %0 = pphlo.reciprocal %arg0 diff --git 
a/libspu/compiler/tests/optimizations/optimize_maxpool.mlir b/libspu/compiler/tests/passes/optimizations/optimize_maxpool.mlir similarity index 97% rename from libspu/compiler/tests/optimizations/optimize_maxpool.mlir rename to libspu/compiler/tests/passes/optimizations/optimize_maxpool.mlir index 035616c3..0026eaa0 100644 --- a/libspu/compiler/tests/optimizations/optimize_maxpool.mlir +++ b/libspu/compiler/tests/passes/optimizations/optimize_maxpool.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --optimize-maxpool --split-input-file %s | FileCheck %s +// RUN: spu-opt --optimize-maxpool --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<129x24x24x16x!pphlo.secret>, %arg1: tensor<129x23x23x16x!pphlo.secret>) -> (tensor<129x23x23x16x!pphlo.secret>, tensor<129x24x24x16x!pphlo.secret>) { %0 = pphlo.constant dense<0xFF800000> : tensor diff --git a/libspu/compiler/tests/optimizations/optimize_select.mlir b/libspu/compiler/tests/passes/optimizations/optimize_select.mlir similarity index 90% rename from libspu/compiler/tests/optimizations/optimize_select.mlir rename to libspu/compiler/tests/passes/optimizations/optimize_select.mlir index 02e5dc12..b8578356 100644 --- a/libspu/compiler/tests/optimizations/optimize_select.mlir +++ b/libspu/compiler/tests/passes/optimizations/optimize_select.mlir @@ -1,10 +1,10 @@ -// RUN: pphlo-opt --optimize-select --split-input-file %s | FileCheck %s +// RUN: spu-opt --optimize-select --split-input-file %s | FileCheck %s func.func @main() -> (tensor) { %0 = pphlo.constant dense<0xFF800000> : tensor %1 = pphlo.constant dense<1.000000e+00> : tensor %2 = pphlo.less %0, %1: (tensor, tensor) -> tensor - //CHECK-NOT: pphlo.prefer_a + //CHECK-NOT: spu.prefer_a %3 = pphlo.select %2, %0, %1: (tensor, tensor, tensor) -> tensor return %3: tensor } @@ -15,7 +15,7 @@ func.func @main() -> (tensor, tensor) { %0 = pphlo.constant dense<0xFF800000> : tensor %1 = pphlo.constant dense<1.000000e+00> : tensor %2 = pphlo.less %0, %1: (tensor, tensor) -> 
tensor - //CHECK: pphlo.prefer_a + //CHECK: spu.prefer_a %3 = pphlo.select %2, %0, %1: (tensor, tensor, tensor) -> tensor %4 = pphlo.select %2, %1, %0: (tensor, tensor, tensor) -> tensor return %3, %4: tensor, tensor @@ -27,7 +27,7 @@ func.func @main() -> (tensor, tensor) { %0 = pphlo.constant dense<0xFF800000> : tensor %1 = pphlo.constant dense<1.000000e+00> : tensor %2 = pphlo.less %0, %1: (tensor, tensor) -> tensor - //CHECK-NOT: pphlo.prefer_a + //CHECK-NOT: spu.prefer_a %3 = pphlo.select %2, %0, %1: (tensor, tensor, tensor) -> tensor %4 = pphlo.not %2: tensor return %3, %4: tensor, tensor @@ -38,7 +38,7 @@ func.func @main() -> (tensor, tensor) { func.func @main(%arg0: tensor) -> (tensor, tensor) { %0 = pphlo.constant dense<0xFF800000> : tensor %1 = pphlo.constant dense<1.000000e+00> : tensor - //CHECK: pphlo.prefer_a + //CHECK: spu.prefer_a %2 = pphlo.select %arg0, %0, %1: (tensor, tensor, tensor) -> tensor %3 = pphlo.select %arg0, %1, %0: (tensor, tensor, tensor) -> tensor return %2, %3: tensor, tensor diff --git a/libspu/compiler/tests/optimizations/optimize_sqrt_to_rsqrt.mlir b/libspu/compiler/tests/passes/optimizations/optimize_sqrt_to_rsqrt.mlir similarity index 96% rename from libspu/compiler/tests/optimizations/optimize_sqrt_to_rsqrt.mlir rename to libspu/compiler/tests/passes/optimizations/optimize_sqrt_to_rsqrt.mlir index ffcfce03..f590bcda 100644 --- a/libspu/compiler/tests/optimizations/optimize_sqrt_to_rsqrt.mlir +++ b/libspu/compiler/tests/passes/optimizations/optimize_sqrt_to_rsqrt.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --optimize-sqrt-plus-eps --rewrite-div-sqrt-pattern --split-input-file %s | FileCheck %s +// RUN: spu-opt --optimize-sqrt-plus-eps --rewrite-div-sqrt-pattern --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor, %arg1: tensor) -> (tensor) { %0 = pphlo.constant dense<9.99999993E-9> : tensor diff --git a/libspu/compiler/tests/optimizations/partial_sort_to_topk.mlir 
b/libspu/compiler/tests/passes/optimizations/partial_sort_to_topk.mlir similarity index 98% rename from libspu/compiler/tests/optimizations/partial_sort_to_topk.mlir rename to libspu/compiler/tests/passes/optimizations/partial_sort_to_topk.mlir index ccf8d05d..925c5152 100644 --- a/libspu/compiler/tests/optimizations/partial_sort_to_topk.mlir +++ b/libspu/compiler/tests/passes/optimizations/partial_sort_to_topk.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --partial-sort-to-topk --split-input-file %s | FileCheck %s +// RUN: spu-opt --partial-sort-to-topk --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<12x2x!pphlo.secret>) -> tensor<2x!pphlo.secret> { // CHECK: pphlo.custom_call @mhlo.topk(%10) {mhlo.attributes = {k = 5 : i64, k_hi = 7 : i64, largest = true, value_only = true}} : (tensor<2x12x!pphlo.secret>) -> tensor<2x7x!pphlo.secret> diff --git a/libspu/compiler/tests/optimizations/pphlo_simple_dealloc.mlir b/libspu/compiler/tests/passes/optimizations/pphlo_simple_dealloc.mlir similarity index 85% rename from libspu/compiler/tests/optimizations/pphlo_simple_dealloc.mlir rename to libspu/compiler/tests/passes/optimizations/pphlo_simple_dealloc.mlir index 48e37f1a..9405013c 100644 --- a/libspu/compiler/tests/optimizations/pphlo_simple_dealloc.mlir +++ b/libspu/compiler/tests/passes/optimizations/pphlo_simple_dealloc.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --insert-deallocation --split-input-file %s | FileCheck %s +// RUN: spu-opt --insert-deallocation --split-input-file %s | FileCheck %s func.func @main() -> (tensor) { %0 = pphlo.constant dense<0xFF800000> : tensor diff --git a/libspu/compiler/tests/optimizations/reduce_truncation.mlir b/libspu/compiler/tests/passes/optimizations/reduce_truncation.mlir similarity index 89% rename from libspu/compiler/tests/optimizations/reduce_truncation.mlir rename to libspu/compiler/tests/passes/optimizations/reduce_truncation.mlir index 5a65d16f..c8ae3f31 100644 --- 
a/libspu/compiler/tests/optimizations/reduce_truncation.mlir +++ b/libspu/compiler/tests/passes/optimizations/reduce_truncation.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --reduce-truncation --split-input-file %s | FileCheck %s +// RUN: spu-opt --reduce-truncation --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<31x1xf32>, %arg1: tensor<31x1x!pphlo.secret>) -> (tensor<31x1x!pphlo.secret>) { //CHECK: %[[MUL0:.*]] = pphlo.multiply %arg0, %arg0 : tensor<31x1xf32> diff --git a/libspu/compiler/tests/optimizations/sort_lowering.mlir b/libspu/compiler/tests/passes/optimizations/sort_lowering.mlir similarity index 95% rename from libspu/compiler/tests/optimizations/sort_lowering.mlir rename to libspu/compiler/tests/passes/optimizations/sort_lowering.mlir index 2ddc6fcf..37d98fd1 100644 --- a/libspu/compiler/tests/optimizations/sort_lowering.mlir +++ b/libspu/compiler/tests/passes/optimizations/sort_lowering.mlir @@ -1,4 +1,4 @@ -// RUN: pphlo-opt --sort-lowering --split-input-file %s | FileCheck %s +// RUN: spu-opt --sort-lowering --split-input-file %s | FileCheck %s func.func @main(%arg0: tensor<10x!pphlo.secret>) -> tensor<10x!pphlo.secret> { // CHECK: pphlo.simple_sort %arg0 ASC, dim = 0, num_keys = 1 : (tensor<10x!pphlo.secret>) -> tensor<10x!pphlo.secret> diff --git a/libspu/compiler/tools/BUILD.bazel b/libspu/compiler/tools/BUILD.bazel index b0f64a7d..6465c247 100644 --- a/libspu/compiler/tools/BUILD.bazel +++ b/libspu/compiler/tools/BUILD.bazel @@ -19,12 +19,12 @@ package( ) spu_cc_binary( - name = "pphlo-opt", + name = "spu-opt", srcs = [ - "pphlo-opt.cc", + "spu-opt.cc", ], deps = [ - "//libspu/compiler/passes:all_passes", + "//libspu/dialect/pphlo/transforms:all_passes", "@llvm-project//llvm:Support", "@llvm-project//mlir:FuncExtensions", "@llvm-project//mlir:IR", @@ -36,12 +36,12 @@ spu_cc_binary( ) spu_cc_binary( - name = "pphlo-lsp", + name = "spu-lsp", srcs = [ - "pphlo-lsp.cc", + "spu-lsp.cc", ], deps = [ - "//libspu/dialect/pphlo:dialect", + 
"//libspu/dialect/pphlo/IR:dialect", "@llvm-project//llvm:Support", "@llvm-project//mlir:FuncExtensions", "@llvm-project//mlir:IR", @@ -50,3 +50,26 @@ spu_cc_binary( "@xla//xla/mlir_hlo", ], ) + +spu_cc_binary( + name = "spu-translate", + srcs = [ + "spu-translate.cc", + ], + deps = [ + "//libspu/compiler/common:compilation_context", + "//libspu/compiler/core", + "//libspu/compiler/utils", + "//libspu/device/pphlo:pphlo_executor", + "//libspu/dialect/pphlo/transforms:all_passes", + "//libspu/dialect/utils", + "//libspu/kernel:test_util", + "//libspu/mpc/utils:simulate", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:FuncExtensions", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Transforms", + "@llvm-project//mlir:TranslateLib", + ], +) diff --git a/libspu/compiler/tools/pphlo-lsp.cc b/libspu/compiler/tools/spu-lsp.cc similarity index 96% rename from libspu/compiler/tools/pphlo-lsp.cc rename to libspu/compiler/tools/spu-lsp.cc index 8b538285..0b9ddbd6 100644 --- a/libspu/compiler/tools/pphlo-lsp.cc +++ b/libspu/compiler/tools/spu-lsp.cc @@ -18,7 +18,7 @@ #include "stablehlo/dialect/StablehloOps.h" #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" -#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/dialect/pphlo/IR/dialect.h" int main(int argc, char **argv) { mlir::DialectRegistry registry; diff --git a/libspu/compiler/tools/pphlo-opt.cc b/libspu/compiler/tools/spu-opt.cc similarity index 93% rename from libspu/compiler/tools/pphlo-opt.cc rename to libspu/compiler/tools/spu-opt.cc index 833f87a8..a28a4f26 100644 --- a/libspu/compiler/tools/pphlo-opt.cc +++ b/libspu/compiler/tools/spu-opt.cc @@ -20,8 +20,8 @@ #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h" #include "xla/mlir_hlo/mhlo/transforms/passes.h" -#include "libspu/compiler/passes/register_passes.h" -#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/dialect/pphlo/IR/dialect.h" +#include "libspu/dialect/pphlo/transforms/register_passes.h" int main(int argc, char 
**argv) { mlir::registerTransformsPasses(); diff --git a/libspu/compiler/tools/spu-translate.cc b/libspu/compiler/tools/spu-translate.cc new file mode 100644 index 00000000..a37bd465 --- /dev/null +++ b/libspu/compiler/tools/spu-translate.cc @@ -0,0 +1,259 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Tools/mlir-translate/MlirTranslateMain.h" +#include "mlir/Tools/mlir-translate/Translation.h" +#include "mlir/Transforms/Passes.h" +#include "stablehlo/dialect/StablehloOps.h" +#include "xtensor/xio.hpp" + +#include "libspu/compiler/common/compilation_context.h" +#include "libspu/compiler/utils/utils.h" +#include "libspu/core/prelude.h" +#include "libspu/device/pphlo/pphlo_executor.h" +#include "libspu/dialect/pphlo/IR/dialect.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/passes.h" +#include "libspu/dialect/utils/utils.h" +#include "libspu/kernel/test_util.h" +#include "libspu/mpc/utils/simulate.h" + +#include "libspu/spu.pb.h" + +#define EXPOSE_PIPELINE_BUILDER +#include "libspu/compiler/core/core.h" +#undef EXPOSE_PIPELINE_BUILDER + +template +struct 
fmt::formatter> : ostream_formatter {}; + +namespace mlir { + +namespace { + +void runPasses(ModuleOp module) { + ::spu::CompilerOptions options; + // ---- tweak compation options ---- // + // --------------------------------- // + ::spu::compiler::CompilationContext ccontext(options); + + ::spu::compiler::Core c(&ccontext); + mlir::PassManager pm(module->getContext()); + + c.buildPipeline(&pm); + + SPU_ENFORCE(pm.run(module).succeeded()); + + SPDLOG_INFO("IR\n {}", spu::mlirObjectToString(module)); +} + +template +void isEqual(const xt::xarray &lhs, const xt::xarray &rhs) { + SPDLOG_INFO("lhs = {}", lhs); + SPDLOG_INFO("rhs = {}", rhs); + + auto error = lhs - rhs; + + for (auto v : error) { + if (v != 0) { + llvm::report_fatal_error(fmt::format("Diff = {}", v).c_str()); + } + } +} + +bool testOpHandler(::spu::SPUContext *sctx, mlir::Operation *op, + absl::Span inputs) { + auto callOp = mlir::dyn_cast(op); + if (callOp.getCallTargetName() == "expect_almost_eq") { + ::spu::Value runtimeLhs = inputs[0]; + ::spu::Value runtimeRhs = inputs[1]; + + if (!runtimeLhs.isPublic()) { + runtimeLhs = ::spu::kernel::hal::_s2p(sctx, runtimeLhs) + .setDtype(runtimeLhs.dtype()); + } + + SPU_ENFORCE(runtimeRhs.isPublic()); + + auto lhs = ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + + SPDLOG_INFO("lhs = {}", lhs); + SPDLOG_INFO("rhs = {}", rhs); + + double tol = 0.1; + if (auto tol_attr = callOp->getAttr("tol")) { + tol = mlir::dyn_cast(tol_attr).getValueAsDouble(); + } + + auto error = xt::fabs(lhs - rhs); + + for (auto v : error) { + if (v > tol) { + llvm::report_fatal_error( + fmt::format("Diff {} greater than tol {}", v, tol).c_str()); + } + } + + return true; + } + + if (callOp.getCallTargetName() == "expect_eq") { + ::spu::Value runtimeLhs = inputs[0]; + ::spu::Value runtimeRhs = inputs[1]; + + if (!runtimeLhs.isPublic()) { + runtimeLhs = ::spu::kernel::hal::_s2p(sctx, runtimeLhs) + 
.setDtype(runtimeLhs.dtype()); + } + + SPU_ENFORCE(runtimeRhs.isPublic()); + + auto it = mlir::dyn_cast( + getElementTypeOrSelf(callOp->getOperand(1).getType())); + auto width = it.getWidth(); + auto unsign = it.isUnsigned(); + + switch (width) { + case 1: { + auto lhs = ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + break; + } + case 8: { + if (unsign) { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } else { + auto lhs = ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } + break; + } + case 16: { + if (unsign) { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } else { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } + break; + } + case 32: { + if (unsign) { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } else { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } + break; + } + case 64: { + if (unsign) { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } else { + auto lhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeLhs); + auto rhs = + ::spu::kernel::hal::dump_public_as(sctx, runtimeRhs); + isEqual(lhs, rhs); + } + break; + } + } + + return true; + } + + return false; +} + +void evalModule(ModuleOp 
module) { + // Run passes + runPasses(module); + + ::spu::RuntimeConfig conf; + conf.set_protocol(::spu::REF2K); + conf.set_field(::spu::FM64); + conf.set_enable_type_checker(true); + + auto entry_function = spu::get_entrypoint(module); + SPU_ENFORCE(entry_function, "main module not found"); + + ::spu::device::pphlo::PPHloExecutor executor; + executor.setExtraIntrinsicHandler(testOpHandler); + ::spu::device::ExecutionOptions opts; + + ::spu::mpc::utils::simulate( + 1, [&](const std::shared_ptr &lctx) { + auto sctx = ::spu::kernel::test::makeSPUContext(conf, lctx); + + runRegion(&executor, &sctx, nullptr, entry_function.getBody(), {}, + opts); + return; + }); +} + +} // namespace + +TranslateFromMLIRRegistration interpretRegistration( + "interpret", "Interpreter for SPU", + [](Operation *op, raw_ostream &os) -> LogicalResult { + auto module = mlir::dyn_cast(op); + evalModule(module); + + return success(); + }, + [](DialectRegistry ®istry) { + registry.insert(); + }); + +} // namespace mlir + +int main(int argc, char **argv) { + return failed( + mlir::mlirTranslateMain(argc, argv, "SPU interpreter driver\n")); +} diff --git a/libspu/compiler/utils/BUILD.bazel b/libspu/compiler/utils/BUILD.bazel new file mode 100644 index 00000000..f54fb36a --- /dev/null +++ b/libspu/compiler/utils/BUILD.bazel @@ -0,0 +1,30 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//bazel:spu.bzl", "spu_cc_library") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +spu_cc_library( + name = "utils", + srcs = ["utils.cc"], + hdrs = ["utils.h"], + deps = [ + "@llvm-project//llvm:Support", + "@llvm-project//mlir:Support", + ], +) diff --git a/libspu/compiler/utils/utils.cc b/libspu/compiler/utils/utils.cc new file mode 100644 index 00000000..00d17245 --- /dev/null +++ b/libspu/compiler/utils/utils.cc @@ -0,0 +1,27 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "libspu/compiler/utils/utils.h" + +#include "llvm/ADT/Twine.h" +#include "spdlog/spdlog.h" + +namespace mlir::spu { + +mlir::LogicalResult argparser_error_handler(const llvm::Twine &msg) { + SPDLOG_ERROR(msg.str()); + return mlir::failure(); +} + +} // namespace mlir::spu diff --git a/libspu/compiler/utils/utils.h b/libspu/compiler/utils/utils.h new file mode 100644 index 00000000..faeff8d9 --- /dev/null +++ b/libspu/compiler/utils/utils.h @@ -0,0 +1,23 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "mlir/Support/LogicalResult.h" + +namespace mlir::spu { + +mlir::LogicalResult argparser_error_handler(const llvm::Twine &msg); + +} // namespace mlir::spu diff --git a/libspu/device/BUILD.bazel b/libspu/device/BUILD.bazel index cf69d3b2..1a071162 100644 --- a/libspu/device/BUILD.bazel +++ b/libspu/device/BUILD.bazel @@ -57,29 +57,20 @@ spu_cc_library( "//libspu:spu_cc_proto", "//libspu/core:context", "//libspu/core:value", - "//libspu/dialect/pphlo:dialect", + "//libspu/dialect/pphlo/IR:dialect", "@llvm-project//mlir:IR", ], ) -spu_cc_library( - name = "debug_dump_constant", - srcs = [ - "debug_dump_constant.cc", - ], - hdrs = [ - "debug_dump_constant.h", - ], -) - spu_cc_library( name = "api", srcs = ["api.cc"], hdrs = ["api.h"], deps = [ - ":debug_dump_constant", ":executor", "//libspu/device/pphlo:pphlo_executor", + "//libspu/device/utils:debug_dump_constant", + "//libspu/dialect/utils", "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:Parser", @@ -88,7 +79,6 @@ spu_cc_library( spu_cc_library( name = "test_utils", - testonly = True, hdrs = ["test_utils.h"], deps = [ ":io", @@ -108,3 +98,8 @@ spu_cc_library( "//libspu/core:value", ], ) + +spu_cc_library( + name = "intrinsic_table", + hdrs = ["intrinsic_table.h"], +) diff --git a/libspu/device/api.cc b/libspu/device/api.cc index 64f939bd..97077a93 100644 --- a/libspu/device/api.cc +++ b/libspu/device/api.cc @@ -26,8 +26,9 @@ #include "spdlog/spdlog.h" #include "libspu/core/trace.h" -#include "libspu/device/debug_dump_constant.h" 
-#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/device/utils/debug_dump_constant.h" +#include "libspu/dialect/pphlo/IR/dialect.h" +#include "libspu/dialect/utils/utils.h" namespace spu::device { namespace { @@ -289,7 +290,7 @@ void executeImpl(OpExecutor *executor, spu::SPUContext *sctx, SPU_ENFORCE(moduleOpRef, "MLIR parser failure"); - auto entry_function = moduleOpRef->lookupSymbol("main"); + auto entry_function = mlir::spu::get_entrypoint(moduleOpRef.get()); SPU_ENFORCE(entry_function, "main module not found"); ExecutionOptions opts; diff --git a/libspu/device/executor.cc b/libspu/device/executor.cc index 5d0c400a..9093b628 100644 --- a/libspu/device/executor.cc +++ b/libspu/device/executor.cc @@ -15,9 +15,7 @@ #include "libspu/device/executor.h" #include -#include #include -#include #include #include "mlir/IR/Operation.h" diff --git a/libspu/device/executor.h b/libspu/device/executor.h index 38a0c488..db43547d 100644 --- a/libspu/device/executor.h +++ b/libspu/device/executor.h @@ -66,6 +66,9 @@ class OpExecutor { public: virtual ~OpExecutor() = default; + using handler_t = std::function inputs)>; + // virtual void checkType(mlir::Type mlir_type, const spu::Value &v) const = 0; @@ -81,6 +84,17 @@ class OpExecutor { const ExecutionOptions &opts = {}) { return runKernelImpl(sctx, sscope, op, opts); } + + void setExtraIntrinsicHandler(handler_t handler) { + extra_handler_ = std::move(handler); + } + + const std::optional &getExtraIntrinsicHandler() const { + return extra_handler_; + } + + private: + std::optional extra_handler_; }; std::vector runRegion(OpExecutor *executor, SPUContext *sctx, diff --git a/libspu/device/intrinsic_table.h b/libspu/device/intrinsic_table.h new file mode 100644 index 00000000..d56c5be9 --- /dev/null +++ b/libspu/device/intrinsic_table.h @@ -0,0 +1,28 @@ +// Copyright 2024 Ant Group Co., Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// clang-format off + +// MACRO_NAME FCN_NAME +#define TOPK "mhlo.topk" +#define ERF "mhlo.erf" +#define PREFER_A "spu.prefer_a" +#define DBG_PRINT "spu.dbg_print" +#define GATHER "spu.gather" +#define MAKE_CACHE_VAR "spu.make_cache_var" +#define DROP_CACHED_VAR "spu.drop_cached_var" + +// clang-format on diff --git a/libspu/device/pphlo/BUILD.bazel b/libspu/device/pphlo/BUILD.bazel index fbfc8b48..dd1200f9 100644 --- a/libspu/device/pphlo/BUILD.bazel +++ b/libspu/device/pphlo/BUILD.bazel @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("//bazel:spu.bzl", "spu_cc_binary", "spu_cc_library", "spu_cc_test") +load("//bazel:spu.bzl", "spu_cc_library", "spu_cc_test") package(default_visibility = ["//visibility:public"]) @@ -24,7 +24,8 @@ spu_cc_library( ":pphlo_intrinsic_executor", ":pphlo_verifier", "//libspu/device:executor", - "//libspu/dialect/pphlo:dialect", + "//libspu/dialect/pphlo/IR:dialect", + "//libspu/dialect/utils", "//libspu/kernel/hlo:basic_binary", "//libspu/kernel/hlo:basic_ternary", "//libspu/kernel/hlo:basic_unary", @@ -47,8 +48,10 @@ spu_cc_library( srcs = ["pphlo_intrinsic_executor.cc"], hdrs = ["pphlo_intrinsic_executor.h"], deps = [ - "//libspu/dialect/pphlo:dialect", + "//libspu/device:intrinsic_table", + "//libspu/dialect/pphlo/IR:dialect", "//libspu/kernel/hal:debug", + "//libspu/kernel/hlo:basic_binary", "//libspu/kernel/hlo:casting", "//libspu/kernel/hlo:const", "//libspu/kernel/hlo:indexing", @@ -57,26 +60,11 @@ spu_cc_library( ], ) -spu_cc_library( - name = "pphlo_executor_test_runner", - testonly = True, - srcs = ["pphlo_executor_test_runner.cc"], - hdrs = ["pphlo_executor_test_runner.h"], - deps = [ - ":pphlo_executor", - "//libspu/compiler:compile", - "//libspu/device:api", - "//libspu/device:io", - "//libspu/device:test_utils", - "//libspu/kernel:test_util", - ], -) - spu_cc_test( name = "pphlo_executor_test", srcs = ["pphlo_executor_test.cc"], deps = [ - ":pphlo_executor_test_runner", + "//libspu/device/utils:pphlo_executor_test_runner", ], ) @@ -86,7 +74,7 @@ spu_cc_library( hdrs = ["pphlo_verifier.h"], deps = [ "//libspu/core:value", - "//libspu/dialect/pphlo:dialect", + "//libspu/dialect/pphlo/IR:dialect", "//libspu/kernel/hal:public_helper", "//libspu/kernel/hal:type_cast", "@stablehlo//:reference_ops", @@ -104,16 +92,3 @@ spu_cc_test( "@llvm-project//mlir:Parser", ], ) - -spu_cc_binary( - name = "pphlo_executor_debug_runner", - testonly = True, - srcs = ["pphlo_executor_debug_runner.cc"], - deps = [ - ":pphlo_executor", - "//libspu/device:api", - 
"//libspu/device:debug_dump_constant", - "//libspu/device:test_utils", - "@llvm-project//llvm:Support", - ], -) diff --git a/libspu/device/pphlo/pphlo_executor.cc b/libspu/device/pphlo/pphlo_executor.cc index 0865e964..d20d54f9 100644 --- a/libspu/device/pphlo/pphlo_executor.cc +++ b/libspu/device/pphlo/pphlo_executor.cc @@ -20,8 +20,9 @@ #include "libspu/core/trace.h" #include "libspu/device/pphlo/pphlo_intrinsic_executor.h" #include "libspu/device/pphlo/pphlo_verifier.h" -#include "libspu/dialect/pphlo/base_enums.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/base_enums.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/utils/utils.h" #include "libspu/kernel/hal/debug.h" #include "libspu/kernel/hal/public_helper.h" #include "libspu/kernel/hal/ring.h" @@ -42,15 +43,6 @@ namespace { -template -std::string mlirObjectToString(T &&mlir_obj) { - std::string buf; - llvm::raw_string_ostream rss(buf); - mlir_obj.print(rss); - rss.flush(); - return buf; -} - std::pair getPtTypeFromMlirType(mlir::Type mlir_ty) { mlir::spu::pphlo::TypeTools tool(mlir_ty.getContext()); auto express_type = @@ -95,7 +87,7 @@ std::pair getPtTypeFromMlirType(mlir::Type mlir_ty) { } } - SPU_THROW("invalid type {}", mlirObjectToString(mlir_ty)); + SPU_THROW("invalid type {}", mlir::spu::mlirObjectToString(mlir_ty)); } spu::DataType getDtypeFromMlirType(mlir::Type mlir_ty) { @@ -118,7 +110,8 @@ spu::DataType getDtypeFromMlirType(mlir::Type mlir_ty) { case 64: return int_ty.isUnsigned() ? 
spu::DT_U64 : spu::DT_I64; default: - SPU_THROW("unsupported int type {}", mlirObjectToString(mlir_ty)); + SPU_THROW("unsupported int type {}", + mlir::spu::mlirObjectToString(mlir_ty)); } } else if (auto flp_ty = mlir::dyn_cast(express_type)) { switch (flp_ty.getWidth()) { @@ -129,7 +122,8 @@ spu::DataType getDtypeFromMlirType(mlir::Type mlir_ty) { case 64: return spu::DT_F64; default: - SPU_THROW("unsupported fp type {}", mlirObjectToString(flp_ty)); + SPU_THROW("unsupported fp type {}", + mlir::spu::mlirObjectToString(flp_ty)); } } else if (auto ct = mlir::dyn_cast(express_type)) { if (ct.getElementType().isF32()) { @@ -138,8 +132,8 @@ spu::DataType getDtypeFromMlirType(mlir::Type mlir_ty) { return spu::DT_F64; } } - SPU_THROW("invalid type {} {}", mlirObjectToString(mlir_ty), - mlirObjectToString(express_type)); + SPU_THROW("invalid type {} {}", mlir::spu::mlirObjectToString(mlir_ty), + mlir::spu::mlirObjectToString(express_type)); } // Convert mlir visibility to spu visibility @@ -199,7 +193,7 @@ void do_type_checker(mlir::Value key, const spu::Value &val, SPU_ENFORCE(val.isComplex(), "Expected complex type"); } else { SPU_ENFORCE(!val.isComplex(), "Got type {}", - mlirObjectToString(mlir_type)); + mlir::spu::mlirObjectToString(mlir_type)); } // Check vtype @@ -468,7 +462,8 @@ void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, SPU_THROW( "Convolution with {} spatial dimensions is not " "supported, {}", - dnums.getInputSpatialDimensions().size(), mlirObjectToString(op)); + dnums.getInputSpatialDimensions().size(), + mlir::spu::mlirObjectToString(op)); } } @@ -648,23 +643,6 @@ void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, addValue(sscope, op.getResult(), std::move(ret), opts); } -void execute(OpExecutor *executor, SPUContext *sctx, SymbolScope *sscope, - mlir::spu::pphlo::CaseOp &op, const ExecutionOptions &opts) { - std::vector branches; - for (auto &b : op.getBranches()) { - branches.emplace_back( - [&]() { return 
runRegion(executor, sctx, sscope, b, {}); }); - } - - auto results = kernel::hlo::Case( - sctx, lookupValue(sscope, op.getIndex(), opts), branches); - - // Copy output - for (const auto &ret : llvm::enumerate(op->getResults())) { - addValue(sscope, ret.value(), results[ret.index()], opts); - } -} - void execute(OpExecutor *executor, SPUContext *sctx, SymbolScope *sscope, mlir::spu::pphlo::IfOp &op, const ExecutionOptions &opts) { auto conditional = lookupValue(sscope, op.getCondition(), opts); @@ -1017,24 +995,11 @@ void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, addValue(sscope, op.getResult(), casted, opts); } -void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, - mlir::spu::pphlo::PreferAOp &op, const ExecutionOptions &opts) { - auto in = lookupValue(sscope, op.getOperand(), opts); - if (sctx->config().protocol() == ProtocolKind::CHEETAH) { - // NOTE(juhou): For 2PC, MulAB uses COT which is efficient and accurate than - // MulAA that needs HE. Thus we just by-pass the PreferAOp for 2PC. 
- addValue(sscope, op.getResult(), in, opts); - return; - } - auto k0 = kernel::hlo::Cast(sctx, kernel::hlo::Constant(sctx, 0, in.shape()), - VIS_PUBLIC, in.dtype()); - addValue(sscope, op.getResult(), kernel::hlo::Add(sctx, in, k0), opts); -} - void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, mlir::spu::pphlo::SignOp &op, const ExecutionOptions &opts) { auto in = lookupValue(sscope, op.getOperand(), opts); - addValue(sscope, op.getResult(), kernel::hlo::Sign(sctx, in), opts); + addValue(sscope, op.getResult(), + kernel::hlo::Sign(sctx, in, op.getIgnoreZero()), opts); } void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, @@ -1142,12 +1107,18 @@ void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, opts); } -void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, +void execute(OpExecutor *executor, SPUContext *sctx, SymbolScope *sscope, mlir::spu::pphlo::CustomCallOp &op, const ExecutionOptions &opt) { std::vector inputs(op->getNumOperands()); for (size_t idx = 0; idx < inputs.size(); ++idx) { inputs[idx] = lookupValue(sscope, op->getOperand(idx), opt); } + + const auto &extra = executor->getExtraIntrinsicHandler(); + if (extra.has_value() && (*extra)(sctx, op.getOperation(), inputs)) { + return; + } + auto ret = intrinsic_dispatcher(sctx, op, inputs); for (size_t idx = 0; idx < op->getNumResults(); ++idx) { @@ -1193,6 +1164,14 @@ void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, addValue(sscope, op.getResult(), kernel::hlo::Complex(sctx, r, i), opts); } +void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, + mlir::spu::pphlo::PopcntOp &op, const ExecutionOptions &opts) { + auto in = lookupValue(sscope, op.getOperand(), opts); + auto ret = kernel::hlo::Popcnt(sctx, in); + + addValue(sscope, op.getResult(), std::move(ret), opts); +} + #define DEFINE_UNIMPLEMENTED_OP(OpName) \ void execute(OpExecutor *, SPUContext *, SymbolScope *, \ mlir::spu::pphlo::OpName &, const ExecutionOptions &) 
{ \ @@ -1200,6 +1179,7 @@ void execute(OpExecutor *, SPUContext *sctx, SymbolScope *sscope, } DEFINE_UNIMPLEMENTED_OP(ReturnOp) +DEFINE_UNIMPLEMENTED_OP(CaseOp) #undef DEFINE_UNIMPLEMENTED_OP @@ -1221,7 +1201,7 @@ static bool hasKernelImpl(mlir::Operation &op) { bool PPHloExecutor::hasKernel(mlir::Operation &op) const { return hasKernelImpl< #define GET_OP_LIST -#include "libspu/dialect/pphlo/ops.cc.inc" +#include "libspu/dialect/pphlo/IR/ops.cc.inc" >(op); } @@ -1292,8 +1272,8 @@ static void dispatchOp(OpExecutor *executor, SPUContext *sctx, } } else { if constexpr (!sizeof...(MoreOpT)) { - SPU_THROW("Unhandled mlir op {} at {}", mlirObjectToString(op), - mlirObjectToString(op.getLoc())); + SPU_THROW("Unhandled mlir op {} at {}", mlir::spu::mlirObjectToString(op), + mlir::spu::mlirObjectToString(op.getLoc())); } else { dispatchOp(executor, sctx, sscope, op, opts); } @@ -1304,11 +1284,11 @@ void PPHloExecutor::runKernelImpl(SPUContext *sctx, SymbolScope *sscope, mlir::Operation &op, const ExecutionOptions &opts) { if (opts.do_log_execution) { - SPDLOG_INFO("PPHLO {}", mlirObjectToString(op)); + SPDLOG_INFO("PPHLO {}", mlir::spu::mlirObjectToString(op)); } dispatchOp< #define GET_OP_LIST -#include "libspu/dialect/pphlo/ops.cc.inc" +#include "libspu/dialect/pphlo/IR/ops.cc.inc" >(this, sctx, sscope, op, opts); } diff --git a/libspu/device/pphlo/pphlo_executor_test.cc b/libspu/device/pphlo/pphlo_executor_test.cc index e4a3d89e..ebc83763 100644 --- a/libspu/device/pphlo/pphlo_executor_test.cc +++ b/libspu/device/pphlo/pphlo_executor_test.cc @@ -22,7 +22,7 @@ #include "gtest/gtest.h" #include "xtensor/xarray.hpp" -#include "libspu/device/pphlo/pphlo_executor_test_runner.h" +#include "libspu/device/utils/pphlo_executor_test_runner.h" namespace spu::device::pphlo::test { @@ -886,7 +886,7 @@ void testGatherImpl(size_t world_size, FieldType field, ProtocolKind protocol, auto compiled = r.compileMHlo(mhlo, {VIS_PUBLIC, VIS_PUBLIC}); - EXPECT_THAT(compiled, 
testing::HasSubstr("pphlo.gather")); + EXPECT_THAT(compiled, testing::HasSubstr("spu.gather")); r.run(compiled); @@ -903,7 +903,7 @@ void testGatherImpl(size_t world_size, FieldType field, ProtocolKind protocol, auto compiled = r.compileMHlo(mhlo, {VIS_PUBLIC, VIS_SECRET}); - EXPECT_THAT(compiled, testing::Not(testing::HasSubstr("pphlo.gather"))); + EXPECT_THAT(compiled, testing::Not(testing::HasSubstr("spu.gather"))); r.run(compiled); @@ -2725,130 +2725,6 @@ func.func @main() -> (tensor, tensor) { } } -TEST_P(ExecutorTest, Case) { - const auto *prog = R"( - func.func @main(%arg0: tensor) -> (tensor,tensor) { - %0:2 = "pphlo.case"(%arg0) ({ - %1 = pphlo.constant dense<1> : tensor - %2 = pphlo.constant dense<11> : tensor - pphlo.return %1, %2 : tensor, tensor - }, { - %1 = pphlo.constant dense<2> : tensor - %2 = pphlo.constant dense<12> : tensor - pphlo.return %1, %2 : tensor, tensor - }, { - %1 = pphlo.constant dense<3> : tensor - %2 = pphlo.constant dense<13> : tensor - pphlo.return %1, %2 : tensor, tensor - }) : (tensor) -> (tensor, tensor) - return %0#0, %0#1: tensor, tensor -})"; - - { - // case 0 - Runner r(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam())); - - r.addInput(static_cast(0)); - - r.run(prog, 2); - - r.verifyScalarOutput(static_cast(1), 0); - r.verifyScalarOutput(static_cast(11), 1); - } - - { - // case 1 - Runner r(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam())); - - r.addInput(static_cast(1)); - - r.run(prog, 2); - - r.verifyScalarOutput(static_cast(2), 0); - r.verifyScalarOutput(static_cast(12), 1); - } - - { - // case 2 - Runner r(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam())); - - r.addInput(static_cast(2)); - - r.run(prog, 2); - - r.verifyScalarOutput(static_cast(3), 0); - r.verifyScalarOutput(static_cast(13), 1); - } -} - -TEST_P(ExecutorTest, CasePrivate) { - const auto *prog = R"( - func.func @main(%arg0: tensor>) -> (tensor>, tensor>) { - %0:2 = 
"pphlo.case"(%arg0) ({ - %1 = pphlo.constant dense<1> : tensor - %2 = pphlo.convert %1 : (tensor) -> tensor> - %3 = pphlo.constant dense<11> : tensor - %4 = pphlo.convert %3 : (tensor) -> tensor> - pphlo.return %2, %4 : tensor>, tensor> - }, { - %1 = pphlo.constant dense<2> : tensor - %2 = pphlo.convert %1 : (tensor) -> tensor> - %3 = pphlo.constant dense<12> : tensor - %4 = pphlo.convert %3 : (tensor) -> tensor> - pphlo.return %2, %4 : tensor>, tensor> - }, { - %1 = pphlo.constant dense<3> : tensor - %2 = pphlo.convert %1 : (tensor) -> tensor> - %3 = pphlo.constant dense<13> : tensor - %4 = pphlo.convert %3 : (tensor) -> tensor> - pphlo.return %2, %4 : tensor>, tensor> - }) : (tensor>) -> (tensor>, tensor>) - return %0#0, %0#1: tensor>, tensor> -})"; - - { - // case 0 - Runner r(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam())); - - r.addInput(static_cast(0), VIS_SECRET); - - r.run(prog, 2); - - r.verifyScalarOutput(static_cast(1), 0); - r.verifyScalarOutput(static_cast(11), 1); - } - - { - // case 1 - Runner r(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam())); - - r.addInput(static_cast(1), VIS_SECRET); - - r.run(prog, 2); - - r.verifyScalarOutput(static_cast(2), 0); - r.verifyScalarOutput(static_cast(12), 1); - } - - { - // case 2 - Runner r(std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam())); - - r.addInput(static_cast(2), VIS_SECRET); - - r.run(prog, 2); - - r.verifyScalarOutput(static_cast(3), 0); - r.verifyScalarOutput(static_cast(13), 1); - } -} - TEST_P(ExecutorTest, MixedPayload) { xt::xarray op = {10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 99, 97, 98, 96, 91, 11, 12, 13, 14, 15}; diff --git a/libspu/device/pphlo/pphlo_intrinsic_executor.cc b/libspu/device/pphlo/pphlo_intrinsic_executor.cc index dbc3478a..01cb2565 100644 --- a/libspu/device/pphlo/pphlo_intrinsic_executor.cc +++ b/libspu/device/pphlo/pphlo_intrinsic_executor.cc @@ -16,8 +16,10 @@ #include "spdlog/spdlog.h" +#include 
"libspu/device/intrinsic_table.h" #include "libspu/kernel/hal/debug.h" #include "libspu/kernel/hal/fxp_approx.h" +#include "libspu/kernel/hlo/basic_binary.h" #include "libspu/kernel/hlo/casting.h" #include "libspu/kernel/hlo/const.h" #include "libspu/kernel/hlo/indexing.h" @@ -48,7 +50,7 @@ std::vector intrinsic_dispatcher(SPUContext* ctx, return {zeros}; } - if (name == "make_cached_var") { + if (name == MAKE_CACHE_VAR) { if (ctx->hasKernel("beaver_cache")) { SPU_ENFORCE(inputs.size() == 1); dynDispatch(ctx, "beaver_cache", inputs[0], true); @@ -57,7 +59,7 @@ std::vector intrinsic_dispatcher(SPUContext* ctx, return {inputs[0]}; } - if (name == "drop_cached_var") { + if (name == DROP_CACHED_VAR) { if (ctx->hasKernel("beaver_cache")) { SPU_ENFORCE(inputs.size() > 0); dynDispatch(ctx, "beaver_cache", inputs[0], false); @@ -73,17 +75,17 @@ std::vector intrinsic_dispatcher(SPUContext* ctx, return {inputs.begin(), inputs.end()}; } - if (name == "dbg_print") { + if (name == DBG_PRINT) { kernel::hal::dbg_print(ctx, inputs[0]); return {}; } - if (name == "mhlo.erf") { + if (name == ERF) { SPU_ENFORCE(inputs.size() == 1 && inputs[0].isFxp()); return {kernel::hal::f_erf(ctx, inputs[0])}; } - if (name == "mhlo.topk") { + if (name == TOPK) { SPU_ENFORCE(inputs.size() == 1); auto attr = mlir::dyn_cast(call->getAttr("mhlo.attributes")); @@ -105,7 +107,7 @@ std::vector intrinsic_dispatcher(SPUContext* ctx, return kernel::hlo::TopK(ctx, inputs[0], k, -1, largest, value_only); } - if (name == "pphlo.gather") { + if (name == GATHER) { kernel::hlo::GatherConfig config; const auto& output_shape = mlir::dyn_cast(call.getResults()[0].getType()) @@ -133,6 +135,18 @@ std::vector intrinsic_dispatcher(SPUContext* ctx, kernel::hlo::Gather(ctx, inputs[0], inputs[1], config, output_shape)}; } + if (name == PREFER_A) { + if (ctx->config().protocol() == ProtocolKind::CHEETAH) { + // NOTE(juhou): For 2PC, MulAB uses COT which is efficient and accurate + // than MulAA that needs HE. 
Thus we just by-pass the PreferAOp for 2PC. + return {inputs[0]}; + } + auto k0 = + kernel::hlo::Cast(ctx, kernel::hlo::Constant(ctx, 0, inputs[0].shape()), + VIS_PUBLIC, inputs[0].dtype()); + return {kernel::hlo::Add(ctx, inputs[0], k0)}; + } + SPU_THROW("Unhandled intrinsic call {}", name.str()); } diff --git a/libspu/device/pphlo/pphlo_intrinsic_executor.h b/libspu/device/pphlo/pphlo_intrinsic_executor.h index b4c9b86c..a129b4d0 100644 --- a/libspu/device/pphlo/pphlo_intrinsic_executor.h +++ b/libspu/device/pphlo/pphlo_intrinsic_executor.h @@ -15,7 +15,7 @@ #pragma once #include "libspu/core/value.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" namespace spu { diff --git a/libspu/device/pphlo/pphlo_verifier.cc b/libspu/device/pphlo/pphlo_verifier.cc index 83e79294..2fd377a9 100644 --- a/libspu/device/pphlo/pphlo_verifier.cc +++ b/libspu/device/pphlo/pphlo_verifier.cc @@ -20,7 +20,7 @@ #include "stablehlo/reference/Ops.h" #include "stablehlo/reference/Tensor.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" #include "libspu/kernel/hal/public_helper.h" #include "libspu/kernel/hal/type_cast.h" @@ -272,6 +272,7 @@ UNARY_VERIFIER(RoundNearestEvenOp, roundNearestEvenOp) UNARY_VERIFIER(SignOp, signOp) UNARY_VERIFIER(Log1pOp, log1pOp) UNARY_VERIFIER(Expm1Op, expm1Op) +UNARY_VERIFIER(PopcntOp, populationCountOp) #undef UNARY_VERIFIER diff --git a/libspu/device/pphlo/pphlo_verifier.h b/libspu/device/pphlo/pphlo_verifier.h index 4f005399..428883c8 100644 --- a/libspu/device/pphlo/pphlo_verifier.h +++ b/libspu/device/pphlo/pphlo_verifier.h @@ -15,7 +15,7 @@ #pragma once #include "libspu/core/value.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" namespace spu { class SPUContext; @@ -60,6 +60,7 @@ class PPHloVerifier { VERIFY_DECL(SqrtOp) VERIFY_DECL(RoundOp) VERIFY_DECL(RoundNearestEvenOp) + VERIFY_DECL(PopcntOp) // Simple binary VERIFY_DECL(AddOp) @@ -140,7 +141,6 @@ 
class PPHloVerifier { NO_VERIFY_DEFN(RngOp) NO_VERIFY_DEFN(ConstantOp) NO_VERIFY_DEFN(MaxPoolScatterOp) - NO_VERIFY_DEFN(PreferAOp) NO_VERIFY_DEFN(ArgMaxOp) NO_VERIFY_DEFN(EpsilonOp) NO_VERIFY_DEFN(CustomCallOp) diff --git a/libspu/device/pphlo/pphlo_verifier_test.cc b/libspu/device/pphlo/pphlo_verifier_test.cc index 1a014683..8efd4f31 100644 --- a/libspu/device/pphlo/pphlo_verifier_test.cc +++ b/libspu/device/pphlo/pphlo_verifier_test.cc @@ -21,7 +21,7 @@ #include "xtensor/xarray.hpp" #include "libspu/device/test_utils.h" -#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/dialect/pphlo/IR/dialect.h" #include "libspu/kernel/test_util.h" #include "libspu/mpc/utils/simulate.h" diff --git a/libspu/device/utils/BUILD.bazel b/libspu/device/utils/BUILD.bazel new file mode 100644 index 00000000..699a4302 --- /dev/null +++ b/libspu/device/utils/BUILD.bazel @@ -0,0 +1,52 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//bazel:spu.bzl", "spu_cc_binary", "spu_cc_library") + +spu_cc_library( + name = "debug_dump_constant", + srcs = [ + "debug_dump_constant.cc", + ], + hdrs = [ + "debug_dump_constant.h", + ], +) + +spu_cc_library( + name = "pphlo_executor_test_runner", + testonly = True, + srcs = ["pphlo_executor_test_runner.cc"], + hdrs = ["pphlo_executor_test_runner.h"], + deps = [ + "//libspu/compiler:compile", + "//libspu/device:api", + "//libspu/device:io", + "//libspu/device:test_utils", + "//libspu/device/pphlo:pphlo_executor", + "//libspu/kernel:test_util", + ], +) + +spu_cc_binary( + name = "pphlo_executor_debug_runner", + srcs = ["pphlo_executor_debug_runner.cc"], + deps = [ + "//libspu/device:api", + "//libspu/device:test_utils", + "//libspu/device/pphlo:pphlo_executor", + "//libspu/device/utils:debug_dump_constant", + "@llvm-project//llvm:Support", + ], +) diff --git a/libspu/device/debug_dump_constant.cc b/libspu/device/utils/debug_dump_constant.cc similarity index 97% rename from libspu/device/debug_dump_constant.cc rename to libspu/device/utils/debug_dump_constant.cc index 6502ed2b..238efcee 100644 --- a/libspu/device/debug_dump_constant.cc +++ b/libspu/device/utils/debug_dump_constant.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "libspu/device/debug_dump_constant.h" +#include "libspu/device/utils/debug_dump_constant.h" #include "fmt/format.h" // IWYU pragma: keep diff --git a/libspu/device/debug_dump_constant.h b/libspu/device/utils/debug_dump_constant.h similarity index 100% rename from libspu/device/debug_dump_constant.h rename to libspu/device/utils/debug_dump_constant.h diff --git a/libspu/device/pphlo/pphlo_executor_debug_runner.cc b/libspu/device/utils/pphlo_executor_debug_runner.cc similarity index 99% rename from libspu/device/pphlo/pphlo_executor_debug_runner.cc rename to libspu/device/utils/pphlo_executor_debug_runner.cc index 7d78ea34..f06e13af 100644 --- a/libspu/device/pphlo/pphlo_executor_debug_runner.cc +++ b/libspu/device/utils/pphlo_executor_debug_runner.cc @@ -23,9 +23,9 @@ #include "libspu/core/value.h" #include "libspu/device/api.h" -#include "libspu/device/debug_dump_constant.h" #include "libspu/device/pphlo/pphlo_executor.h" #include "libspu/device/symbol_table.h" +#include "libspu/device/utils/debug_dump_constant.h" #include "libspu/mpc/factory.h" #include "libspu/mpc/utils/simulate.h" diff --git a/libspu/device/pphlo/pphlo_executor_test_runner.cc b/libspu/device/utils/pphlo_executor_test_runner.cc similarity index 97% rename from libspu/device/pphlo/pphlo_executor_test_runner.cc rename to libspu/device/utils/pphlo_executor_test_runner.cc index ca6bed0d..2b4880bc 100644 --- a/libspu/device/pphlo/pphlo_executor_test_runner.cc +++ b/libspu/device/utils/pphlo_executor_test_runner.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "libspu/device/pphlo/pphlo_executor_test_runner.h" +#include "libspu/device/utils/pphlo_executor_test_runner.h" #include "libspu/compiler/common/compilation_context.h" #include "libspu/compiler/compile.h" diff --git a/libspu/device/pphlo/pphlo_executor_test_runner.h b/libspu/device/utils/pphlo_executor_test_runner.h similarity index 100% rename from libspu/device/pphlo/pphlo_executor_test_runner.h rename to libspu/device/utils/pphlo_executor_test_runner.h diff --git a/libspu/dialect/pphlo/BUILD.bazel b/libspu/dialect/pphlo/IR/BUILD.bazel similarity index 99% rename from libspu/dialect/pphlo/BUILD.bazel rename to libspu/dialect/pphlo/IR/BUILD.bazel index 07438cab..8cbde13d 100644 --- a/libspu/dialect/pphlo/BUILD.bazel +++ b/libspu/dialect/pphlo/IR/BUILD.bazel @@ -163,6 +163,7 @@ spu_cc_library( ":ops_inc_gen", ":types_inc_gen", "//libspu/core:prelude", + "//libspu/dialect/utils", "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:IR", "@stablehlo//:stablehlo_type_inference", diff --git a/libspu/dialect/pphlo/assembly_format.cc b/libspu/dialect/pphlo/IR/assembly_format.cc similarity index 57% rename from libspu/dialect/pphlo/assembly_format.cc rename to libspu/dialect/pphlo/IR/assembly_format.cc index fe0e834e..4038cec8 100644 --- a/libspu/dialect/pphlo/assembly_format.cc +++ b/libspu/dialect/pphlo/IR/assembly_format.cc @@ -12,82 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "libspu/dialect/pphlo/assembly_format.h" +#include "libspu/dialect/pphlo/IR/assembly_format.h" namespace mlir::spu::pphlo { -namespace { - -ParseResult assignFromFunctionType(OpAsmParser& parser, llvm::SMLoc loc, - ArrayRef operands, Type& result, - FunctionType& fn_type) { - assert(fn_type); - if (fn_type.getInputs().size() != operands.size()) { - return parser.emitError(loc) - << operands.size() << " operands present, but expected " - << fn_type.getInputs().size(); - } - - // Set operand types to function input types - for (auto [operand, input] : llvm::zip(operands, fn_type.getInputs())) { - *operand = input; - } - - // Set result type - if (fn_type.getResults().size() != 1) { - return parser.emitError(loc, "expected single output"); - } - result = fn_type.getResults()[0]; - - return success(); -} - -} // namespace - -namespace detail { -void printSameOperandsAndResultTypeImpl(OpAsmPrinter& p, Operation* op, - TypeRange operands, Type result) { - // Handle zero operand types `() -> a` prints `a` - if (operands.empty()) { - p.printType(result); - return; - } - // Handle all same type `(a,a,...) -> a` prints `a` - bool allSameType = - llvm::all_of(operands, [&result](auto t) { return t == result; }); - if (allSameType) { - p.printType(result); - return; - } - // Fall back to generic - p.printFunctionalType(op); -} - -ParseResult parseSameOperandsAndResultTypeImpl(OpAsmParser& parser, - ArrayRef operands, - Type& result) { - llvm::SMLoc loc = parser.getCurrentLocation(); - - Type type; - if (parser.parseType(type)) { - return failure(); - } - - // Handle if function type, all operand types did not match result type. - if (auto fnType = mlir::dyn_cast(type)) { - return assignFromFunctionType(parser, loc, operands, result, fnType); - } - - // Handle bare types. ` : type` indicating all input/output types match. 
- for (Type* t : operands) { - *t = type; - } - result = type; - return success(); -} - -} // namespace detail - void printSliceRanges(OpAsmPrinter& p, Operation* op, ArrayRef start_indices, ArrayRef limit_indices, diff --git a/libspu/dialect/pphlo/assembly_format.h b/libspu/dialect/pphlo/IR/assembly_format.h similarity index 84% rename from libspu/dialect/pphlo/assembly_format.h rename to libspu/dialect/pphlo/IR/assembly_format.h index 07ecf57f..f1a873e8 100644 --- a/libspu/dialect/pphlo/assembly_format.h +++ b/libspu/dialect/pphlo/IR/assembly_format.h @@ -14,43 +14,31 @@ #pragma once -#include "llvm/ADT/ArrayRef.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/Operation.h" -#include "mlir/IR/TypeRange.h" #include "mlir/IR/Types.h" #include "mlir/Support/LogicalResult.h" +#include "libspu/dialect/utils/assembly_format.h" + namespace mlir::spu::pphlo { -namespace detail { -void printSameOperandsAndResultTypeImpl(OpAsmPrinter& p, Operation* op, - TypeRange operands, Type result); -ParseResult parseSameOperandsAndResultTypeImpl(OpAsmParser& parser, - ArrayRef operands, - Type& result); -} // namespace detail template void printSameOperandsAndResultType(OpAsmPrinter& p, Operation* op, OpTypes... types) { - static_assert(sizeof...(types) > 0); - SmallVector typesVec{types...}; - ArrayRef typesRef = ArrayRef(typesVec); - return detail::printSameOperandsAndResultTypeImpl( - p, op, typesRef.drop_back(1), typesRef.back()); + mlir::spu::printSameOperandsAndResultType(p, op, + std::forward(types)...); } + template ParseResult parseSameOperandsAndResultType(OpAsmParser& parser, OpTypes&... 
types) { - static_assert(sizeof...(types) > 0); - SmallVector typesVec{&types...}; - ArrayRef typesRef = ArrayRef(typesVec); - return detail::parseSameOperandsAndResultTypeImpl( - parser, typesRef.drop_back(1), *typesRef.back()); + return mlir::spu::parseSameOperandsAndResultType( + parser, std::forward(types)...); } // SliceRanges - Used to print multi-dimensional ranges for slice. @@ -158,4 +146,4 @@ ParseResult parseDotDimensionNumbers(AsmParser& parser, AttrTy& target) { return success(); } -} // namespace mlir::spu::pphlo \ No newline at end of file +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/attrs.cc b/libspu/dialect/pphlo/IR/attrs.cc similarity index 98% rename from libspu/dialect/pphlo/attrs.cc rename to libspu/dialect/pphlo/IR/attrs.cc index cb906d56..35633cc6 100644 --- a/libspu/dialect/pphlo/attrs.cc +++ b/libspu/dialect/pphlo/IR/attrs.cc @@ -12,13 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "libspu/dialect/pphlo/attrs.h" +#include "libspu/dialect/pphlo/IR/attrs.h" #include #include "llvm/ADT/StringSet.h" #include "mlir/IR/Builders.h" +#include "libspu/dialect/utils/assembly_format.h" + namespace mlir::spu::pphlo { namespace { @@ -556,11 +558,11 @@ ParseResult parseWindowAttributes(OpAsmParser& parser, } void printCustomCallTarget(AsmPrinter& p, Operation*, StringAttr target) { - p.printSymbolName(target.getValue()); + mlir::spu::printCustomCallTargetImpl(p, target); } ParseResult parseCustomCallTarget(AsmParser& parser, StringAttr& target) { - return parser.parseSymbolName(target); + return mlir::spu::parseCustomCallTargetImpl(parser, target); } } // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/attrs.h b/libspu/dialect/pphlo/IR/attrs.h similarity index 93% rename from libspu/dialect/pphlo/attrs.h rename to libspu/dialect/pphlo/IR/attrs.h index 852fc173..2bdce5b0 100644 --- a/libspu/dialect/pphlo/attrs.h +++ b/libspu/dialect/pphlo/IR/attrs.h @@ -17,9 +17,9 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/Operation.h" -#include "libspu/dialect/pphlo/base_enums.h" +#include "libspu/dialect/pphlo/IR/base_enums.h" #define GET_ATTRDEF_CLASSES -#include "libspu/dialect/pphlo/attrs.h.inc" +#include "libspu/dialect/pphlo/IR/attrs.h.inc" namespace mlir::spu::pphlo { diff --git a/libspu/dialect/pphlo/attrs.td b/libspu/dialect/pphlo/IR/attrs.td similarity index 97% rename from libspu/dialect/pphlo/attrs.td rename to libspu/dialect/pphlo/IR/attrs.td index a9cc9c98..1d164aed 100644 --- a/libspu/dialect/pphlo/attrs.td +++ b/libspu/dialect/pphlo/IR/attrs.td @@ -18,7 +18,7 @@ include "mlir/IR/OpBase.td" include "mlir/IR/AttrTypeBase.td" -include "libspu/dialect/pphlo/dialect.td" +include "libspu/dialect/pphlo/IR/dialect.td" def PPHloDim : ArrayRefParameter<"int64_t", "Dimension">; diff --git a/libspu/dialect/pphlo/base_enums.cc b/libspu/dialect/pphlo/IR/base_enums.cc similarity index 82% rename from 
libspu/dialect/pphlo/base_enums.cc rename to libspu/dialect/pphlo/IR/base_enums.cc index ea826d08..f7816c48 100644 --- a/libspu/dialect/pphlo/base_enums.cc +++ b/libspu/dialect/pphlo/IR/base_enums.cc @@ -12,6 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "libspu/dialect/pphlo/base_enums.h" // IWYU pragma: keep +#include "libspu/dialect/pphlo/IR/base_enums.h" // IWYU pragma: keep -#include "libspu/dialect/pphlo/base_enums.cc.inc" +#include "libspu/dialect/pphlo/IR/base_enums.cc.inc" diff --git a/libspu/dialect/pphlo/base_enums.h b/libspu/dialect/pphlo/IR/base_enums.h similarity index 94% rename from libspu/dialect/pphlo/base_enums.h rename to libspu/dialect/pphlo/IR/base_enums.h index 0f09c07e..14bb7d63 100644 --- a/libspu/dialect/pphlo/base_enums.h +++ b/libspu/dialect/pphlo/IR/base_enums.h @@ -23,7 +23,7 @@ // Order matters, this .inc header is not self-contained, and relies on the // #includes above. -#include "libspu/dialect/pphlo/base_enums.h.inc" +#include "libspu/dialect/pphlo/IR/base_enums.h.inc" namespace mlir::spu::pphlo { diff --git a/libspu/dialect/pphlo/base_enums.td b/libspu/dialect/pphlo/IR/base_enums.td similarity index 96% rename from libspu/dialect/pphlo/base_enums.td rename to libspu/dialect/pphlo/IR/base_enums.td index 2ae959d8..2f70587f 100644 --- a/libspu/dialect/pphlo/base_enums.td +++ b/libspu/dialect/pphlo/IR/base_enums.td @@ -19,7 +19,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/IR/OpBase.td" -include "libspu/dialect/pphlo/dialect.td" +include "libspu/dialect/pphlo/IR/dialect.td" //===----------------------------------------------------------------------===// // Sort direction enum definitions. 
diff --git a/libspu/dialect/pphlo/canonicalization.cc b/libspu/dialect/pphlo/IR/canonicalization.cc similarity index 67% rename from libspu/dialect/pphlo/canonicalization.cc rename to libspu/dialect/pphlo/IR/canonicalization.cc index c57dbcf5..f44c6c0a 100644 --- a/libspu/dialect/pphlo/canonicalization.cc +++ b/libspu/dialect/pphlo/IR/canonicalization.cc @@ -14,9 +14,10 @@ #include +#include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" namespace mlir::spu::pphlo { @@ -370,6 +371,163 @@ class NormalizeDimensionOrder : public OpRewritePattern { } }; +class NormalizeConv1D : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(pphlo::ConvolutionOp op, + PatternRewriter& rewriter) const override { + // Check 1D conv + auto dnums = op.getDimensionNumbers(); + if (dnums.getInputSpatialDimensions().size() != 1) { + return failure(); + } + + // Check in [b, 0, f]x[0, i, o] -> [b, 0, f] + if (dnums.getInputBatchDimension() != 0 && + dnums.getInputFeatureDimension() != 2) { + return failure(); + } + if (dnums.getKernelInputFeatureDimension() != 1 && + dnums.getKernelOutputFeatureDimension() != 2) { + return failure(); + } + if (dnums.getOutputBatchDimension() != 0 && + dnums.getOutputFeatureDimension() != 2) { + return failure(); + } + + auto lhs_type = mlir::dyn_cast(op.getLhs().getType()); + auto rhs_type = mlir::dyn_cast(op.getRhs().getType()); + auto ret_type = mlir::dyn_cast(op.getResult().getType()); + + // reshape lhs to [b, 1, s0, f] + auto reshaped_lhs = rewriter.create( + op->getLoc(), + RankedTensorType::get({lhs_type.getShape()[0], 1, + lhs_type.getShape()[1], lhs_type.getShape()[2]}, + lhs_type.getElementType()), + op.getLhs()); + + // reshape rhs to [1, s0, i, o] + auto reshaped_rhs = rewriter.create( + op->getLoc(), + RankedTensorType::get({1, rhs_type.getShape()[0], + rhs_type.getShape()[1], 
rhs_type.getShape()[2]}, + rhs_type.getElementType()), + op.getRhs()); + + auto new_dnums = ConvDimensionNumbersAttr::get( + op->getContext(), dnums.getInputBatchDimension(), + dnums.getInputFeatureDimension() + 1, {1, 2}, + dnums.getKernelInputFeatureDimension() + 1, + dnums.getKernelOutputFeatureDimension() + 1, {0, 1}, + dnums.getOutputBatchDimension(), dnums.getOutputFeatureDimension() + 1, + {1, 2}); + + llvm::SmallVector window_strides(2, 1); + if (op.getWindowStrides().has_value()) { + window_strides[1] = (*op.getWindowStrides())[0]; + } + + // create a new 2d conv + auto new_conv = rewriter.create( + op->getLoc(), + RankedTensorType::get({ret_type.getShape()[0], 1, + ret_type.getShape()[1], ret_type.getShape()[2]}, + ret_type.getElementType()), + reshaped_lhs, reshaped_rhs, + DenseI64ArrayAttr::get(op->getContext(), window_strides), new_dnums); + + // Reshape back + rewriter.replaceOpWithNewOp(op, ret_type, new_conv); + + return success(); + } +}; + +class DivToReciprocal : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(pphlo::DivOp op, + PatternRewriter& rewriter) const override { + TypeTools tools_(op->getContext()); + if (!tools_.isFloatType(op.getType())) { + return failure(); + } + + auto lhs_def = op.getLhs().getDefiningOp(); + + if (lhs_def == nullptr) { + return failure(); + } + + auto const_value = lhs_def.getValue(); + if (!const_value.isSplat()) { + return failure(); + } + + if (const_value.getSplatValue().convertToDouble() == 1.0F) { + rewriter.replaceOpWithNewOp(op, op.getRhs()); + } + + return failure(); + } +}; + +class NormalizeDotShape : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(DotOp op, + PatternRewriter& rewriter) const override { + auto lhs_type = mlir::dyn_cast(op.getLhs().getType()); + auto rhs_type = mlir::dyn_cast(op.getRhs().getType()); + + // Semantics listed at 
https://openxla.org/xla/operation_semantics#dot + // scalar dot scalar + Value new_dot; + if (lhs_type.getRank() == 1 && rhs_type.getRank() == 1) { + // n dot n -> 1xn dot nx1 -> 1x1 -> scalar + auto new_lhs_type = RankedTensorType::get({1, lhs_type.getNumElements()}, + lhs_type.getElementType()); + auto new_rhs_type = RankedTensorType::get({rhs_type.getNumElements(), 1}, + rhs_type.getElementType()); + auto lhs = + rewriter.create(op->getLoc(), new_lhs_type, op.getLhs()); + auto rhs = + rewriter.create(op->getLoc(), new_rhs_type, op.getRhs()); + + new_dot = rewriter.create(op->getLoc(), lhs, rhs); + } else if (lhs_type.getRank() == 2 && rhs_type.getRank() == 1) { + // matrix dot vector + // mxk dot k -> mxk dot kx1 -> mx1 -> m + auto new_rhs_type = RankedTensorType::get({rhs_type.getNumElements(), 1}, + rhs_type.getElementType()); + auto rhs = + rewriter.create(op->getLoc(), new_rhs_type, op.getRhs()); + + new_dot = rewriter.create(op->getLoc(), op.getLhs(), rhs); + } else if (lhs_type.getRank() == 1 && rhs_type.getRank() == 2) { + // vector dot matrix + // k dot k*n -> 1xk * k*n -> 1xn -> n + auto new_lhs_type = RankedTensorType::get({1, lhs_type.getNumElements()}, + lhs_type.getElementType()); + auto lhs = + rewriter.create(op->getLoc(), new_lhs_type, op.getLhs()); + + new_dot = rewriter.create(op->getLoc(), lhs, op.getRhs()); + } else { + return failure(); + } + + rewriter.replaceOpWithNewOp(op, op.getResult().getType(), + new_dot); + return success(); + } +}; + class MarkValueOnlyTopK : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; @@ -406,16 +564,66 @@ class MarkValueOnlyTopK : public OpRewritePattern { } }; -#include "libspu/dialect/pphlo/canonicalization_patterns.cc.inc" +class MergeMulConstant : public OpRewritePattern { + private: + ConstantOp getActualDefiningConstant(Value v) const { + if (auto op = v.getDefiningOp()) { + return op; + } + + if (auto op = v.getDefiningOp()) { + return 
getActualDefiningConstant(op->getOperand(0)); + } + + return nullptr; + } + + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(pphlo::MulOp op, + PatternRewriter& rewriter) const override { + auto lhs = getActualDefiningConstant(op.getLhs()); + auto rhs = getActualDefiningConstant(op.getRhs()); + if (!lhs && !rhs) { + return failure(); + } + + // x * 1 -> x + if (rhs && (matchPattern(rhs.getValue(), m_One()) || + matchPattern(rhs.getValue(), m_OneFloat()))) { + rewriter.replaceOpWithNewOp(op, op.getResult().getType(), + op.getLhs()); + return success(); + } + + // x * 0 -> 0 + if (rhs && (matchPattern(rhs.getValue(), m_Zero()) || + matchPattern(rhs.getValue(), m_AnyZeroFloat()))) { + rewriter.replaceOpWithNewOp(op, op.getResult().getType(), + op.getRhs()); + return success(); + } + + return failure(); + } +}; + +#include "libspu/dialect/pphlo/IR/canonicalization_patterns.cc.inc" void DotGeneralOp::getCanonicalizationPatterns(RewritePatternSet& results, MLIRContext* context) { results.add(context); } +void DotOp::getCanonicalizationPatterns(RewritePatternSet& results, + MLIRContext* context) { + results.add(context); +} + void ConvolutionOp::getCanonicalizationPatterns(RewritePatternSet& results, MLIRContext* context) { - results.add(context); + results.add(context); } void SelectOp::getCanonicalizationPatterns(::mlir::RewritePatternSet& results, @@ -423,9 +631,19 @@ void SelectOp::getCanonicalizationPatterns(::mlir::RewritePatternSet& results, results.add(context); } +void DivOp::getCanonicalizationPatterns(::mlir::RewritePatternSet& results, + ::mlir::MLIRContext* context) { + results.add(context); +} + void CustomCallOp::getCanonicalizationPatterns(RewritePatternSet& results, MLIRContext* context) { results.add(context); } -} // namespace mlir::spu::pphlo \ No newline at end of file +void MulOp::getCanonicalizationPatterns(RewritePatternSet& results, + MLIRContext* context) { + results.add(context); +} + +} // namespace 
mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/canonicalization_patterns.td b/libspu/dialect/pphlo/IR/canonicalization_patterns.td similarity index 95% rename from libspu/dialect/pphlo/canonicalization_patterns.td rename to libspu/dialect/pphlo/IR/canonicalization_patterns.td index 1c2dffce..4289e73b 100644 --- a/libspu/dialect/pphlo/canonicalization_patterns.td +++ b/libspu/dialect/pphlo/IR/canonicalization_patterns.td @@ -16,7 +16,7 @@ // Canonicalization patterns for the MHLO dialect. include "mlir/IR/PatternBase.td" -include "libspu/dialect/pphlo/ops.td" +include "libspu/dialect/pphlo/IR/ops.td" // select(not(p), t, f) => select(p, f, t) def FusePredNegIntoSelect : Pat< diff --git a/libspu/dialect/pphlo/dialect.cc b/libspu/dialect/pphlo/IR/dialect.cc similarity index 65% rename from libspu/dialect/pphlo/dialect.cc rename to libspu/dialect/pphlo/IR/dialect.cc index 7cac0700..282fb2bc 100644 --- a/libspu/dialect/pphlo/dialect.cc +++ b/libspu/dialect/pphlo/IR/dialect.cc @@ -12,40 +12,40 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/dialect/pphlo/IR/dialect.h" #include "llvm/ADT/TypeSwitch.h" // IWYU pragma: keep #include "mlir/IR/Builders.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpDefinition.h" -#include "libspu/dialect/pphlo/attrs.h" // IWYU pragma: keep -#include "libspu/dialect/pphlo/ops.h" // IWYU pragma: keep -#include "libspu/dialect/pphlo/types.h" // IWYU pragma: keep +#include "libspu/dialect/pphlo/IR/attrs.h" // IWYU pragma: keep +#include "libspu/dialect/pphlo/IR/ops.h" // IWYU pragma: keep +#include "libspu/dialect/pphlo/IR/types.h" // IWYU pragma: keep #define GET_ATTRDEF_CLASSES -#include "libspu/dialect/pphlo/attrs.cc.inc" +#include "libspu/dialect/pphlo/IR/attrs.cc.inc" #define GET_TYPEDEF_CLASSES -#include "libspu/dialect/pphlo/dialect.cc.inc" -#include "libspu/dialect/pphlo/types.cc.inc" +#include "libspu/dialect/pphlo/IR/dialect.cc.inc" +#include "libspu/dialect/pphlo/IR/types.cc.inc" namespace mlir::spu::pphlo { void PPHloDialect::initialize() { addOperations< #define GET_OP_LIST -#include "libspu/dialect/pphlo/ops.cc.inc" +#include "libspu/dialect/pphlo/IR/ops.cc.inc" >(); addTypes< #define GET_TYPEDEF_LIST -#include "libspu/dialect/pphlo/types.cc.inc" +#include "libspu/dialect/pphlo/IR/types.cc.inc" >(); addAttributes< #define GET_ATTRDEF_LIST -#include "libspu/dialect/pphlo/attrs.cc.inc" +#include "libspu/dialect/pphlo/IR/attrs.cc.inc" >(); } @@ -89,4 +89,16 @@ void PPHloDialect::printAttribute(Attribute attr, DialectAsmPrinter& os) const { assert(succeeded(result)); } +/// Hook to materialize a single constant operation from a given attribute value +/// with the desired resultant type. This method should use the provided builder +/// to create the operation without changing the insertion position. The +/// generated operation is expected to be constant-like. On success, this hook +/// should return the value generated to represent the constant value. 
+/// Otherwise, it should return nullptr on failure. +Operation* PPHloDialect::materializeConstant(OpBuilder& builder, + Attribute value, Type type, + Location loc) { + return builder.create(loc, type, dyn_cast(value)); +} + } // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/dialect.h b/libspu/dialect/pphlo/IR/dialect.h similarity index 94% rename from libspu/dialect/pphlo/dialect.h rename to libspu/dialect/pphlo/IR/dialect.h index 7ee6896d..b6a7e8a5 100644 --- a/libspu/dialect/pphlo/dialect.h +++ b/libspu/dialect/pphlo/IR/dialect.h @@ -20,4 +20,4 @@ // PPHLO Dialect //===----------------------------------------------------------------------===// -#include "libspu/dialect/pphlo/dialect.h.inc" +#include "libspu/dialect/pphlo/IR/dialect.h.inc" diff --git a/libspu/dialect/pphlo/dialect.td b/libspu/dialect/pphlo/IR/dialect.td similarity index 98% rename from libspu/dialect/pphlo/dialect.td rename to libspu/dialect/pphlo/IR/dialect.td index 2a52b5f4..de2c648c 100644 --- a/libspu/dialect/pphlo/dialect.td +++ b/libspu/dialect/pphlo/IR/dialect.td @@ -41,6 +41,7 @@ def PPHlo_Dialect : Dialect { let useDefaultAttributePrinterParser = 0; let useDefaultTypePrinterParser = 0; let usePropertiesForAttributes = 0; + let hasConstantMaterializer = 1; let extraClassDeclaration = [{ Attribute parseAttribute(DialectAsmParser & parser, Type type) const override; diff --git a/libspu/dialect/pphlo/IR/fold.cc b/libspu/dialect/pphlo/IR/fold.cc new file mode 100644 index 00000000..7946c07b --- /dev/null +++ b/libspu/dialect/pphlo/IR/fold.cc @@ -0,0 +1,159 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mlir/Dialect/CommonFolders.h" +#include "mlir/Dialect/UB/IR/UBOps.h" // IWYU pragma: keep PoisonAttr + +#include "libspu/dialect/pphlo/IR/ops.h" + +namespace mlir::spu::pphlo { + +OpFoldResult ConstantOp::fold([[maybe_unused]] FoldAdaptor adaptor) { + assert(adaptor.getOperands().empty() && "constant has no operands"); + + // Return the held attribute value. + return getValue(); +} + +OpFoldResult ConvertOp::fold(FoldAdaptor) { + if (getOperand().getType() == getResult().getType()) { + return getOperand(); + } + return {}; +} + +OpFoldResult ReverseOp::fold(FoldAdaptor) { + auto input = getOperand(); + + // No dimensions to reverse. + auto dims = getDimensions(); + if (dims.empty()) { + return input; + } + + // If the dimensions to reverse are all statically 1, then the reverse is a + // no-op. 
+ auto shapedType = mlir::dyn_cast(input.getType()); + if (llvm::all_of( + dims, [&](int64_t dim) { return shapedType.getDimSize(dim) == 1; })) { + return input; + } + return {}; +} + +OpFoldResult ReciprocalOp::fold(FoldAdaptor adaptor) { + return constFoldUnaryOp(adaptor.getOperands(), + [](const APFloat& a) { + APFloat one(a.getSemantics(), 1); + return one / a; + }); +} + +OpFoldResult ReshapeOp::fold(FoldAdaptor) { + auto operand_shape = + mlir::dyn_cast(getOperand().getType()).getShape(); + auto result_shape = + mlir::dyn_cast(getResult().getType()).getShape(); + if (operand_shape == result_shape) { + return getOperand(); + } + return {}; +} + +OpFoldResult MulOp::fold(FoldAdaptor adaptor) { + if (!adaptor.getLhs() || !adaptor.getRhs()) { + return {}; + } + + if (isa(adaptor.getLhs()) && isa(adaptor.getRhs())) { + auto lhs = cast(adaptor.getLhs()); + auto rhs = cast(adaptor.getRhs()); + + if (lhs.getType() == rhs.getType()) { + // int * int + if (isa(lhs.getElementType())) { + return constFoldBinaryOp( + adaptor.getOperands(), + [](const APInt& a, const APInt& b) { return a * b; }); + } + // float * float + if (isa(lhs.getElementType())) { + return constFoldBinaryOp( + adaptor.getOperands(), + [](const APFloat& a, const APFloat& b) { return a * b; }); + } + } + + // mixed type, currently only handle splat + if (isa(adaptor.getLhs()) && + isa(adaptor.getRhs())) { + // Both operands are splats so we can avoid expanding the values out and + // just fold based on the splat value. 
+ auto lhs = cast(adaptor.getLhs()); + auto rhs = cast(adaptor.getRhs()); + + auto calc = [](const APFloat& lhs, const APInt& rhs, bool rhs_is_signed) { + APFloat rhs_f = APFloat(lhs.getSemantics()); + rhs_f.convertFromAPInt(rhs, rhs_is_signed, + APFloat::roundingMode::NearestTiesToEven); + + return rhs_f * lhs; + }; + + if (isa(lhs.getElementType()) && + isa(rhs.getElementType())) { + auto lhs_v = lhs.getSplatValue(); + auto rhs_v = rhs.getSplatValue(); + auto rhs_isSigned = + !(dyn_cast(rhs.getElementType()).isUnsigned()); + + auto elementResult = calc(lhs_v, rhs_v, rhs_isSigned); + + return DenseElementsAttr::get(cast(lhs.getType()), + elementResult); + } else if (isa(lhs.getElementType()) && + isa(rhs.getElementType())) { + auto lhs_v = lhs.getSplatValue(); + auto rhs_v = rhs.getSplatValue(); + auto lhs_isSigned = + !(dyn_cast(lhs.getElementType()).isUnsigned()); + + auto elementResult = calc(rhs_v, lhs_v, lhs_isSigned); + + return DenseElementsAttr::get(cast(rhs.getType()), + elementResult); + } + } + } + + return {}; +} + +OpFoldResult TransposeOp::fold(FoldAdaptor) { + for (const auto& it : llvm::enumerate(getPermutation())) { + if (static_cast(it.index()) != it.value()) { + return {}; + } + } + return getOperand(); +} + +OpFoldResult SliceOp::fold(FoldAdaptor) { + if (getOperand().getType() == getResult().getType()) { + return getOperand(); + } + return {}; +} + +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/interface.h b/libspu/dialect/pphlo/IR/interface.h similarity index 94% rename from libspu/dialect/pphlo/interface.h rename to libspu/dialect/pphlo/IR/interface.h index a6d41b0d..3a457d9c 100644 --- a/libspu/dialect/pphlo/interface.h +++ b/libspu/dialect/pphlo/IR/interface.h @@ -19,4 +19,4 @@ //===----------------------------------------------------------------------===// // PPHLO Interface //===----------------------------------------------------------------------===// -#include "libspu/dialect/pphlo/interface.h.inc" +#include 
"libspu/dialect/pphlo/IR/interface.h.inc" diff --git a/libspu/dialect/pphlo/interface.td b/libspu/dialect/pphlo/IR/interface.td similarity index 100% rename from libspu/dialect/pphlo/interface.td rename to libspu/dialect/pphlo/IR/interface.td diff --git a/libspu/dialect/pphlo/ops.cc b/libspu/dialect/pphlo/IR/ops.cc similarity index 97% rename from libspu/dialect/pphlo/ops.cc rename to libspu/dialect/pphlo/IR/ops.cc index 9eee163a..564d44e1 100644 --- a/libspu/dialect/pphlo/ops.cc +++ b/libspu/dialect/pphlo/IR/ops.cc @@ -12,16 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" #include "fmt/format.h" #include "llvm/Support/FormatVariadic.h" #include "mlir/IR/Builders.h" #include "mlir/IR/TypeUtilities.h" -#include "libspu/dialect/pphlo/attrs.h" -#include "libspu/dialect/pphlo/base_enums.h" -#include "libspu/dialect/pphlo/ops.h.inc" +#include "libspu/dialect/pphlo/IR/ops.h.inc" namespace mlir::spu::pphlo { @@ -508,6 +506,19 @@ LogicalResult SliceOp::verify() { return success(); } +LogicalResult BitcastConvertOp::verify() { + auto operand_type = getOperand().getType(); + auto ret_type = getType(); + TypeTools tools(getContext()); + + if (tools.getTypeVisibility(ret_type) != + tools.getTypeVisibility(operand_type)) { + return emitOpError("should not change visibility type"); + } + + return success(); +} + void CustomCallOp::getEffects( SmallVectorImpl>& effects) { @@ -526,4 +537,4 @@ void CustomCallOp::getEffects( } // namespace mlir::spu::pphlo #define GET_OP_CLASSES -#include "libspu/dialect/pphlo/ops.cc.inc" +#include "libspu/dialect/pphlo/IR/ops.cc.inc" diff --git a/libspu/dialect/pphlo/ops.h b/libspu/dialect/pphlo/IR/ops.h similarity index 90% rename from libspu/dialect/pphlo/ops.h rename to libspu/dialect/pphlo/IR/ops.h index c15c1595..671c521e 100644 --- a/libspu/dialect/pphlo/ops.h +++ b/libspu/dialect/pphlo/IR/ops.h 
@@ -18,7 +18,7 @@ #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -#include "libspu/dialect/pphlo/assembly_format.h" +#include "libspu/dialect/pphlo/IR/assembly_format.h" namespace mlir::spu::pphlo::OpTrait { @@ -50,8 +50,8 @@ class PairwiseSameOperandAndResultType } // namespace mlir::spu::pphlo::OpTrait // Put it here -#include "libspu/dialect/pphlo/attrs.h" -#include "libspu/dialect/pphlo/types.h" +#include "libspu/dialect/pphlo/IR/attrs.h" +#include "libspu/dialect/pphlo/IR/types.h" #define GET_OP_CLASSES -#include "libspu/dialect/pphlo/ops.h.inc" +#include "libspu/dialect/pphlo/IR/ops.h.inc" diff --git a/libspu/dialect/pphlo/ops.td b/libspu/dialect/pphlo/IR/ops.td similarity index 97% rename from libspu/dialect/pphlo/ops.td rename to libspu/dialect/pphlo/IR/ops.td index 6fa0df10..b9f6b0c2 100644 --- a/libspu/dialect/pphlo/ops.td +++ b/libspu/dialect/pphlo/IR/ops.td @@ -22,9 +22,9 @@ include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" -include "attrs.td" -include "base_enums.td" -include "types.td" +include "libspu/dialect/pphlo/IR/attrs.td" +include "libspu/dialect/pphlo/IR/base_enums.td" +include "libspu/dialect/pphlo/IR/types.td" //===----------------------------------------------------------------------===// // Common traits @@ -114,7 +114,6 @@ def PPHLO_ConstantOp : PPHLO_Op<"constant", [ConstantLike, Pure]> { let arguments = (ins ElementsAttr : $value); let results = (outs PPHLO_Tensor : $output); let builders = [OpBuilder<(ins "Attribute" : $value)>]; - let skipDefaultBuilders = 1; let hasFolder = 1; let hasCustomAssemblyFormat = 1; } @@ -263,6 +262,20 @@ def PPHLO_NotOp }]; } +def PPHLO_PopcntOp : PPHLO_UnaryElementwiseOp<"popcnt", [Pure, SameOperandsAndResultType], PPHLO_IntTensor> { + let summary = "Popcnt operator, ties away from zero"; + let description = [{ + Performs element-wise count of the number of bits set in the 
`operand` tensor and produces a `result` tensor. + + Ref https://github.com/openxla/stablehlo/blob/main/docs/spec.md#popcnt + }]; + + let arguments = (ins + PPHLO_IntTensor: $operand, + OptionalAttr: $bits + ); +} + def PPHLO_RoundOp : PPHLO_UnaryElementwiseOpWithTypeInfer<"round_nearest_afz", [SameOperandsAndResultType], PPHLO_FpTensor> { let summary = "Round operator, ties away from zero"; @@ -306,7 +319,14 @@ def PPHLO_SignOp Returns the sign of the `operand` element-wise and produces a `result` tensor. Ref https://github.com/openxla/stablehlo/blob/main/docs/spec.md#sign + + PPHLO Extension: when `ignore_zero` is set to true, sign does not enforce sign(0) to 0 }]; + + let arguments = (ins + PPHLO_Tensor: $operand, + DefaultValuedAttr: $ignore_zero + ); } def PPHLO_SineOp @@ -359,6 +379,8 @@ def PPHLO_DivOp : PPHLO_BinaryElementwiseOpWithTypeInfer<"divide", [], PPHLO_Ten Ref https://github.com/openxla/stablehlo/blob/main/docs/spec.md#divide }]; + + let hasCanonicalizer = 1; } def PPHLO_MaxOp @@ -389,6 +411,8 @@ def PPHLO_MulOp Ref https://github.com/openxla/stablehlo/blob/main/docs/spec.md#multiply }]; + let hasFolder = 1; + let hasCanonicalizer = 1; } def PPHLO_PowOp : PPHLO_BinaryElementwiseOpWithTypeInfer<"power", [], PPHLO_Tensor> { @@ -573,6 +597,7 @@ def PPHLO_BitcastConvertOp : PPHLO_UnaryElementwiseOp<"bitcast_convert", [Pure], Ref https://github.com/openxla/stablehlo/blob/main/docs/spec.md#bitcast_convert }]; + let hasVerifier = 1; } def PPHLO_BroadcastOp @@ -660,7 +685,8 @@ def PPHLO_ConvolutionOp : PPHLO_Op<"convolution", [Pure]> { let hasCanonicalizer = 1; } -def PPHLO_DotOp : PPHLO_Op<"dot", [Pure]> { +def PPHLO_DotOp : PPHLO_Op<"dot", [Pure, DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { let summary = "Dot operator"; let description = [{ Performs dot products between vectors, vector/matrix and matrix/matrix @@ -675,6 +701,8 @@ def PPHLO_DotOp : PPHLO_Op<"dot", [Pure]> { $lhs `,` $rhs attr-dict `:` functional-type(operands, results) }]; + 
+ let hasCanonicalizer = 1; } def PPHLO_DotGeneralOp: PPHLO_Op<"dot_general", [Pure]> { @@ -903,7 +931,7 @@ def PPHLO_SelectAndScatterOp: PPHLO_Op<"select_and_scatter", let results = (outs PPHLO_Tensor); } -def PPHLO_SliceOp : PPHLO_Op<"slice", [ +def PPHLO_SliceOp : PPHLO_WithShapeInferOp<"slice", [ Pure, SameOperandsAndResultElementType, AllMatchSameOperatorTrait<["start_indices", "limit_indices", "strides"], "$_self.size()", "size"> /*slice_c2*/, @@ -923,6 +951,7 @@ def PPHLO_SliceOp : PPHLO_Op<"slice", [ let results = (outs PPHLO_AnyTensor); let hasVerifier = 1; + let hasFolder = 1; let assemblyFormat = [{ $operand custom($start_indices, $limit_indices, $strides) @@ -955,7 +984,7 @@ def PPHLO_SortOp def PPHLO_TransposeOp - : PPHLO_Op<"transpose", [Pure, SameOperandsAndResultElementType]> { + : PPHLO_WithShapeInferOp<"transpose", [Pure, SameOperandsAndResultElementType]> { let summary = "Transpose operator"; let description = [{ Permutes the dimensions of `operand` tensor using `permutation` and produces a `result` tensor. @@ -1110,14 +1139,6 @@ def PPHLO_ReciprocalOp: PPHLO_UnaryElementwiseOpWithTypeInfer<"reciprocal", let hasFolder = 1; } -def PPHLO_PreferAOp : PPHLO_UnaryElementwiseOpWithTypeInfer<"prefer_a", - [SameOperandsAndResultType], PPHLO_Tensor> { - let summary = "Prefer AShare operator"; - let description = [{ - Convert input to AShare if possible. 
- }]; -} - def PPHLO_MaxPoolScatterOp: PPHLO_Op<"maxpool_scatter", [Pure]> { let summary = "MaxPool Scatter operator"; let description = [{ diff --git a/libspu/dialect/pphlo/print_parse.cc b/libspu/dialect/pphlo/IR/print_parse.cc similarity index 99% rename from libspu/dialect/pphlo/print_parse.cc rename to libspu/dialect/pphlo/IR/print_parse.cc index 2865db59..8716e011 100644 --- a/libspu/dialect/pphlo/print_parse.cc +++ b/libspu/dialect/pphlo/IR/print_parse.cc @@ -15,7 +15,7 @@ #include "llvm/ADT/StringExtras.h" #include "mlir/IR/TypeUtilities.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" namespace mlir::spu::pphlo { diff --git a/libspu/dialect/pphlo/type_inference.cc b/libspu/dialect/pphlo/IR/type_inference.cc similarity index 70% rename from libspu/dialect/pphlo/type_inference.cc rename to libspu/dialect/pphlo/IR/type_inference.cc index bccab6ba..4e3a9464 100644 --- a/libspu/dialect/pphlo/type_inference.cc +++ b/libspu/dialect/pphlo/IR/type_inference.cc @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "mlir/IR/TypeUtilities.h" #include "stablehlo/dialect/TypeInference.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/core/prelude.h" +#include "libspu/dialect/pphlo/IR/ops.h" namespace mlir::spu::pphlo { @@ -122,7 +124,6 @@ INFER_RETURN_TYPES_FROM_OPERANDS(Atan2Op) INFER_RETURN_TYPES_FROM_OPERANDS(DivOp) INFER_RETURN_TYPES_FROM_OPERANDS(MaxOp) INFER_RETURN_TYPES_FROM_OPERANDS(MinOp) -INFER_RETURN_TYPES_FROM_OPERANDS(MulOp) INFER_RETURN_TYPES_FROM_OPERANDS(OrOp) INFER_RETURN_TYPES_FROM_OPERANDS(PowOp) INFER_RETURN_TYPES_FROM_OPERANDS(RemOp) @@ -168,6 +169,117 @@ INFER_RETURN_TYPES_COMP(NotEqualOp) #undef INFER_RETURN_TYPES_COMP +Type inferMulLikeReturnElementType(Type lhs_type, Type rhs_type) { + auto lhs_ft = mlir::dyn_cast(lhs_type); + auto rhs_ft = mlir::dyn_cast(rhs_type); + // Both float, but different, returns a wider one + if (lhs_ft && rhs_ft) { + return lhs_ft.getWidth() > rhs_ft.getWidth() ? lhs_type : rhs_type; + } + // Only one float, returns that float + if (lhs_ft || rhs_ft) { + return lhs_ft ? 
lhs_type : rhs_type; + } + + auto lhs_it = mlir::dyn_cast(lhs_type); + auto rhs_it = mlir::dyn_cast(rhs_type); + + SPU_ENFORCE(lhs_it && rhs_it); + + IntegerType::SignednessSemantics sign = lhs_it.getSignedness(); + + if (lhs_it.getWidth() == 1) { + sign = rhs_it.getSignedness(); + } else if (rhs_it.getWidth() == 1) { + sign = lhs_it.getSignedness(); + } else if (lhs_it.getSignedness() == rhs_it.getSignedness()) { + sign = lhs_it.getSignedness(); + } else if (lhs_it.getSignedness() != + IntegerType::SignednessSemantics::Unsigned) { + sign = lhs_it.getSignedness(); + } else { + sign = rhs_it.getSignedness(); + } + + return IntegerType::get(lhs_type.getContext(), + std::max(lhs_it.getWidth(), rhs_it.getWidth()), sign); +} + +LogicalResult MulOp::inferReturnTypes( + ::mlir::MLIRContext* context, ::std::optional<::mlir::Location> location, + ::mlir::ValueRange operands, ::mlir::DictionaryAttr attributes, + ::mlir::OpaqueProperties properties, ::mlir::RegionRange regions, + ::llvm::SmallVectorImpl<::mlir::Type>& inferredReturnTypes) { + auto types = operands.getTypes(); + TypeTools tools(context); + + // Result shape, guaranteed by op + auto shape = mlir::dyn_cast(types.front()).getShape(); + + // common vis + auto common_vis = tools.computeCommonVisibility( + {tools.getTypeVisibility(types[0]), tools.getTypeVisibility(types[1])}); + + // element type + auto element_type = inferMulLikeReturnElementType( + getElementTypeOrSelf(tools.getExpressedType(types[0])), + getElementTypeOrSelf(tools.getExpressedType(types[1]))); + + inferredReturnTypes.emplace_back( + tools.getType(RankedTensorType::get(shape, element_type), common_vis)); + return success(); +} + +LogicalResult DotOp::inferReturnTypes( + ::mlir::MLIRContext* context, ::std::optional<::mlir::Location> location, + ::mlir::ValueRange operands, ::mlir::DictionaryAttr attributes, + ::mlir::OpaqueProperties properties, ::mlir::RegionRange regions, + ::llvm::SmallVectorImpl<::mlir::Type>& inferredReturnTypes) { + auto 
types = operands.getTypes(); + TypeTools tools(context); + + auto lhsType = mlir::dyn_cast(types[0]); + auto rhsType = mlir::dyn_cast(types[1]); + + llvm::SmallVector dimensions; + + // Result shape, guaranteed by op + if (1 == lhsType.getRank() && 1 == rhsType.getRank() && + // vector dot vector + (lhsType.getDimSize(0) == rhsType.getDimSize(0))) { + } else if (2 == lhsType.getRank() && 1 == rhsType.getRank() && + (lhsType.getDimSize(1) == rhsType.getDimSize(0))) { + // matrix dot vector + dimensions.push_back(lhsType.getDimSize(0)); + } else if (1 == lhsType.getRank() && 2 == rhsType.getRank() && + (lhsType.getDimSize(0) == rhsType.getDimSize(0))) { + // vector dot matrix + dimensions.push_back(rhsType.getDimSize(1)); + } else if (2 == lhsType.getRank() && 2 == rhsType.getRank() && + (lhsType.getDimSize(1) == rhsType.getDimSize(0))) { + // matrix dot matrix + dimensions.push_back(lhsType.getDimSize(0)); + dimensions.push_back(rhsType.getDimSize(1)); + } else { + return emitOptionalError(location, + "expected both lhs/rhs ranks to be " + "either 1 or 2"); + } + + // common vis + auto common_vis = tools.computeCommonVisibility( + {tools.getTypeVisibility(types[0]), tools.getTypeVisibility(types[1])}); + + // element type + auto element_type = inferMulLikeReturnElementType( + getElementTypeOrSelf(tools.getExpressedType(types[0])), + getElementTypeOrSelf(tools.getExpressedType(types[1]))); + + inferredReturnTypes.emplace_back(tools.getType( + RankedTensorType::get(dimensions, element_type), common_vis)); + return success(); +} + LogicalResult PadOp::inferReturnTypes( ::mlir::MLIRContext* context, ::std::optional<::mlir::Location> location, ::mlir::ValueRange operands, ::mlir::DictionaryAttr attributes, @@ -190,6 +302,25 @@ LogicalResult ConcatenateOp::inferReturnTypes( adaptor.getDimension(), inferred_return_types); } +LogicalResult TransposeOp::inferReturnTypes( + MLIRContext*, std::optional location, ValueRange operands, + DictionaryAttr attributes, 
OpaqueProperties, RegionRange regions, + SmallVectorImpl& inferred_return_types) { + TransposeOp::Adaptor adaptor(operands, attributes, {}, regions); + return hlo::inferTransposeOp(location, adaptor.getOperand(), + adaptor.getPermutation(), inferred_return_types); +} + +LogicalResult SliceOp::inferReturnTypes( + MLIRContext*, std::optional location, ValueRange operands, + DictionaryAttr attributes, OpaqueProperties, RegionRange regions, + SmallVectorImpl& inferred_return_types) { + SliceOp::Adaptor adaptor(operands, attributes, {}, regions); + return hlo::inferSliceOp(location, adaptor.getOperand().getType(), + adaptor.getStartIndices(), adaptor.getLimitIndices(), + adaptor.getStrides(), inferred_return_types); +} + LogicalResult inferDynamicSliceOp(std::optional location, Type operandType, TypeRange startIndicesTypes, llvm::ArrayRef sliceSizes, diff --git a/libspu/dialect/pphlo/types.cc b/libspu/dialect/pphlo/IR/types.cc similarity index 91% rename from libspu/dialect/pphlo/types.cc rename to libspu/dialect/pphlo/IR/types.cc index 9a1c8e11..19a76a01 100644 --- a/libspu/dialect/pphlo/types.cc +++ b/libspu/dialect/pphlo/IR/types.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "libspu/dialect/pphlo/types.h" +#include "libspu/dialect/pphlo/IR/types.h" #include "mlir/IR/TypeUtilities.h" @@ -96,4 +96,15 @@ Visibility TypeTools::computeCommonVisibility( return Visibility::PUBLIC; } +Type TypeTools::getBaseType(const Type &type) const { + Type element_type; + + (void)utils::StripAllContainerType(type, [&element_type](const Type &t) { + element_type = t; + return true; + }); + + return element_type; +} + } // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/types.h b/libspu/dialect/pphlo/IR/types.h similarity index 87% rename from libspu/dialect/pphlo/types.h rename to libspu/dialect/pphlo/IR/types.h index 811a49f3..69f71ede 100644 --- a/libspu/dialect/pphlo/types.h +++ b/libspu/dialect/pphlo/IR/types.h @@ -18,11 +18,11 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Types.h" -#include "libspu/dialect/pphlo/base_enums.h" -#include "libspu/dialect/pphlo/interface.h" // IWYU pragma: keep +#include "libspu/dialect/pphlo/IR/base_enums.h" +#include "libspu/dialect/pphlo/IR/interface.h" // IWYU pragma: keep #define GET_TYPEDEF_CLASSES -#include "libspu/dialect/pphlo/types.h.inc" +#include "libspu/dialect/pphlo/IR/types.h.inc" namespace mlir::spu::pphlo { @@ -52,6 +52,8 @@ class TypeTools { // Calculate common visibility Visibility computeCommonVisibility(llvm::ArrayRef vis) const; + + Type getBaseType(const Type &type) const; }; } // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/types.td b/libspu/dialect/pphlo/IR/types.td similarity index 97% rename from libspu/dialect/pphlo/types.td rename to libspu/dialect/pphlo/IR/types.td index 6127b3f6..74bceb27 100644 --- a/libspu/dialect/pphlo/types.td +++ b/libspu/dialect/pphlo/IR/types.td @@ -22,8 +22,8 @@ #ifndef SPU_DIALECT_PPHLO_TYPES #define SPU_DIALECT_PPHLO_TYPES -include "dialect.td" -include "interface.td" +include "libspu/dialect/pphlo/IR/dialect.td" +include "libspu/dialect/pphlo/IR/interface.td" include "mlir/IR/AttrTypeBase.td" include 
"mlir/IR/BuiltinTypeInterfaces.td" @@ -36,7 +36,7 @@ class PPHLO_BaseType } def PPHLO_SecretType : PPHLO_BaseType<"Secret", "secret"> { - let summary = "A secret value"; + let summary = "A secret type"; let parameters = (ins "Type":$baseType); let builders = [ diff --git a/libspu/dialect/pphlo/fold.cc b/libspu/dialect/pphlo/fold.cc deleted file mode 100644 index f73784f1..00000000 --- a/libspu/dialect/pphlo/fold.cc +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2024 Ant Group Co., Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "libspu/core/prelude.h" -#include "libspu/dialect/pphlo/ops.h" - -namespace mlir::spu::pphlo { - -OpFoldResult ConstantOp::fold([[maybe_unused]] FoldAdaptor adaptor) { - assert(adaptor.getOperands().empty() && "constant has no operands"); - - // Return the held attribute value. - return getValue(); -} - -OpFoldResult ConvertOp::fold(FoldAdaptor) { - auto operand_ty = mlir::dyn_cast(getOperand().getType()); - auto result_ty = mlir::dyn_cast(getResult().getType()); - if (operand_ty == result_ty) { - return getOperand(); - } - - return {}; -} - -OpFoldResult ReverseOp::fold(FoldAdaptor) { - auto input = getOperand(); - - // No dimensions to reverse. - auto dims = getDimensions(); - if (dims.empty()) { - return input; - } - - // If the dimensions to reverse are all statically 1, then the reverse is a - // no-op. 
- auto shapedType = mlir::dyn_cast(input.getType()); - if (llvm::all_of( - dims, [&](int64_t dim) { return shapedType.getDimSize(dim) == 1; })) { - return input; - } - return {}; -} - -OpFoldResult ReciprocalOp::fold(FoldAdaptor operands) { - auto val = - mlir::dyn_cast_or_null(operands.getOperands()[0]); - - if (!val) { - return {}; - } - - if (val.isSplat()) { - auto splat_val = val.getSplatValue(); - APFloat one(splat_val.getSemantics(), 1); - - return SplatElementsAttr::get(mlir::dyn_cast(val.getType()), - one / splat_val); - } - - llvm::SmallVector values; - values.reserve(val.getNumElements()); - - auto first_val = *val.getValues().begin(); - APFloat one(first_val.getSemantics(), 1); - - for (auto it : val.getValues()) { - values.push_back(one / it); - } - - return DenseFPElementsAttr::get(mlir::dyn_cast(val.getType()), - values); -} - -OpFoldResult ReshapeOp::fold(FoldAdaptor) { - auto operand_shape = - mlir::dyn_cast(getOperand().getType()).getShape(); - auto result_shape = - mlir::dyn_cast(getResult().getType()).getShape(); - if (operand_shape == result_shape) { - return getOperand(); - } - return {}; -} - -OpFoldResult TransposeOp::fold(FoldAdaptor) { - for (const auto& it : llvm::enumerate(getPermutation())) { - if (static_cast(it.index()) != it.value()) { - return {}; - } - } - return getOperand(); -} - -} // namespace mlir::spu::pphlo \ No newline at end of file diff --git a/libspu/dialect/pphlo/transforms/BUILD.bazel b/libspu/dialect/pphlo/transforms/BUILD.bazel new file mode 100644 index 00000000..e53f46b8 --- /dev/null +++ b/libspu/dialect/pphlo/transforms/BUILD.bazel @@ -0,0 +1,76 @@ +# Copyright 2021 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library") +load("//bazel:spu.bzl", "spu_cc_library") + +gentbl_cc_library( + name = "pphlo_pass_inc_gen", + tbl_outs = [ + ( + ["-gen-pass-decls"], + "passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "passes.td", + visibility = [ + "//visibility:private", # This target is a private detail of pass implementations + ], + deps = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + +gentbl_cc_library( + name = "decompose_patterns_inc_gen", + tbl_outs = [ + ( + ["-gen-rewriters"], + "decompose_patterns.cc.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "decompose_patterns.td", + visibility = [ + "//visibility:private", # This target is a private detail of pass implementations + ], + deps = [ + "//libspu/dialect/pphlo/IR:dialect_td_files", + "@llvm-project//mlir:FuncTdFiles", + ], +) + +spu_cc_library( + name = "all_passes", + srcs = glob([ + "*.cc", + ]), + hdrs = glob([ + "*.h", + ]), + visibility = [ + "//visibility:public", + ], + deps = [ + ":decompose_patterns_inc_gen", + ":pphlo_pass_inc_gen", + "//libspu/compiler/utils", + "//libspu/device:intrinsic_table", + "//libspu/dialect/pphlo/IR:dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:TransformUtils", + "@stablehlo//:stablehlo_ops", + ], +) diff --git a/libspu/compiler/passes/convert_push_down.cc b/libspu/dialect/pphlo/transforms/convert_push_down.cc similarity index 92% rename from libspu/compiler/passes/convert_push_down.cc rename to 
libspu/dialect/pphlo/transforms/convert_push_down.cc index abed5375..07f951a2 100644 --- a/libspu/compiler/passes/convert_push_down.cc +++ b/libspu/dialect/pphlo/transforms/convert_push_down.cc @@ -16,9 +16,9 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { @@ -35,7 +35,7 @@ namespace { // Makes mixed_mul/dot optimization easier template struct TypeAgnosticOpConverter : public OpRewritePattern { -public: + public: explicit TypeAgnosticOpConverter(MLIRContext *context) : OpRewritePattern(context) {} @@ -72,7 +72,7 @@ struct ConvertPushDown : public ConvertPushDownBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert, @@ -80,10 +80,10 @@ struct ConvertPushDown : public ConvertPushDownBase { TypeAgnosticOpConverter>(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createConvertPushDownPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/transforms/decompose_ops.cc b/libspu/dialect/pphlo/transforms/decompose_ops.cc new file mode 100644 index 00000000..cea20679 --- /dev/null +++ b/libspu/dialect/pphlo/transforms/decompose_ops.cc @@ -0,0 +1,144 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" + +namespace mlir::spu::pphlo { + +namespace { + +#include "libspu/dialect/pphlo/transforms/decompose_patterns.cc.inc" + +// https://github.com/openxla/stablehlo/blob/main/docs/spec.md#add +// Boolean add is logical or +struct BooleanAddDecompose : public OpRewritePattern { + private: + TypeTools tool_; + + public: + explicit BooleanAddDecompose(MLIRContext *context) + : OpRewritePattern(context), tool_(context) {} + + LogicalResult matchAndRewrite(AddOp op, + PatternRewriter &rewriter) const override { + auto el_type = mlir::dyn_cast(tool_.getBaseType(op.getType())); + + if (!el_type || el_type.getWidth() > 1) { + return failure(); + } + + rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), + op.getRhs()); + + return success(); + } +}; + +// Rewrites `pphlo.case` to a nested `pphlo.if`. +struct PublicCaseToNestedIf : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + void inlinePPhloRegionIntoNewRegion(PatternRewriter &rewriter, Region ®ion, + Region &ring) const { + // Remove an existing block, then move the region over. + if (!ring.empty()) { + rewriter.eraseBlock(&ring.back()); + } + rewriter.inlineRegionBefore(region, ring, ring.end()); + } + + // Recursively create if/else ops to handle each possible value in a case op. 
+ pphlo::IfOp createNestedCases(int currentIdx, pphlo::CaseOp op, + PatternRewriter &outerBuilder) const { + Location loc = op.getLoc(); + Value idxValue = op.getIndex(); + auto finalIdx = op.getBranches().size() - 2; + // Determine if the current index matches the case index. + auto scalarType = idxValue.getType(); + auto shapedType = mlir::cast(scalarType); + auto constAttr = DenseElementsAttr::get( + shapedType, {mlir::cast( + outerBuilder.getI32IntegerAttr(currentIdx))}); + Value currentIdxVal = + outerBuilder.create(loc, constAttr); + auto pphloIf = outerBuilder.create( + loc, op.getResultTypes(), + outerBuilder.create(loc, idxValue, currentIdxVal)); + inlinePPhloRegionIntoNewRegion(outerBuilder, op.getBranches()[currentIdx], + pphloIf.getTrueBranch()); + int nextIdx = currentIdx + 1; + // Don't recurse for the final default block. + if (currentIdx == static_cast(finalIdx)) { + inlinePPhloRegionIntoNewRegion(outerBuilder, op.getBranches()[nextIdx], + pphloIf.getFalseBranch()); + } else { + PatternRewriter::InsertionGuard guard(outerBuilder); + outerBuilder.setInsertionPointToEnd(&pphloIf.getFalseBranch().back()); + auto innerIf = createNestedCases(nextIdx, op, outerBuilder); + outerBuilder.create(op.getLoc(), innerIf.getResults()); + } + return pphloIf; + } + + LogicalResult matchAndRewrite(pphlo::CaseOp op, + PatternRewriter &rewriter) const override { + // Inline the op if there is only a default block. + if (op.getBranches().size() == 1) { + Block &block = op.getBranches().front().front(); + auto results = block.getTerminator()->getOperands(); + // Remove the mhlo.return terminator, then inline the block. 
+ rewriter.eraseOp(block.getTerminator()); + rewriter.inlineBlockBefore(/*source=*/&block, /*dest=*/op.getOperation(), + /*argValues=*/{}); + rewriter.replaceOp(op, results); + return success(); + } + + TypeTools tools(op->getContext()); + if (tools.isSecretType(op.getIndex().getType())) { + // Leave it to secret cf inline + return failure(); + } + // Begin recursion with case 0. + rewriter.replaceOp(op, createNestedCases(0, op, rewriter).getResults()); + return success(); + } +}; + +struct DecomposeOps : public DecomposeOpsBase { + void runOnOperation() override { + RewritePatternSet patterns(&getContext()); + populateOwningPatterns(&patterns, &getContext()); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + } + + private: + static void populateOwningPatterns(RewritePatternSet *patterns, + MLIRContext *ctx) { + populateWithGenerated(*patterns); + patterns->add(ctx); + } +}; +} // namespace + +std::unique_ptr> createDecomposeOps() { + return std::make_unique(); +} + +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/transforms/decompose_patterns.td b/libspu/dialect/pphlo/transforms/decompose_patterns.td new file mode 100644 index 00000000..7ec7fcf9 --- /dev/null +++ b/libspu/dialect/pphlo/transforms/decompose_patterns.td @@ -0,0 +1,67 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Canonicalization patterns for the MHLO dialect. 
+ +include "mlir/IR/PatternBase.td" +include "libspu/dialect/pphlo/IR/ops.td" + +// not_equal(x, y) => not(equal(x, y)) +def NotEqualToNotWithEqual : Pat< + (PPHLO_NotEqualOp $lhs, $rhs), + (PPHLO_NotOp (PPHLO_EqualOp $lhs, $rhs)) +>; + +// greater_equal(x, y) => !less(x, y) +def GreaterEqualToNotLess : Pat< + (PPHLO_GreaterEqualOp $lhs, $rhs), + (PPHLO_NotOp (PPHLO_LessOp $lhs, $rhs)) +>; + +// less_equal(x, y) => !greater(x, y) +def LessEqualToNotGreater : Pat< + (PPHLO_LessEqualOp $lhs, $rhs), + (PPHLO_NotOp (PPHLO_GreaterOp $lhs, $rhs)) +>; + +// max(x, y) => select(greater(x, y), x, y) +def MaxToSelect: Pat< + (PPHLO_MaxOp $lhs, $rhs), + (PPHLO_SelectOp (PPHLO_GreaterOp $lhs, $rhs), $lhs, $rhs) +>; + +// min(x, y) => select(less(x, y), x, y) +def MinToSelect : Pat< + (PPHLO_MinOp $lhs, $rhs), + (PPHLO_SelectOp (PPHLO_LessOp $lhs, $rhs), $lhs, $rhs) +>; + +// sub(x, y) => add(x, neg(y)) +def SubToAdd : Pat< + (PPHLO_SubtractOp $lhs, $rhs), + (PPHLO_AddOp $lhs, (PPHLO_NegOp $rhs)) +>; + +// greater(x, y) => less(y, x) +def GreaterToLess : Pat< + (PPHLO_GreaterOp $lhs, $rhs), + (PPHLO_LessOp $rhs, $lhs) +>; + +// clamp(minv, x, maxv) => min(max(minv, x), maxv) +def ClampToMinMax : Pat< + (PPHLO_ClampOp $minv, $operand, $maxv), + (PPHLO_MinOp (PPHLO_MaxOp $minv, $operand), $maxv) +>; diff --git a/libspu/compiler/passes/expand_secret_gather.cc b/libspu/dialect/pphlo/transforms/expand_secret_gather.cc similarity index 97% rename from libspu/compiler/passes/expand_secret_gather.cc rename to libspu/dialect/pphlo/transforms/expand_secret_gather.cc index a822b473..26ea85a8 100644 --- a/libspu/compiler/passes/expand_secret_gather.cc +++ b/libspu/dialect/pphlo/transforms/expand_secret_gather.cc @@ -19,9 +19,10 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" #include "libspu/core/prelude.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/device/intrinsic_table.h" +#include 
"libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" namespace mlir::spu::pphlo { @@ -75,9 +76,8 @@ int64_t GatherLoopTripCount(CustomCallOp op) { return trip_count; } -llvm::SmallVector -ComputePermutedShape(llvm::ArrayRef shape, - llvm::ArrayRef permutation) { +llvm::SmallVector ComputePermutedShape( + llvm::ArrayRef shape, llvm::ArrayRef permutation) { llvm::SmallVector result_shape; for (auto dim : permutation) { result_shape.emplace_back(shape[dim]); @@ -372,11 +372,9 @@ int64_t FindIndex(llvm::ArrayRef c, int64_t value) { // Expand an index vector from the start_indices tensor into a vector that can // be used to dynamic-slice out of the gather operand. -llvm::SmallVector -ExpandIndexVectorIntoOperandSpace(MLIRContext *ctx, OpBuilder *builder, - Value index_vector, DictionaryAttr attr, - int64_t operand_rank) { - +llvm::SmallVector ExpandIndexVectorIntoOperandSpace( + MLIRContext *ctx, OpBuilder *builder, Value index_vector, + DictionaryAttr attr, int64_t operand_rank) { TypeTools typetool(ctx); auto index_type = typetool.getExpressedType( mlir::dyn_cast(index_vector.getType()) @@ -414,7 +412,6 @@ ExpandIndexVectorIntoOperandSpace(MLIRContext *ctx, OpBuilder *builder, int64_t index_vector_dim_index = FindIndex(start_index_map, i); if (index_vector_dim_index != static_cast(start_index_map.size())) { - auto component_to_concat = builder->create( index_vector.getLoc(), RankedTensorType::get( @@ -538,14 +535,13 @@ void GatherLoopBody(CustomCallOp gather, Region &body, Value operand, gather->getLoc(), ValueRange{incremented_counter, updated_accumulator}); } -// pphlo.gather is custom call now +// spu.gather is custom call now struct GatherConverter : public OpRewritePattern { explicit GatherConverter(MLIRContext *context) : OpRewritePattern(context) {} LogicalResult matchAndRewrite(CustomCallOp op, PatternRewriter &rewriter) const override { - - if (op.getCallTargetName() != "pphlo.gather") { + if (op.getCallTargetName() != 
GATHER) { return failure(); } @@ -684,16 +680,16 @@ struct ExpandSecretGather : public ExpandSecretGatherBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createExpandSecretGatherPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo \ No newline at end of file +} // namespace mlir::spu::pphlo \ No newline at end of file diff --git a/libspu/compiler/passes/hlo_legalize_to_pphlo.cc b/libspu/dialect/pphlo/transforms/hlo_legalize_to_pphlo.cc similarity index 88% rename from libspu/compiler/passes/hlo_legalize_to_pphlo.cc rename to libspu/dialect/pphlo/transforms/hlo_legalize_to_pphlo.cc index 9c75f199..e17f85e5 100644 --- a/libspu/compiler/passes/hlo_legalize_to_pphlo.cc +++ b/libspu/dialect/pphlo/transforms/hlo_legalize_to_pphlo.cc @@ -25,15 +25,17 @@ #include "mlir/Transforms/DialectConversion.h" #include "stablehlo/dialect/StablehloOps.h" -#include "libspu/compiler/passes/map_stablehlo_to_pphlo_op.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/value_visibility_map.h" -#include "libspu/compiler/passes/visibility_inference.h" #include "libspu/core/prelude.h" -#include "libspu/dialect/pphlo/attrs.h" -#include "libspu/dialect/pphlo/base_enums.h" -#include "libspu/dialect/pphlo/ops.h" -#include "libspu/dialect/pphlo/types.h" +#include "libspu/device/intrinsic_table.h" +#include "libspu/dialect/pphlo/IR/attrs.h" +#include "libspu/dialect/pphlo/IR/base_enums.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/IR/types.h" +#include "libspu/dialect/pphlo/transforms/map_stablehlo_to_pphlo_op.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/value_visibility_map.h" +#include "libspu/dialect/pphlo/transforms/visibility_inference.h" 
+#include "libspu/dialect/utils/utils.h" namespace mlir::spu::pphlo { namespace { @@ -42,12 +44,10 @@ bool isAll(llvm::ArrayRef values, int64_t value) { return llvm::all_of(values, [value](int64_t v) { return v == value; }); } -ValueVisibilityMap -VisibilityDiscovery(const llvm::ArrayRef input_vis_list, - ModuleOp op) { +ValueVisibilityMap VisibilityDiscovery( + const llvm::ArrayRef input_vis_list, ModuleOp op) { // Get the main function - auto entry_func = op.lookupSymbol("main"); - + auto entry_func = get_entrypoint(op); SPU_ENFORCE(entry_func != nullptr, "Cannot find main entry point"); ValueVisibilityMap vis_map; @@ -82,7 +82,7 @@ VisibilityDiscovery(const llvm::ArrayRef input_vis_list, /// Type converter for mhlo type to pphlo types class HloToPPHloTypeConverter : public TypeConverter { -private: + private: TypeTools typetools_; static std::optional materializeCastFromIllegal(OpBuilder &builder, @@ -101,7 +101,7 @@ class HloToPPHloTypeConverter : public TypeConverter { ->getResult(0); } -public: + public: explicit HloToPPHloTypeConverter(MLIRContext *ctx) : typetools_(ctx) { // Keep all types unchanged. 
addConversion([](Type type) -> Type { return type; }); @@ -113,20 +113,20 @@ class HloToPPHloTypeConverter : public TypeConverter { }; class FuncOpConverter : public OpConversionPattern<::mlir::func::FuncOp> { -private: + private: const ValueVisibilityMap &vis_; TypeTools tools_; -public: + public: FuncOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern<::mlir::func::FuncOp>(type_converter, context), - vis_(vis), tools_(context) {} + vis_(vis), + tools_(context) {} - LogicalResult - matchAndRewrite(::mlir::func::FuncOp op, - ::mlir::func::FuncOpAdaptor /*adaptor*/, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + ::mlir::func::FuncOp op, ::mlir::func::FuncOpAdaptor /*adaptor*/, + ConversionPatternRewriter &rewriter) const override { rewriter.startOpModification(op); auto functionType = op.getFunctionType(); @@ -197,18 +197,18 @@ class FuncOpConverter : public OpConversionPattern<::mlir::func::FuncOp> { }; class BasePPHloOpConverter { -protected: + protected: const ValueVisibilityMap &vis_; TypeTools typetools_; const TypeConverter &converter_; -public: + public: BasePPHloOpConverter(MLIRContext *ctx, const ValueVisibilityMap &vis, const TypeConverter &converter) : vis_(vis), typetools_(ctx), converter_(converter) {} - llvm::SmallVector - materializeInputs(Operation *op, ValueRange adaptor_range) const { + llvm::SmallVector materializeInputs( + Operation *op, ValueRange adaptor_range) const { OpBuilder builder(op); SmallVector operands(op->getNumOperands()); @@ -232,8 +232,8 @@ class BasePPHloOpConverter { return operands; } - llvm::SmallVector - convertResultType(::mlir::Operation::result_range result_range) const { + llvm::SmallVector convertResultType( + ::mlir::Operation::result_range result_range) const { llvm::SmallVector result_types(result_range.size()); for (size_t idx = 0; idx < result_types.size(); ++idx) { @@ -257,16 +257,15 @@ class 
BasePPHloOpConverter { template class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(HloOpTy hlo_op, - typename HloToPPHloOpConverter::OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + HloOpTy hlo_op, typename HloToPPHloOpConverter::OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_type = convertResultType(hlo_op.getResult()); auto operands = materializeInputs(hlo_op, adaptor.getOperands()); @@ -280,16 +279,15 @@ class HloToPPHloOpConverter : public OpConversionPattern, template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(::mlir::func::ReturnOp op, - ::mlir::func::ReturnOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + ::mlir::func::ReturnOp op, ::mlir::func::ReturnOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { Operation *operation = op.getOperation(); rewriter.modifyOpInPlace(op, [&]() { operation->setOperands(materializeInputs(op, adaptor.getOperands())); @@ -301,16 +299,15 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, 
type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::CompareOp hlo_op, - stablehlo::CompareOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::CompareOp hlo_op, stablehlo::CompareOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_type = convertResultType(hlo_op.getResult()); auto comp_direction = hlo_op.getComparisonDirection(); @@ -344,16 +341,15 @@ class HloToPPHloOpConverter template <> struct HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { - -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::ReduceOp op, stablehlo::ReduceOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::ReduceOp op, stablehlo::ReduceOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { // We may need to materialize operands auto materialized_operands = materializeInputs(op, adaptor.getOperands()); auto result_types = convertResultType(op.getResults()); @@ -396,16 +392,15 @@ template <> struct HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::ReduceWindowOp op, - stablehlo::ReduceWindowOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::ReduceWindowOp op, stablehlo::ReduceWindowOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { // We may need to materialize operands 
auto materialized_operands = materializeInputs(op, adaptor.getOperands()); auto result_types = convertResultType(op->getResults()); @@ -506,17 +501,15 @@ struct HloToPPHloOpConverter template <> struct HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { - -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::IfOp op, stablehlo::IfOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - + LogicalResult matchAndRewrite( + stablehlo::IfOp op, stablehlo::IfOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_types = convertResultType(op->getResults()); auto operands = materializeInputs(op, adaptor.getOperands()); @@ -548,16 +541,15 @@ struct HloToPPHloOpConverter template <> struct HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::CaseOp op, stablehlo::CaseOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - + LogicalResult matchAndRewrite( + stablehlo::CaseOp op, stablehlo::CaseOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_types = convertResultType(op->getResults()); // Create new op @@ -590,15 +582,15 @@ struct HloToPPHloOpConverter template <> struct HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), 
BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::WhileOp op, stablehlo::WhileOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::WhileOp op, stablehlo::WhileOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_types = convertResultType(op->getResults()); // Convert cond region signature. @@ -655,17 +647,17 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::BroadcastInDimOp hlo_op, - stablehlo::BroadcastInDimOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::BroadcastInDimOp hlo_op, + stablehlo::BroadcastInDimOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto resultType = convertResultType(hlo_op.getResult()); mlir::NamedAttribute dim( @@ -684,15 +676,14 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap & /*unused*/) : OpConversionPattern(type_converter, context) {} - LogicalResult - matchAndRewrite(stablehlo::ConstantOp hlo_op, - stablehlo::ConstantOpAdaptor /*adaptor*/, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::ConstantOp hlo_op, stablehlo::ConstantOpAdaptor /*adaptor*/, + ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp>( hlo_op, hlo_op.getValue()); return success(); @@ -702,16 +693,15 @@ class HloToPPHloOpConverter template <> class 
HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::IotaOp hlo_op, - stablehlo::IotaOpAdaptor /*adaptor*/, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::IotaOp hlo_op, stablehlo::IotaOpAdaptor /*adaptor*/, + ConversionPatternRewriter &rewriter) const override { auto result_type = convertResultType(hlo_op.getResult()); rewriter.replaceOpWithNewOp>( hlo_op, result_type, hlo_op.getIotaDimension()); @@ -723,15 +713,15 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::DotOp hlo_op, stablehlo::DotOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::DotOp hlo_op, stablehlo::DotOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_type = convertResultType(hlo_op.getResult()); rewriter.replaceOpWithNewOp>( @@ -744,7 +734,7 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), @@ -758,16 +748,15 @@ class HloToPPHloOpConverter } std::vector new_shape(type.getShape()); - new_shape.emplace_back(1); // Add a trailing one dimension + new_shape.emplace_back(1); // Add a trailing one 
dimension auto new_type = RankedTensorType::get(new_shape, type.getElementType()); return rewriter.create(operand.getLoc(), new_type, operand); } - LogicalResult - matchAndRewrite(stablehlo::DotGeneralOp hlo_op, - stablehlo::DotGeneralOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::DotGeneralOp hlo_op, stablehlo::DotGeneralOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_type = convertResultType(hlo_op.getResult()); auto operands = materializeInputs(hlo_op, adaptor.getOperands()); @@ -788,15 +777,15 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::ReturnOp op, stablehlo::ReturnOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::ReturnOp op, stablehlo::ReturnOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto operands = materializeInputs(op, adaptor.getOperands()); rewriter.replaceOpWithNewOp(op, std::nullopt, operands); return success(); @@ -807,17 +796,17 @@ template <> struct HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::SelectAndScatterOp op, - stablehlo::SelectAndScatterOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::SelectAndScatterOp op, + 
stablehlo::SelectAndScatterOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { // Select auto operands = materializeInputs(op, adaptor.getOperands()); @@ -930,15 +919,15 @@ struct HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::RngOp op, stablehlo::RngOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::RngOp op, stablehlo::RngOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { Type resultType = convertResultType(op.getResult()); rewriter.replaceOpWithNewOp( @@ -950,15 +939,15 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::SortOp op, stablehlo::SortOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::SortOp op, stablehlo::SortOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto comp_ret = llvm::dyn_cast( op.getComparator().back().getTerminator()); SPU_ENFORCE(comp_ret.getNumOperands() == 1, @@ -1004,7 +993,7 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -private: + private: /// Returns true if the given `attr` is a splat of the given `value`. 
static bool isSplatValue(DenseIntElementsAttr attr, uint64_t value) { return attr.isSplat() && attr.getSplatValue() == value; @@ -1064,16 +1053,15 @@ class HloToPPHloOpConverter DenseI64ArrayAttr::get(loc.getContext(), padInterior)); } -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::ConvolutionOp op, - stablehlo::ConvolutionOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::ConvolutionOp op, stablehlo::ConvolutionOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto old_attr = op.getDimensionNumbers(); auto attr = ConvDimensionNumbersAttr::get( op->getContext(), old_attr.getInputBatchDimension(), @@ -1138,15 +1126,15 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::PadOp op, stablehlo::PadOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::PadOp op, stablehlo::PadOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { Type result_type = convertResultType(op.getResult()); rewriter.replaceOpWithNewOp( @@ -1161,17 +1149,17 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, 
type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::BitcastConvertOp op, - stablehlo::BitcastConvertOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::BitcastConvertOp op, + stablehlo::BitcastConvertOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto in_type_size = mlir::dyn_cast(op->getOperandTypes()[0]) .getElementTypeBitWidth(); @@ -1195,16 +1183,15 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::ConcatenateOp op, - stablehlo::ConcatenateOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::ConcatenateOp op, stablehlo::ConcatenateOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_type = convertResultType(op.getResult()); rewriter.replaceOpWithNewOp( @@ -1218,16 +1205,15 @@ class HloToPPHloOpConverter template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { - -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::GatherOp op, stablehlo::GatherOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::GatherOp op, stablehlo::GatherOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto old_attr = op.getDimensionNumbers(); auto result_vis = vis_.getValueVisibility(op.getResult()); Type resultType = 
typetools_.getType( @@ -1235,7 +1221,7 @@ class HloToPPHloOpConverter auto materialized_operands = materializeInputs(op, adaptor.getOperands()); auto call = rewriter.create( - op->getLoc(), resultType, materialized_operands, "pphlo.gather"); + op->getLoc(), resultType, materialized_operands, GATHER); auto attr = DictionaryAttr::get( op->getContext(), {NamedAttribute( @@ -1265,17 +1251,17 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::DynamicUpdateSliceOp op, - stablehlo::DynamicUpdateSliceOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + stablehlo::DynamicUpdateSliceOp op, + stablehlo::DynamicUpdateSliceOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto result_vis = vis_.getValueVisibility(op.getResult()); Type result_type = typetools_.getType( @@ -1294,17 +1280,15 @@ template <> class HloToPPHloOpConverter : public OpConversionPattern, BasePPHloOpConverter { -public: + public: HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, const ValueVisibilityMap &vis) : OpConversionPattern(type_converter, context), BasePPHloOpConverter(context, vis, type_converter) {} - LogicalResult - matchAndRewrite(stablehlo::CustomCallOp op, - stablehlo::CustomCallOpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - + LogicalResult matchAndRewrite( + stablehlo::CustomCallOp op, stablehlo::CustomCallOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { llvm::SmallVector result_types = convertResultType(op->getResults()); auto new_op = rewriter.replaceOpWithNewOp( @@ -1320,13 +1304,37 @@ class HloToPPHloOpConverter } }; 
+template <> +class HloToPPHloOpConverter + : public OpConversionPattern, + BasePPHloOpConverter { + public: + HloToPPHloOpConverter(TypeConverter &type_converter, MLIRContext *context, + const ValueVisibilityMap &vis) + : OpConversionPattern(type_converter, + context), + BasePPHloOpConverter(context, vis, type_converter) {} + + LogicalResult matchAndRewrite( + stablehlo::PopulationCountOp hlo_op, + stablehlo::PopulationCountOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto result_type = convertResultType(hlo_op.getResult()); + auto operands = materializeInputs(hlo_op, adaptor.getOperands()); + + rewriter.replaceOpWithNewOp(hlo_op, result_type, operands, + hlo_op->getAttrs()); + + return success(); + } +}; + struct HloLegalizeToPPHlo : public HloLegalizeToPPHloPassBase { -private: - static void - populateHLOToPPHloConversionPattern(HloToPPHloTypeConverter &converter, - RewritePatternSet &patterns, - const ValueVisibilityMap &vis_map) { + private: + static void populateHLOToPPHloConversionPattern( + HloToPPHloTypeConverter &converter, RewritePatternSet &patterns, + const ValueVisibilityMap &vis_map) { auto *context = patterns.getContext(); patterns.insert, @@ -1369,6 +1377,7 @@ struct HloLegalizeToPPHlo HloToPPHloOpConverter, HloToPPHloOpConverter, HloToPPHloOpConverter, + HloToPPHloOpConverter, HloToPPHloOpConverter, HloToPPHloOpConverter, HloToPPHloOpConverter, @@ -1399,7 +1408,7 @@ struct HloLegalizeToPPHlo vis_map); } -public: + public: HloLegalizeToPPHlo(const HloLegalizeToPPHlo &) = default; HloLegalizeToPPHlo() = default; @@ -1474,10 +1483,10 @@ struct HloLegalizeToPPHlo } } }; -} // namespace +} // namespace std::unique_ptr> createLegalizeToPPHloPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/transforms/inline_secret_control_flow.cc b/libspu/dialect/pphlo/transforms/inline_secret_control_flow.cc new file mode 100644 index 
00000000..981a8c41 --- /dev/null +++ b/libspu/dialect/pphlo/transforms/inline_secret_control_flow.cc @@ -0,0 +1,244 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Region.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" + +namespace mlir::spu::pphlo { + +namespace { + +class CaseConverter : public OpRewritePattern { + private: + TypeTools tools_; + + Value reshapeOrBroadcast(PatternRewriter &rewriter, Location loc, Value in, + RankedTensorType target_type) const { + auto in_type = mlir::dyn_cast(in.getType()); + auto broadcasted_mask_type = + RankedTensorType::get(target_type.getShape(), in_type.getElementType()); + if (target_type.getNumElements() == in_type.getNumElements()) { + return rewriter.create(loc, broadcasted_mask_type, in); + } else { + return rewriter.create( + loc, broadcasted_mask_type, in, + llvm::SmallVector(target_type.getRank(), 0)); + } + } + + // Basic algorithm here: + // %out = case(%idx) { + // b0^ { yield r0 } + // b1^ { yield r1 } + // ... + // bn^ { yield rn } + // } + // r0, r1, r2, ..., rn represent results of each case region, + // %out represents results of branch, where branch id == %idx + // 1. Compute all branches and get r0...rn + // 2. 
Generate a mask m = equal(%idx, [0, n]), where only branch id == %idx + // should be one + // 3. Compute mr0 = m[0]*r0, mr1 = m[1]*r1, ..., mrn = m[n]*rn + // 4. Accumulate mrs, %out = sum(mr0, mr1, ..., mrn) + void inlineRegionIntoParent(CaseOp &op, PatternRewriter &rewriter) const { + auto *blockBeforeCase = rewriter.getInsertionBlock(); + auto initPosition = rewriter.getInsertionPoint(); + auto *blockAfterCase = rewriter.splitBlock(blockBeforeCase, initPosition); + + // Collect all blocks + llvm::SmallVector blocks_to_work; + for (auto &r : op->getRegions()) { + blocks_to_work.emplace_back(&r.front()); + rewriter.inlineRegionBefore(r, blockAfterCase); + } + + Value index = op.getIndex(); + int64_t num_cases = op->getNumRegions(); + auto index_type = op.getIndex().getType(); + auto index_base = tools_.getBaseType(index_type); + + // Clamp index to [0, num_cases) + auto zero_const = rewriter.create( + op->getLoc(), + rewriter.getZeroAttr(RankedTensorType::get({}, index_base))); + auto num_cases_const = rewriter.create( + op->getLoc(), + DenseIntElementsAttr::get(RankedTensorType::get({}, index_base), + static_cast(num_cases - 1))); + index = rewriter.create(op->getLoc(), index_type, zero_const, + index, num_cases_const); + + // Reconnect all results. 
+ // build mask + auto iota = rewriter.create( + op->getLoc(), RankedTensorType::get({num_cases}, index_base), 0); + auto index_reshaped = rewriter.create( + op->getLoc(), + RankedTensorType::get({1}, getElementTypeOrSelf(index_type)), index); + auto index_brocasted = rewriter.create( + op->getLoc(), + RankedTensorType::get({num_cases}, getElementTypeOrSelf(index_type)), + index_reshaped, llvm::ArrayRef{0}); + auto masks = rewriter.create(op->getLoc(), iota, index_brocasted); + + llvm::SmallVector result_masks; + auto mask_slice_type = + RankedTensorType::get({1}, getElementTypeOrSelf(masks)); + for (int64_t region_id = 0; region_id < op.getNumRegions(); ++region_id) { + auto m = rewriter.create(op->getLoc(), mask_slice_type, masks, + llvm::ArrayRef{region_id}, + llvm::ArrayRef{region_id + 1}, + llvm::ArrayRef{1}); + result_masks.emplace_back(m); + } + + std::vector rets(op->getNumResults()); + + // First case + auto &first_return = blocks_to_work[0]->back(); + for (int64_t idx = 0; idx < op->getNumResults(); ++idx) { + auto m = reshapeOrBroadcast( + rewriter, op->getLoc(), result_masks[0], + mlir::dyn_cast(op.getResultTypes()[idx])); + rets[idx] = + rewriter.create(op->getLoc(), op->getResultTypes()[idx], + first_return.getOperand(idx), m); + } + + // Other cases + for (int64_t branch_idx = 1; branch_idx < num_cases; ++branch_idx) { + auto &branch_return = blocks_to_work[branch_idx]->back(); + for (int64_t idx = 0; idx < op->getNumResults(); ++idx) { + auto m = reshapeOrBroadcast( + rewriter, op->getLoc(), result_masks[branch_idx], + mlir::dyn_cast(op.getResultTypes()[idx])); + m = rewriter.create(op->getLoc(), op->getResultTypes()[idx], + branch_return.getOperand(idx), m); + rets[idx] = rewriter.create( + op->getLoc(), op->getResultTypes()[idx], rets[idx], m); + } + } + + // Replace results + for (int64_t idx = 0; idx < op->getNumResults(); ++idx) { + rewriter.replaceAllUsesWith(op->getResults()[idx], rets[idx]); + } + + // Erase all returns + for (auto *b : 
blocks_to_work) { + rewriter.eraseOp(&b->back()); + } + + // Merge all blocks + for (auto *b : blocks_to_work) { + rewriter.mergeBlocks(b, blockBeforeCase); + } + rewriter.mergeBlocks(blockAfterCase, blockBeforeCase); + } + + public: + explicit CaseConverter(MLIRContext *context) + : OpRewritePattern(context), tools_(context) {} + + LogicalResult matchAndRewrite(CaseOp op, + PatternRewriter &rewriter) const override { + auto index = op.getIndex(); + + if (tools_.getTypeVisibility(index.getType()) == Visibility::PUBLIC) { + return failure(); + } + + inlineRegionIntoParent(op, rewriter); + return success(); + } +}; + +class IfConverter : public OpRewritePattern { + private: + // Basic algorithm + // %out = if(%pred) { + // trueBranch { yield r0 } + // falseBranch { yield r1 } + // } + // With oblivious execution: + // %out = select(%pred, r0, r1) + void inlineRegionIntoParent(IfOp &op, PatternRewriter &rewriter) const { + auto *blockBeforeIf = rewriter.getInsertionBlock(); + auto &trueBlock = op.getTrueBranch().front(); + auto &falseBlock = op.getFalseBranch().front(); + auto initPosition = rewriter.getInsertionPoint(); + auto *blockAfterIf = rewriter.splitBlock(blockBeforeIf, initPosition); + + // Remove the IfOp and returns. 
+ auto &trueReturnOp = trueBlock.back(); + auto &falseReturnOp = falseBlock.back(); + rewriter.inlineRegionBefore(op.getTrueBranch(), blockAfterIf); + rewriter.inlineRegionBefore(op.getFalseBranch(), blockAfterIf); + for (const auto &[idx, ret] : llvm::enumerate(op->getResults())) { + auto s = rewriter.create( + op->getLoc(), op.getResultTypes()[idx], op.getCondition(), + trueReturnOp.getOperands()[idx], falseReturnOp.getOperands()[idx]); + rewriter.replaceAllUsesWith(op->getResult(idx), s); + } + rewriter.eraseOp(&trueReturnOp); + rewriter.eraseOp(&falseReturnOp); + + rewriter.mergeBlocks(&trueBlock, blockBeforeIf); + rewriter.mergeBlocks(&falseBlock, blockBeforeIf); + rewriter.mergeBlocks(blockAfterIf, blockBeforeIf); + } + + public: + explicit IfConverter(MLIRContext *context) : OpRewritePattern(context) {} + + LogicalResult matchAndRewrite(IfOp op, + PatternRewriter &rewriter) const override { + TypeTools tools(op->getContext()); + auto pred = op.getCondition(); + + if (tools.getTypeVisibility(pred.getType()) == Visibility::PUBLIC) { + return failure(); + } + + inlineRegionIntoParent(op, rewriter); + return success(); + } +}; + +struct InlineSecretControlFlow + : public InlineSecretControlFlowBase { + void runOnOperation() override { + RewritePatternSet patterns(&getContext()); + populateOwningPatterns(&patterns, &getContext()); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + } + + private: + static void populateOwningPatterns(RewritePatternSet *patterns, + MLIRContext *ctx) { + patterns->insert(ctx); + } +}; +} // namespace + +std::unique_ptr> createInlineSecretControlFlow() { + return std::make_unique(); +} + +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/insert_deallocation.cc b/libspu/dialect/pphlo/transforms/insert_deallocation.cc similarity index 95% rename from libspu/compiler/passes/insert_deallocation.cc rename to libspu/dialect/pphlo/transforms/insert_deallocation.cc index bf70d968..0b1e1f8e 100644 
--- a/libspu/compiler/passes/insert_deallocation.cc +++ b/libspu/dialect/pphlo/transforms/insert_deallocation.cc @@ -17,8 +17,8 @@ #include "mlir/Analysis/Liveness.h" #include "mlir/Pass/Pass.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" #ifdef ENABLE_LIVENESS_DEBUG @@ -40,10 +40,10 @@ namespace mlir::spu::pphlo { namespace { struct Deallocator { -private: + private: std::unique_ptr top_liveness_; -public: + public: LogicalResult transformOp(Operation *op, const LivenessBlockInfo *block_liveness) { for (const auto &operand : op->getOperands()) { @@ -118,10 +118,10 @@ struct InsertDeallocation : public InsertDeallocationBase { } } }; -} // namespace +} // namespace std::unique_ptr> createInsertDeallocationOp() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/lower_conversion_cast.cc b/libspu/dialect/pphlo/transforms/lower_conversion_cast.cc similarity index 92% rename from libspu/compiler/passes/lower_conversion_cast.cc rename to libspu/dialect/pphlo/transforms/lower_conversion_cast.cc index 59967283..a651c6a6 100644 --- a/libspu/compiler/passes/lower_conversion_cast.cc +++ b/libspu/dialect/pphlo/transforms/lower_conversion_cast.cc @@ -16,8 +16,8 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" namespace mlir::spu::pphlo { @@ -46,16 +46,16 @@ struct LowerConversionCast (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace 
std::unique_ptr> createLowerConversionCastPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/lower_mixed_type_op.cc b/libspu/dialect/pphlo/transforms/lower_mixed_type_op.cc similarity index 90% rename from libspu/compiler/passes/lower_mixed_type_op.cc rename to libspu/dialect/pphlo/transforms/lower_mixed_type_op.cc index 23d62579..c4dca00e 100644 --- a/libspu/compiler/passes/lower_mixed_type_op.cc +++ b/libspu/dialect/pphlo/transforms/lower_mixed_type_op.cc @@ -16,10 +16,10 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/ops.h" -#include "libspu/dialect/pphlo/types.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/IR/types.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { @@ -32,7 +32,7 @@ namespace { // %3 = mul/dot(%0, %2) int, fxp -> fxp // Save one truncation template struct FxpIntMulTruncationRemover : public OpRewritePattern { -private: + private: TypeTools typetools_; bool isLegitConvert(ConvertOp op) const { @@ -45,7 +45,7 @@ struct FxpIntMulTruncationRemover : public OpRewritePattern { typetools_.isIntType(op.getOperand().getType()); } -public: + public: explicit FxpIntMulTruncationRemover(MLIRContext *context) : OpRewritePattern(context), typetools_(context) {} @@ -55,7 +55,7 @@ struct FxpIntMulTruncationRemover : public OpRewritePattern { auto rhs = op.getRhs(); if (!typetools_.isFloatType(op.getType())) { - return failure(); // Must be an op result in fp type + return failure(); // Must be an op result in fp type } auto lhs_convert = lhs.template getDefiningOp(); @@ -82,7 +82,7 @@ struct LowerMixedTypeOp : public LowerMixedTypeOpBase { (void)applyPatternsAndFoldGreedily(getOperation(), 
std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert, @@ -90,10 +90,10 @@ struct LowerMixedTypeOp : public LowerMixedTypeOpBase { FxpIntMulTruncationRemover>(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createLowerMixedTypeOpPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/map_stablehlo_to_pphlo_op.h b/libspu/dialect/pphlo/transforms/map_stablehlo_to_pphlo_op.h similarity index 78% rename from libspu/compiler/passes/map_stablehlo_to_pphlo_op.h rename to libspu/dialect/pphlo/transforms/map_stablehlo_to_pphlo_op.h index e347b097..e8d88fbd 100644 --- a/libspu/compiler/passes/map_stablehlo_to_pphlo_op.h +++ b/libspu/dialect/pphlo/transforms/map_stablehlo_to_pphlo_op.h @@ -18,7 +18,7 @@ #include "stablehlo/dialect/StablehloOps.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" namespace mlir::spu::pphlo { @@ -30,16 +30,16 @@ struct HloToPPHloOpImpl { template using HloToPPHloOp = typename HloToPPHloOpImpl::Type; -#define MAP_HLO_TO_PPHLO(OpName) \ - template <> \ - struct HloToPPHloOpImpl { \ - using Type = pphlo::OpName; \ +#define MAP_HLO_TO_PPHLO(OpName) \ + template <> \ + struct HloToPPHloOpImpl { \ + using Type = pphlo::OpName; \ }; -#define MAP_HLO_TO_PPHLO_DIFF_NAME(HloName, PPHloName) \ - template <> \ - struct HloToPPHloOpImpl { \ - using Type = pphlo::PPHloName; \ +#define MAP_HLO_TO_PPHLO_DIFF_NAME(HloName, PPHloName) \ + template <> \ + struct HloToPPHloOpImpl { \ + using Type = pphlo::PPHloName; \ }; MAP_HLO_TO_PPHLO(AbsOp) @@ -104,4 +104,4 @@ MAP_HLO_TO_PPHLO_DIFF_NAME(GatherOp, CustomCallOp) #undef MAP_HLO_TO_PPHLO -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/optimize_denominator_with_broadcast.cc b/libspu/dialect/pphlo/transforms/optimize_denominator_with_broadcast.cc 
similarity index 93% rename from libspu/compiler/passes/optimize_denominator_with_broadcast.cc rename to libspu/dialect/pphlo/transforms/optimize_denominator_with_broadcast.cc index 6d5a7f63..4a0cc1d7 100644 --- a/libspu/compiler/passes/optimize_denominator_with_broadcast.cc +++ b/libspu/dialect/pphlo/transforms/optimize_denominator_with_broadcast.cc @@ -16,8 +16,8 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" namespace mlir::spu::pphlo { @@ -58,17 +58,17 @@ struct OptimizeDenominatorWithBcast (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createOptimizeDenominatorWithBroadcast() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/optimize_maxpool.cc b/libspu/dialect/pphlo/transforms/optimize_maxpool.cc similarity index 95% rename from libspu/compiler/passes/optimize_maxpool.cc rename to libspu/dialect/pphlo/transforms/optimize_maxpool.cc index a80d0890..eeb3bc0f 100644 --- a/libspu/compiler/passes/optimize_maxpool.cc +++ b/libspu/dialect/pphlo/transforms/optimize_maxpool.cc @@ -21,17 +21,17 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/ops.h" -#include "libspu/dialect/pphlo/types.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/IR/types.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace 
mlir::spu::pphlo { namespace { struct SelectAndScatterConverter : public OpRewritePattern { -private: + private: TypeTools typetools_; Value rewriteReduceWindow(ReduceWindowOp op, @@ -74,13 +74,12 @@ struct SelectAndScatterConverter : public OpRewritePattern { return false; } -public: + public: explicit SelectAndScatterConverter(MLIRContext *context) : OpRewritePattern(context), typetools_(context) {} LogicalResult matchAndRewrite(SelectAndScatterOp op, PatternRewriter &rewriter) const override { - // Select and scatter region should be a single element region if (!isSingleRegion(op.getScatter()) || !isSingleRegion(op.getSelect())) { return failure(); @@ -164,17 +163,17 @@ struct OptimizeMaxPooling : public OptimizeMaxPoolingBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createOptimizeMaxPoolingPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/optimize_select.cc b/libspu/dialect/pphlo/transforms/optimize_select.cc similarity index 85% rename from libspu/compiler/passes/optimize_select.cc rename to libspu/dialect/pphlo/transforms/optimize_select.cc index 9e195f18..ae6c81cc 100644 --- a/libspu/compiler/passes/optimize_select.cc +++ b/libspu/dialect/pphlo/transforms/optimize_select.cc @@ -16,9 +16,10 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/device/intrinsic_table.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { @@ -54,13 +55,12 @@ static 
bool isSplatZero(DenseElementsAttr val) { // Rational: // This is a pattern created by xla alg simplifier struct SelectConversion : public OpRewritePattern { -public: + public: explicit SelectConversion(MLIRContext *context) : OpRewritePattern(context) {} LogicalResult matchAndRewrite(SelectOp op, PatternRewriter &rewrite) const override { - // Pattern 2 first: auto on_false = op.getOnFalse(); if (auto on_false_const = on_false.getDefiningOp()) { @@ -75,9 +75,11 @@ struct SelectConversion : public OpRewritePattern { // Pattern 1: auto pred = op.getPred(); // Only do this for certain select... - if (pred.getDefiningOp() != nullptr) { - // This select pred has already been optimized, bailout here - return failure(); + if (auto pref_a = pred.getDefiningOp()) { + if (pref_a.getCallTargetName() == PREFER_A) { + // This select pred has already been optimized, bailout here + return failure(); + } } // If this pred has only one use...do not rewrite, with mula1b is faster @@ -107,10 +109,11 @@ struct SelectConversion : public OpRewritePattern { builder.setInsertionPoint(pred.getDefiningOp()->getNextNode()); new_loc = pred.getDefiningOp()->getLoc(); } - auto pref_a = builder.create(new_loc, pred); + auto pref_a = + builder.create(new_loc, pred.getType(), pred, PREFER_A); // Only replace select usage - pred.replaceUsesWithIf(pref_a, [](OpOperand &use) { + pred.replaceUsesWithIf(pref_a->getResult(0), [](OpOperand &use) { return mlir::isa(use.getOwner()); }); @@ -125,16 +128,16 @@ struct OptimizeSelect : public OptimizeSelectBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createOptimizeSelectPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/optimize_sqrt_plus_eps.cc 
b/libspu/dialect/pphlo/transforms/optimize_sqrt_plus_eps.cc similarity index 95% rename from libspu/compiler/passes/optimize_sqrt_plus_eps.cc rename to libspu/dialect/pphlo/transforms/optimize_sqrt_plus_eps.cc index 1bd22fc7..b5c98272 100644 --- a/libspu/compiler/passes/optimize_sqrt_plus_eps.cc +++ b/libspu/dialect/pphlo/transforms/optimize_sqrt_plus_eps.cc @@ -18,9 +18,9 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" #include "libspu/core/prelude.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" namespace mlir::spu::pphlo { @@ -87,16 +87,16 @@ struct OptimizeSqrtPlusEps (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createOptimizeSqrtPlusEps() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/partial_sort_to_topk.cc b/libspu/dialect/pphlo/transforms/partial_sort_to_topk.cc similarity index 97% rename from libspu/compiler/passes/partial_sort_to_topk.cc rename to libspu/dialect/pphlo/transforms/partial_sort_to_topk.cc index 29464ff5..139f2650 100644 --- a/libspu/compiler/passes/partial_sort_to_topk.cc +++ b/libspu/dialect/pphlo/transforms/partial_sort_to_topk.cc @@ -18,16 +18,16 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { namespace { struct 
SortConversion : public OpRewritePattern { -private: + private: bool sliceAttributesOk(llvm::ArrayRef in, llvm::ArrayRef expected, size_t allow_mismatch_at) const { @@ -43,7 +43,7 @@ struct SortConversion : public OpRewritePattern { return std::all_of(in.begin(), in.end(), [](int64_t i) { return i == 1; }); } -public: + public: explicit SortConversion(MLIRContext *context) : OpRewritePattern(context) {} @@ -201,17 +201,17 @@ struct PartialSortToTopK : public PartialSortToTopKBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createPartialSortToTopK() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/pass_details.h b/libspu/dialect/pphlo/transforms/pass_details.h similarity index 79% rename from libspu/compiler/passes/pass_details.h rename to libspu/dialect/pphlo/transforms/pass_details.h index 152c4b35..443c4fdf 100644 --- a/libspu/compiler/passes/pass_details.h +++ b/libspu/dialect/pphlo/transforms/pass_details.h @@ -16,12 +16,12 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/dialect.h" +#include "libspu/dialect/pphlo/IR/dialect.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { #define GEN_PASS_CLASSES -#include "libspu/compiler/passes/passes.h.inc" +#include "libspu/dialect/pphlo/transforms/passes.h.inc" -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/passes.h b/libspu/dialect/pphlo/transforms/passes.h similarity index 82% rename from libspu/compiler/passes/passes.h rename to libspu/dialect/pphlo/transforms/passes.h index 3361ef13..b70ce1ab 100644 --- a/libspu/compiler/passes/passes.h +++ 
b/libspu/dialect/pphlo/transforms/passes.h @@ -35,14 +35,11 @@ namespace spu::pphlo { /// Lowers from HLO dialect to pphlo dialect std::unique_ptr> createLegalizeToPPHloPass(); -// Decompose comparison into lower ops when possible -std::unique_ptr> createDecomposeComparisonPass(); - // Lower UnrealizedConversionCastOp std::unique_ptr> createLowerConversionCastPass(); -// Lower min/max -std::unique_ptr> createDecomposeMinMaxPass(); +// Lower high-level ops into basic ops +std::unique_ptr> createDecomposeOps(); // Reduce truncation std::unique_ptr> createReduceTruncationPass(); @@ -68,14 +65,23 @@ createOptimizeDenominatorWithBroadcast(); std::unique_ptr> createInsertDeallocationOp(); +// Lower sort with simple comprators to simple sort std::unique_ptr> createSortLowering(); std::unique_ptr> createExpandSecretGatherPass(); +// Push convert later std::unique_ptr> createConvertPushDownPass(); +// Convert partial sort to topk std::unique_ptr> createPartialSortToTopK(); -} // namespace spu::pphlo +// Inline secret if/case +std::unique_ptr> createInlineSecretControlFlow(); + +// Convert signbit pattern to SignOp +std::unique_ptr> createRewriteSignbitPatterns(); + +} // namespace spu::pphlo -} // namespace mlir +} // namespace mlir diff --git a/libspu/compiler/passes/passes.td b/libspu/dialect/pphlo/transforms/passes.td similarity index 87% rename from libspu/compiler/passes/passes.td rename to libspu/dialect/pphlo/transforms/passes.td index f5e61d34..4fc28fdc 100644 --- a/libspu/compiler/passes/passes.td +++ b/libspu/dialect/pphlo/transforms/passes.td @@ -27,21 +27,15 @@ def HloLegalizeToPPHloPass : Pass<"hlo-legalize-to-pphlo", "ModuleOp"> { ]; } -def DecomposeComparison : Pass<"decompose-comparison", "func::FuncOp"> { - let summary = "Decompose comparisons into basic ops."; - let constructor = "createDecomposeComparisonPass()"; - let dependentDialects = ["pphlo::PPHloDialect"]; -} - def LowerConversionCast : Pass<"lower-conversion-cast", "func::FuncOp"> { let summary = 
"Lower UnrealizedConversionCastOp created during dialect conversion."; let constructor = "createLowerConversionCastPass()"; let dependentDialects = ["pphlo::PPHloDialect"]; } -def DecomposeMinMax : Pass<"decompose-minmax", "func::FuncOp"> { - let summary = "Decompose min/max into select with greater/less ops."; - let constructor = "createDecomposeMinMaxPass()"; +def DecomposeOps : Pass<"decompose-ops", "func::FuncOp"> { + let summary = "Decompose high-level ops into basic ops."; + let constructor = "createDecomposeOps()"; let dependentDialects = ["pphlo::PPHloDialect"]; } @@ -81,6 +75,12 @@ def RewriteDivSqrtPatterns: Pass<"rewrite-div-sqrt-pattern", "func::FuncOp"> { let dependentDialects = ["pphlo::PPHloDialect"]; } +def RewriteSignbitPatterns: Pass<"rewrite-signbit-pattern", "func::FuncOp"> { + let summary = "Rewrite x >> (n-1) bits to SignOp"; + let constructor = "createRewriteSignbitPatterns()"; + let dependentDialects = ["pphlo::PPHloDialect"]; +} + def OptimizeDenominatorWithBcast: Pass<"optimize-denominator-with-broadcast", "func::FuncOp"> { let summary = "Optimize x/broadcast(y) into x*broadcast(1/y)"; let constructor = "createOptimizeDenominatorWithBroadcast()"; @@ -116,3 +116,9 @@ def PartialSortToTopK: Pass<"partial-sort-to-topk", "func::FuncOp"> { let constructor = "createPartialSortToTopK()"; let dependentDialects = ["pphlo::PPHloDialect"]; } + +def InlineSecretControlFlow: Pass<"inline-secret-control-flow", "func::FuncOp"> { + let summary = "Flatten secret control flow"; + let constructor = "createInlineSecretControlFlow()"; + let dependentDialects = ["pphlo::PPHloDialect"]; +} diff --git a/libspu/compiler/passes/reduce_truncation.cc b/libspu/dialect/pphlo/transforms/reduce_truncation.cc similarity index 92% rename from libspu/compiler/passes/reduce_truncation.cc rename to libspu/dialect/pphlo/transforms/reduce_truncation.cc index 7e74cae8..004d5f75 100644 --- a/libspu/compiler/passes/reduce_truncation.cc +++ 
b/libspu/dialect/pphlo/transforms/reduce_truncation.cc @@ -16,8 +16,8 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" namespace mlir::spu::pphlo { @@ -26,7 +26,7 @@ namespace { // Convert the following pattern %s1 = mul(%p0, %s0); %s2 = mul(%p1, %s1) into // %p2 = mul(%p0, %p1); %s1 = mul(%p2, %s1) struct MulConverter : public OpRewritePattern { -private: + private: TypeTools tools_; bool isMulSP(MulOp op) const { auto lhs_vis = tools_.getTypeVisibility(op.getLhs().getType()); @@ -35,8 +35,8 @@ struct MulConverter : public OpRewritePattern { return lhs_vis != rhs_vis; } - std::pair - getSecretAndPublicOperand(MulOp op) const { + std::pair getSecretAndPublicOperand( + MulOp op) const { auto lhs_vis = tools_.getTypeVisibility(op.getLhs().getType()); auto secret_operand = @@ -47,7 +47,7 @@ struct MulConverter : public OpRewritePattern { return {secret_operand, public_operand}; } -public: + public: explicit MulConverter(MLIRContext *context) : OpRewritePattern(context), tools_(context) { setHasBoundedRewriteRecursion(false); @@ -55,7 +55,6 @@ struct MulConverter : public OpRewritePattern { LogicalResult matchAndRewrite(MulOp op, PatternRewriter &rewriter) const override { - // If lhs and rhs has the same visibility, bailout if (!isMulSP(op)) { return failure(); @@ -89,16 +88,16 @@ struct ReduceTruncation : public ReduceTruncBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createReduceTruncationPass() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git 
a/libspu/compiler/passes/register_passes.h b/libspu/dialect/pphlo/transforms/register_passes.h similarity index 84% rename from libspu/compiler/passes/register_passes.h rename to libspu/dialect/pphlo/transforms/register_passes.h index 138a5b87..e8905f89 100644 --- a/libspu/compiler/passes/register_passes.h +++ b/libspu/dialect/pphlo/transforms/register_passes.h @@ -16,13 +16,13 @@ #include "mlir/Pass/Pass.h" -#include "libspu/compiler/passes/passes.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { #define GEN_PASS_REGISTRATION -#include "libspu/compiler/passes/passes.h.inc" +#include "libspu/dialect/pphlo/transforms/passes.h.inc" inline void registerAllPPHloPasses() { registerPasses(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/rewrite_div_sqrt_patterns.cc b/libspu/dialect/pphlo/transforms/rewrite_div_sqrt_patterns.cc similarity index 95% rename from libspu/compiler/passes/rewrite_div_sqrt_patterns.cc rename to libspu/dialect/pphlo/transforms/rewrite_div_sqrt_patterns.cc index b8c05534..51864380 100644 --- a/libspu/compiler/passes/rewrite_div_sqrt_patterns.cc +++ b/libspu/dialect/pphlo/transforms/rewrite_div_sqrt_patterns.cc @@ -16,15 +16,15 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" namespace mlir::spu::pphlo { namespace { struct DivRewriter : public OpRewritePattern { -private: + private: Operation *rewriteSqrtIfPossible(PatternRewriter &rewriter, Operation *op) const { if (op == nullptr || op->getNumOperands() != 1) { @@ -49,7 +49,7 @@ struct DivRewriter : public OpRewritePattern { return nullptr; } -public: + public: explicit DivRewriter(MLIRContext *context) : OpRewritePattern(context) {} @@ -103,16 +103,16 @@ struct 
RewriteDivSqrtPatterns (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createRewriteDivSqrtPatterns() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/pphlo/transforms/rewrite_signbit.cc b/libspu/dialect/pphlo/transforms/rewrite_signbit.cc new file mode 100644 index 00000000..486ba682 --- /dev/null +++ b/libspu/dialect/pphlo/transforms/rewrite_signbit.cc @@ -0,0 +1,124 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" + +namespace mlir::spu::pphlo { + +namespace { + +// Signbit can generate the following pattern +// %0 = pphlo.constant dense<0> : tensor +// %1 = pphlo.constant dense<31> : tensor +// %2 = pphlo.bitcast_convert %arg0 : (tensor) -> tensor +// %3 = pphlo.shift_right_arithmetic %2, %1 : tensor +// This is not right after legalize to fxp. 
Rewrite to following +// %0 = pphlo.constant dense<-1.0> : tensor +// %1 = pphlo.constant dense<1> : tensor +// %2 = pphlo.sign %arg0 : (tensor) -> tensor +// %3 = pphlo.convert %2 : (tensor) -> tensor +// %4 = pphlo.shift_right_arithmetic %3, %1 : (tensor, tensor) -> +// tensor +struct ARShiftRightRewrittern + : public OpRewritePattern { + private: + TypeTools tools_; + + std::optional extractSplatConstantValue( + DenseIntElementsAttr attr) const { + if (!attr.isSplat()) { + return std::nullopt; + } + + return attr.getSplatValue().getSExtValue(); + } + + bool isLegitARShift(ShiftRightArithmeticOp op) const { + auto lhs_type = mlir::dyn_cast(tools_.getBaseType( + mlir::dyn_cast(op.getLhs().getType()))); + auto shifted_bits = op.getRhs().getDefiningOp(); + + if (!shifted_bits) { + return false; + } + + auto shifted_bits_v = extractSplatConstantValue( + mlir::dyn_cast(shifted_bits.getValue())); + + if (!shifted_bits_v.has_value()) { + return false; + } + + return lhs_type.getWidth() - 1 == *shifted_bits_v; + } + + Value stripConvertOps(Value v) const { + if (auto parent = v.getDefiningOp()) { + return stripConvertOps(parent.getOperand()); + } + return v; + } + + public: + explicit ARShiftRightRewrittern(MLIRContext *context) + : OpRewritePattern(context), tools_(context) {} + + LogicalResult matchAndRewrite(ShiftRightArithmeticOp op, + PatternRewriter &rewriter) const override { + if (!isLegitARShift(op)) { + return failure(); + } + + auto value_before_shift = stripConvertOps(op.getLhs()); + + // rewrite + // sign + auto sign = rewriter.create(op->getLoc(), value_before_shift, true); + // convert + auto convert = rewriter.create(op->getLoc(), op.getType(), sign); + // sign is -1 for negative and 1 for positive + // arshift 1 bit, to get -1 and 0 + auto one = rewriter.create( + op->getLoc(), rewriter.getOneAttr(op.getRhs().getType())); + rewriter.replaceOpWithNewOp(op, op.getType(), + convert, one); + return success(); + } +}; + +struct SignbitPattern : public 
RewriteSignbitPatternsBase { + void runOnOperation() override { + RewritePatternSet patterns(&getContext()); + populateOwningPatterns(&patterns, &getContext()); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); + } + + private: + static void populateOwningPatterns(RewritePatternSet *patterns, + MLIRContext *ctx) { + patterns->insert(ctx); + } +}; +} // namespace + +std::unique_ptr> createRewriteSignbitPatterns() { + return std::make_unique(); +} + +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/sort_lowering.cc b/libspu/dialect/pphlo/transforms/sort_lowering.cc similarity index 93% rename from libspu/compiler/passes/sort_lowering.cc rename to libspu/dialect/pphlo/transforms/sort_lowering.cc index 2491602b..3640e8ad 100644 --- a/libspu/compiler/passes/sort_lowering.cc +++ b/libspu/dialect/pphlo/transforms/sort_lowering.cc @@ -16,16 +16,16 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "libspu/compiler/passes/pass_details.h" -#include "libspu/compiler/passes/passes.h" -#include "libspu/dialect/pphlo/ops.h" +#include "libspu/dialect/pphlo/IR/ops.h" +#include "libspu/dialect/pphlo/transforms/pass_details.h" +#include "libspu/dialect/pphlo/transforms/passes.h" namespace mlir::spu::pphlo { namespace { struct SortConversion : public OpRewritePattern { -public: + public: explicit SortConversion(MLIRContext *context) : OpRewritePattern(context) {} @@ -80,16 +80,16 @@ struct SortLowering : public SortLoweringBase { (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } -private: + private: static void populateOwningPatterns(RewritePatternSet *patterns, MLIRContext *ctx) { patterns->insert(ctx); } }; -} // namespace +} // namespace std::unique_ptr> createSortLowering() { return std::make_unique(); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/value_visibility_map.cc 
b/libspu/dialect/pphlo/transforms/value_visibility_map.cc similarity index 92% rename from libspu/compiler/passes/value_visibility_map.cc rename to libspu/dialect/pphlo/transforms/value_visibility_map.cc index fd887148..c3d5d523 100644 --- a/libspu/compiler/passes/value_visibility_map.cc +++ b/libspu/dialect/pphlo/transforms/value_visibility_map.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "libspu/compiler/passes/value_visibility_map.h" +#include "libspu/dialect/pphlo/transforms/value_visibility_map.h" #include "libspu/core/prelude.h" @@ -38,4 +38,4 @@ void ValueVisibilityMap::setOperationInputVisibility( op_in_vis_[op] = llvm::SmallVector(vis); } -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/compiler/passes/value_visibility_map.h b/libspu/dialect/pphlo/transforms/value_visibility_map.h similarity index 89% rename from libspu/compiler/passes/value_visibility_map.h rename to libspu/dialect/pphlo/transforms/value_visibility_map.h index dbf11668..3c62c0e6 100644 --- a/libspu/compiler/passes/value_visibility_map.h +++ b/libspu/dialect/pphlo/transforms/value_visibility_map.h @@ -17,21 +17,21 @@ #include "llvm/ADT/DenseMap.h" #include "mlir/IR/Value.h" -#include "libspu/dialect/pphlo/types.h" +#include "libspu/dialect/pphlo/IR/types.h" namespace mlir::spu::pphlo { class ValueVisibilityMap { -private: + private: llvm::DenseMap value_vis_; llvm::DenseMap> op_in_vis_; llvm::SmallVector input_vis_; llvm::SmallVector output_vis_; -public: + public: Visibility getValueVisibility(const Value &v) const; - std::optional> - getOperationInputVisibility(Operation *op) const { + std::optional> getOperationInputVisibility( + Operation *op) const { auto iter = op_in_vis_.find(op); if (iter == op_in_vis_.end()) { return std::nullopt; @@ -52,4 +52,4 @@ class ValueVisibilityMap { llvm::ArrayRef vis); }; -} // namespace mlir::spu::pphlo +} // namespace 
mlir::spu::pphlo diff --git a/libspu/compiler/passes/visibility_inference.cc b/libspu/dialect/pphlo/transforms/visibility_inference.cc similarity index 99% rename from libspu/compiler/passes/visibility_inference.cc rename to libspu/dialect/pphlo/transforms/visibility_inference.cc index fb681348..5f212997 100644 --- a/libspu/compiler/passes/visibility_inference.cc +++ b/libspu/dialect/pphlo/transforms/visibility_inference.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "libspu/compiler/passes/visibility_inference.h" +#include "libspu/dialect/pphlo/transforms/visibility_inference.h" #include "llvm/Support/raw_ostream.h" #include "mlir/IR/Block.h" @@ -353,4 +353,4 @@ void VisibilityInference::inferOperation(Operation &op) { llvm_unreachable(debug_s.str().c_str()); } } -} // namespace mlir::spu::pphlo \ No newline at end of file +} // namespace mlir::spu::pphlo \ No newline at end of file diff --git a/libspu/compiler/passes/visibility_inference.h b/libspu/dialect/pphlo/transforms/visibility_inference.h similarity index 96% rename from libspu/compiler/passes/visibility_inference.h rename to libspu/dialect/pphlo/transforms/visibility_inference.h index 4aaa3dc8..b243b178 100644 --- a/libspu/compiler/passes/visibility_inference.h +++ b/libspu/dialect/pphlo/transforms/visibility_inference.h @@ -16,20 +16,20 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "libspu/compiler/passes/value_visibility_map.h" #include "libspu/core/prelude.h" +#include "libspu/dialect/pphlo/transforms/value_visibility_map.h" namespace mlir::spu::pphlo { class VisibilityInference { -public: + public: explicit VisibilityInference(MLIRContext *context, ValueVisibilityMap &value_vis) : value_vis_(value_vis), tools_(context) {} void infer(func::FuncOp &func); -private: + private: void inferRegion(Region ®ion); void inferBlock(Block &blk); void inferOperation(Operation &op); @@ -106,4 +106,4 @@ class 
VisibilityInference { TypeTools tools_; }; -} // namespace mlir::spu::pphlo +} // namespace mlir::spu::pphlo diff --git a/libspu/dialect/utils/BUILD.bazel b/libspu/dialect/utils/BUILD.bazel new file mode 100644 index 00000000..acfb2b05 --- /dev/null +++ b/libspu/dialect/utils/BUILD.bazel @@ -0,0 +1,29 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//bazel:spu.bzl", "spu_cc_library") + +spu_cc_library( + name = "utils", + srcs = glob([ + "*.cc", + ]), + hdrs = glob([ + "*.h", + ]), + deps = [ + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + ], +) diff --git a/libspu/dialect/utils/assembly_format.cc b/libspu/dialect/utils/assembly_format.cc new file mode 100644 index 00000000..939096fe --- /dev/null +++ b/libspu/dialect/utils/assembly_format.cc @@ -0,0 +1,100 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "libspu/dialect/utils/assembly_format.h" + +namespace mlir::spu { + +namespace { + +ParseResult assignFromFunctionType(OpAsmParser& parser, llvm::SMLoc loc, + ArrayRef operands, Type& result, + FunctionType& fn_type) { + assert(fn_type); + if (fn_type.getInputs().size() != operands.size()) { + return parser.emitError(loc) + << operands.size() << " operands present, but expected " + << fn_type.getInputs().size(); + } + + // Set operand types to function input types + for (auto [operand, input] : llvm::zip(operands, fn_type.getInputs())) { + *operand = input; + } + + // Set result type + if (fn_type.getResults().size() != 1) { + return parser.emitError(loc, "expected single output"); + } + result = fn_type.getResults()[0]; + + return success(); +} + +} // namespace + +namespace detail { + +void printSameOperandsAndResultTypeImpl(OpAsmPrinter& p, Operation* op, + TypeRange operands, Type result) { + // Handle zero operand types `() -> a` prints `a` + if (operands.empty()) { + p.printType(result); + return; + } + // Handle all same type `(a,a,...) -> a` prints `a` + bool allSameType = + llvm::all_of(operands, [&result](auto t) { return t == result; }); + if (allSameType) { + p.printType(result); + return; + } + // Fall back to generic + p.printFunctionalType(op); +} + +ParseResult parseSameOperandsAndResultTypeImpl(OpAsmParser& parser, + ArrayRef operands, + Type& result) { + llvm::SMLoc loc = parser.getCurrentLocation(); + + Type type; + if (parser.parseType(type)) { + return failure(); + } + + // Handle if function type, all operand types did not match result type. + if (auto fnType = mlir::dyn_cast(type)) { + return assignFromFunctionType(parser, loc, operands, result, fnType); + } + + // Handle bare types. ` : type` indicating all input/output types match. 
+ for (Type* t : operands) { + *t = type; + } + result = type; + return success(); +} + +} // namespace detail + +void printCustomCallTargetImpl(AsmPrinter& p, StringAttr target) { + p.printSymbolName(target.getValue()); +} + +ParseResult parseCustomCallTargetImpl(AsmParser& parser, StringAttr& target) { + return parser.parseSymbolName(target); +} + +} // namespace mlir::spu diff --git a/libspu/dialect/utils/assembly_format.h b/libspu/dialect/utils/assembly_format.h new file mode 100644 index 00000000..c23374dd --- /dev/null +++ b/libspu/dialect/utils/assembly_format.h @@ -0,0 +1,63 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "llvm/ADT/ArrayRef.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/TypeRange.h" +#include "mlir/IR/Types.h" +#include "mlir/Support/LogicalResult.h" + +namespace mlir::spu { + +namespace detail { + +void printSameOperandsAndResultTypeImpl(OpAsmPrinter& p, Operation* op, + TypeRange operands, Type result); +ParseResult parseSameOperandsAndResultTypeImpl(OpAsmParser& parser, + ArrayRef operands, + Type& result); + +} // namespace detail + +template +void printSameOperandsAndResultType(OpAsmPrinter& p, Operation* op, + OpTypes... 
types) { + static_assert(sizeof...(types) > 0); + SmallVector typesVec{types...}; + ArrayRef typesRef = ArrayRef(typesVec); + return detail::printSameOperandsAndResultTypeImpl( + p, op, typesRef.drop_back(1), typesRef.back()); +} + +template +ParseResult parseSameOperandsAndResultType(OpAsmParser& parser, + OpTypes&... types) { + static_assert(sizeof...(types) > 0); + SmallVector typesVec{&types...}; + ArrayRef typesRef = ArrayRef(typesVec); + return detail::parseSameOperandsAndResultTypeImpl( + parser, typesRef.drop_back(1), *typesRef.back()); +} + +// CustomCall target attr +void printCustomCallTargetImpl(AsmPrinter& p, StringAttr target); +ParseResult parseCustomCallTargetImpl(AsmParser& parser, StringAttr& target); + +} // namespace mlir::spu diff --git a/libspu/dialect/utils/utils.cc b/libspu/dialect/utils/utils.cc new file mode 100644 index 00000000..2fa8db42 --- /dev/null +++ b/libspu/dialect/utils/utils.cc @@ -0,0 +1,35 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "libspu/dialect/utils/utils.h" + +#include "llvm/ADT/Twine.h" +#include "spdlog/spdlog.h" + +namespace mlir::spu { + +mlir::func::FuncOp get_entrypoint(ModuleOp op) { + // Get the main function + auto entry_func = op.lookupSymbol("main"); + if (!entry_func) { + auto funcs = op.getOps(); + if (std::distance(funcs.begin(), funcs.end()) == 1) { + entry_func = *funcs.begin(); + } + } + + return entry_func; +} + +} // namespace mlir::spu diff --git a/libspu/dialect/utils/utils.h b/libspu/dialect/utils/utils.h new file mode 100644 index 00000000..63e71729 --- /dev/null +++ b/libspu/dialect/utils/utils.h @@ -0,0 +1,34 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "llvm/Support/raw_ostream.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" + +namespace mlir::spu { + +template +std::string mlirObjectToString(T&& mlir_obj) { + std::string buf; + llvm::raw_string_ostream rss(buf); + rss << mlir_obj; + rss.flush(); + return buf; +} + +mlir::func::FuncOp get_entrypoint(ModuleOp op); + +} // namespace mlir::spu diff --git a/libspu/kernel/BUILD.bazel b/libspu/kernel/BUILD.bazel index c1853e34..d588fa53 100644 --- a/libspu/kernel/BUILD.bazel +++ b/libspu/kernel/BUILD.bazel @@ -18,7 +18,6 @@ package(default_visibility = ["//visibility:public"]) spu_cc_library( name = "test_util", - testonly = True, srcs = ["test_util.cc"], hdrs = ["test_util.h"], deps = [ diff --git a/libspu/kernel/hal/fxp_approx.cc b/libspu/kernel/hal/fxp_approx.cc index 738ad50c..5cda83df 100644 --- a/libspu/kernel/hal/fxp_approx.cc +++ b/libspu/kernel/hal/fxp_approx.cc @@ -891,4 +891,68 @@ Value f_atan2(SPUContext* ctx, const Value& y, const Value& x) { return atan2_minimax(ctx, y, x); } +namespace { + +// ref: Handbook of Mathematical Functions: with Formulas, Graphs, and +// Mathematical +Value acos_minimax(SPUContext* ctx, const Value& x) { + auto msb = _msb(ctx, x); + msb = _prefer_a(ctx, msb); + + auto abs_x = _mux(ctx, msb, _negate(ctx, x), x).setDtype(x.dtype()); + + // arccos(x) ~= sqrt(1-x) * poly(x), when x is in [0,1] + // 3-order minimax approximation with max error < 5e-5 + static std::array kAcosCoefficientSmall{1.5707288, -0.2121144, + 0.0742610, -0.0187293}; + + // 7-order minimax approximation with max error < 2e-8 + static std::array kAcosCoefficientLarge{ + 1.5707963050, -0.2145988016, 0.0889789874, -0.0501743046, + 0.0308918810, -0.0170881256, 0.0066700901, -0.0012624911}; + + Value poly_part; + if (ctx->getFxpBits() <= 20) { + poly_part = detail::polynomial(ctx, abs_x, kAcosCoefficientSmall); + } else { + poly_part = detail::polynomial(ctx, abs_x, kAcosCoefficientLarge); + } + const auto 
k1 = constant(ctx, 1.0F, x.dtype(), x.shape()); + auto sqrt_part = f_sqrt(ctx, f_sub(ctx, k1, abs_x)); + + auto ret = f_mul(ctx, sqrt_part, poly_part, SignType::Positive); + + const auto pi = constant(ctx, M_PI, x.dtype(), x.shape()); + ret = _mux(ctx, msb, f_sub(ctx, pi, ret), ret).setDtype(x.dtype()); + + return ret; +} + +} // namespace + +Value f_acos(SPUContext* ctx, const Value& x) { + SPU_TRACE_HAL_DISP(ctx, x); + + SPU_ENFORCE(x.isFxp()); + + if (x.isPublic()) { + return f_acos_p(ctx, x); + } + + return acos_minimax(ctx, x); +} + +Value f_asin(SPUContext* ctx, const Value& x) { + SPU_TRACE_HAL_DISP(ctx, x); + + SPU_ENFORCE(x.isFxp()); + + if (x.isPublic()) { + return f_asin_p(ctx, x); + } + + const auto k_pi2 = constant(ctx, M_PI_2, x.dtype(), x.shape()); + // asin(x) = pi/2 - acos(x) + return f_sub(ctx, k_pi2, f_acos(ctx, x)); +} } // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/fxp_approx.h b/libspu/kernel/hal/fxp_approx.h index df01e933..fa401887 100644 --- a/libspu/kernel/hal/fxp_approx.h +++ b/libspu/kernel/hal/fxp_approx.h @@ -68,4 +68,8 @@ Value f_erf(SPUContext* ctx, const Value& x); Value f_atan2(SPUContext* ctx, const Value& y, const Value& x); +Value f_acos(SPUContext* ctx, const Value& x); + +Value f_asin(SPUContext* ctx, const Value& x); + } // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/fxp_approx_test.cc b/libspu/kernel/hal/fxp_approx_test.cc index f9e2ec9b..c79dc434 100644 --- a/libspu/kernel/hal/fxp_approx_test.cc +++ b/libspu/kernel/hal/fxp_approx_test.cc @@ -447,4 +447,66 @@ TEST(FxpTest, Atan2) { } } +TEST(FxpTest, Acos) { + // GIVEN + SPUContext ctx = test::makeSPUContext(); + + // some special cases + xt::xarray x0 = {-1.0, -0.5, 0.0, 0.5, 1.0}; + xt::xarray x1 = xt::random::rand({30}, -1, 1); + xt::xarray x = xt::concatenate(xt::xtuple(x0, x1)); + + // public acos + { + Value a = constant(&ctx, x, DT_F32); + Value c = f_acos(&ctx, a); + EXPECT_EQ(c.dtype(), DT_F32); + auto y = dump_public_as(&ctx, c); + 
EXPECT_TRUE(xt::allclose(xt::acos(x), y, 0.01, 0.001)) + << xt::acos(x) << std::endl + << y; + } + // secret acos + { + Value a = test::makeValue(&ctx, x, VIS_SECRET); + Value c = f_acos(&ctx, a); + EXPECT_EQ(c.dtype(), DT_F32); + auto y = dump_public_as(&ctx, reveal(&ctx, c)); + EXPECT_TRUE(xt::allclose(xt::acos(x), y, 0.01, 0.001)) + << xt::acos(x) << std::endl + << y; + } +} + +TEST(FxpTest, Asin) { + // GIVEN + SPUContext ctx = test::makeSPUContext(); + + // some special cases + xt::xarray x0 = {-1.0, -0.5, 0.0, 0.5, 1.0}; + xt::xarray x1 = xt::random::rand({30}, -1, 1); + xt::xarray x = xt::concatenate(xt::xtuple(x0, x1)); + + // public asin + { + Value a = constant(&ctx, x, DT_F32); + Value c = f_asin(&ctx, a); + EXPECT_EQ(c.dtype(), DT_F32); + auto y = dump_public_as(&ctx, c); + EXPECT_TRUE(xt::allclose(xt::asin(x), y, 0.01, 0.001)) + << xt::asin(x) << std::endl + << y; + } + // secret asin + { + Value a = test::makeValue(&ctx, x, VIS_SECRET); + Value c = f_asin(&ctx, a); + EXPECT_EQ(c.dtype(), DT_F32); + auto y = dump_public_as(&ctx, reveal(&ctx, c)); + EXPECT_TRUE(xt::allclose(xt::asin(x), y, 0.01, 0.001)) + << xt::asin(x) << std::endl + << y; + } +} + } // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/fxp_cleartext.cc b/libspu/kernel/hal/fxp_cleartext.cc index e742c277..818e0b23 100644 --- a/libspu/kernel/hal/fxp_cleartext.cc +++ b/libspu/kernel/hal/fxp_cleartext.cc @@ -65,7 +65,8 @@ Value applyFloatingPointFn(SPUContext* ctx, const Value& in, FN&& fn) { DataType dtype; const auto out = encodeToRing(fp_arr, field, fxp_bits, &dtype); - SPU_ENFORCE(dtype == DT_F32 || dtype == DT_F64, "sanity failed"); + SPU_ENFORCE(dtype == DT_F16 || dtype == DT_F32 || dtype == DT_F64, + "sanity failed"); return Value(out.as(in.storage_type()), dtype); } @@ -157,4 +158,14 @@ Value f_atan2_p(SPUContext* ctx, const Value& x, const Value& y) { ctx, x, y, [](float a, float b) { return std::atan2(a, b); }); } +Value f_acos_p(SPUContext* ctx, const Value& x) { + 
SPU_TRACE_HAL_DISP(ctx, x); + return applyFloatingPointFn(ctx, x, [](float x) { return std::acos(x); }); +} + +Value f_asin_p(SPUContext* ctx, const Value& in) { + SPU_TRACE_HAL_DISP(ctx, in); + return applyFloatingPointFn(ctx, in, [](float x) { return std::asin(x); }); +} + } // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/fxp_cleartext.h b/libspu/kernel/hal/fxp_cleartext.h index a38f372f..173a3c78 100644 --- a/libspu/kernel/hal/fxp_cleartext.h +++ b/libspu/kernel/hal/fxp_cleartext.h @@ -44,4 +44,8 @@ Value f_pow_p(SPUContext* ctx, const Value& x, const Value& y); Value f_atan2_p(SPUContext* ctx, const Value& x, const Value& y); +Value f_acos_p(SPUContext* ctx, const Value& in); + +Value f_asin_p(SPUContext* ctx, const Value& in); + } // namespace spu::kernel::hal diff --git a/libspu/kernel/hal/polymorphic.cc b/libspu/kernel/hal/polymorphic.cc index 108356c8..34cf5f77 100644 --- a/libspu/kernel/hal/polymorphic.cc +++ b/libspu/kernel/hal/polymorphic.cc @@ -554,6 +554,22 @@ Value atan2(SPUContext* ctx, const Value& y, const Value& x) { return f_atan2(ctx, y, x); } +Value acos(SPUContext* ctx, const Value& x) { + SPU_TRACE_HAL_DISP(ctx, x); + + SPU_ENFORCE(x.isFxp()); + + return f_acos(ctx, x); +} + +Value asin(SPUContext* ctx, const Value& x) { + SPU_TRACE_HAL_DISP(ctx, x); + + SPU_ENFORCE(x.isFxp()); + + return f_asin(ctx, x); +} + Value rsqrt(SPUContext* ctx, const Value& x) { SPU_TRACE_HAL_DISP(ctx, x); diff --git a/libspu/kernel/hal/polymorphic.h b/libspu/kernel/hal/polymorphic.h index 86ad04da..58e90f00 100644 --- a/libspu/kernel/hal/polymorphic.h +++ b/libspu/kernel/hal/polymorphic.h @@ -221,6 +221,14 @@ Value cosine(SPUContext* ctx, const Value& x); // @param x, horizontal coordinate Value atan2(SPUContext* ctx, const Value& y, const Value& x); +/// element-wise arccosine, i.e. x -> acos(x) +// @param x, the input value +Value acos(SPUContext* ctx, const Value& x); + +/// element-wise arcsine, i.e. 
x -> asin(x) +// @param x, the input value +Value asin(SPUContext* ctx, const Value& x); + /// element-wise reciprocal of square root operation, i.e. x - > 1.0 / sqrt(x) // @param in, the input value Value rsqrt(SPUContext* ctx, const Value& x); diff --git a/libspu/kernel/hal/polymorphic_test.cc b/libspu/kernel/hal/polymorphic_test.cc index 8b661cf3..45c7e877 100644 --- a/libspu/kernel/hal/polymorphic_test.cc +++ b/libspu/kernel/hal/polymorphic_test.cc @@ -824,4 +824,40 @@ TYPED_TEST(FpOnlyMathBinaryTest, Atan2) { << z << std::endl; } +TYPED_TEST(FpOnlyMathUnaryTest, Acos) { + using IN_DT = typename std::tuple_element<0, TypeParam>::type; + using IN_VT = typename std::tuple_element<1, TypeParam>::type; + using RES_DT = float; + + // GIVEN + xt::xarray x = test::xt_random({5, 6}, -1, 1); + xt::xarray expected_y = xt::acos(x); + + // WHAT + auto y = test::evalUnaryOp(IN_VT(), acos, x); + + // THEN + EXPECT_TRUE(xt::allclose(expected_y, y, 0.01, 0.001)) + << expected_y << std::endl + << y; +} + +TYPED_TEST(FpOnlyMathUnaryTest, Asin) { + using IN_DT = typename std::tuple_element<0, TypeParam>::type; + using IN_VT = typename std::tuple_element<1, TypeParam>::type; + using RES_DT = float; + + // GIVEN + xt::xarray x = test::xt_random({5, 6}, -1, 1); + xt::xarray expected_y = xt::asin(x); + + // WHAT + auto y = test::evalUnaryOp(IN_VT(), asin, x); + + // THEN + EXPECT_TRUE(xt::allclose(expected_y, y, 0.01, 0.001)) + << expected_y << std::endl + << y; +} + } // namespace spu::kernel::hal diff --git a/libspu/kernel/hlo/basic_unary.cc b/libspu/kernel/hlo/basic_unary.cc index 2b7d8d64..ce1d2c17 100644 --- a/libspu/kernel/hlo/basic_unary.cc +++ b/libspu/kernel/hlo/basic_unary.cc @@ -42,6 +42,8 @@ SIMPLE_UNARY_KERNEL_DEFN(Rsqrt, hal::rsqrt) SIMPLE_UNARY_KERNEL_DEFN(Sqrt, hal::sqrt) SIMPLE_UNARY_KERNEL_DEFN(Sine, hal::sine) SIMPLE_UNARY_KERNEL_DEFN(Cosine, hal::cosine) +SIMPLE_UNARY_KERNEL_DEFN(Acos, hal::acos) +SIMPLE_UNARY_KERNEL_DEFN(Asin, hal::asin) #undef 
SIMPLE_UNARY_KERNEL_DEFN @@ -75,15 +77,17 @@ spu::Value Not(SPUContext *ctx, const spu::Value &in) { } } -spu::Value Sign(SPUContext *ctx, const spu::Value &in) { +spu::Value Sign(SPUContext *ctx, const spu::Value &in, bool ignore_zero) { SPU_ENFORCE(!in.isComplex()); // get the (-1, 1) sign auto s = hal::sign(ctx, in); - // s = (in == 0) ? 0 : s - s = hal::select(ctx, - hal::equal(ctx, in, hal::zeros(ctx, in.dtype(), in.shape())), - hal::zeros(ctx, s.dtype(), in.shape()), s); + if (!ignore_zero) { + // s = (in == 0) ? 0 : s + s = hal::select( + ctx, hal::equal(ctx, in, hal::zeros(ctx, in.dtype(), in.shape())), + hal::zeros(ctx, s.dtype(), in.shape()), s); + } return hal::dtype_cast(ctx, s, in.dtype()); } @@ -141,4 +145,9 @@ spu::Value Round_RNTE(SPUContext *ctx, const spu::Value &in) { return hal::add(ctx, y, comp.setDtype(DT_I64)).setDtype(in.dtype()); } +spu::Value Popcnt(SPUContext *ctx, const spu::Value &in) { + auto bits = getWidth(in.dtype()); + return hal::_popcount(ctx, in, bits).setDtype(in.dtype()); +} + } // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/basic_unary.h b/libspu/kernel/hlo/basic_unary.h index c5770cd7..3b4f4027 100644 --- a/libspu/kernel/hlo/basic_unary.h +++ b/libspu/kernel/hlo/basic_unary.h @@ -41,12 +41,17 @@ SIMPLE_UNARY_KERNEL_DECL(Cosine) SIMPLE_UNARY_KERNEL_DECL(Not) SIMPLE_UNARY_KERNEL_DECL(Rsqrt) SIMPLE_UNARY_KERNEL_DECL(Sqrt) -SIMPLE_UNARY_KERNEL_DECL(Sign) SIMPLE_UNARY_KERNEL_DECL(Round_AFZ) SIMPLE_UNARY_KERNEL_DECL(Real) SIMPLE_UNARY_KERNEL_DECL(Imag) SIMPLE_UNARY_KERNEL_DECL(Round_RNTE) +SIMPLE_UNARY_KERNEL_DECL(Popcnt) +SIMPLE_UNARY_KERNEL_DECL(Acos) +SIMPLE_UNARY_KERNEL_DECL(Asin) #undef SIMPLE_UNARY_KERNEL_DECL +spu::Value Sign(SPUContext *ctx, const spu::Value &in, + bool ignore_zero = false); + } // namespace spu::kernel::hlo diff --git a/libspu/kernel/hlo/basic_unary_test.cc b/libspu/kernel/hlo/basic_unary_test.cc index e0ebe466..b10bec00 100644 --- a/libspu/kernel/hlo/basic_unary_test.cc +++ 
b/libspu/kernel/hlo/basic_unary_test.cc @@ -59,6 +59,8 @@ UNARY_EMPTY_TEST(Sqrt) UNARY_EMPTY_TEST(Sign) UNARY_EMPTY_TEST(Round_AFZ) UNARY_EMPTY_TEST(Round_RNTE) +UNARY_EMPTY_TEST(Acos) +UNARY_EMPTY_TEST(Asin) INSTANTIATE_TEST_SUITE_P( UnaryTestInstances, UnaryTest, diff --git a/libspu/mpc/aby3/oram.h b/libspu/mpc/aby3/oram.h index 6cf87a92..ac50e7c9 100644 --- a/libspu/mpc/aby3/oram.h +++ b/libspu/mpc/aby3/oram.h @@ -112,7 +112,7 @@ class OramDpf { numel_(numel), root_seed_(root_seed), aes_crypto_(yacl::crypto::SymmetricCrypto::CryptoType::AES128_ECB, - aes_key, 1){}; + aes_key, 1) {}; // genrate 2pc-dpf according to 'ctrl' void gen(KernelEvalContext* ctx, DpfGenCtrl ctrl); @@ -138,7 +138,7 @@ class OramContext { explicit OramContext(int64_t dpf_size) : dpf_e(2, std::vector(dpf_size)), convert_help_v(2, std::vector(dpf_size)), - dpf_size_(dpf_size){}; + dpf_size_(dpf_size) {}; void genDpf(KernelEvalContext* ctx, DpfGenCtrl ctrl, uint128_t aes_key, uint128_t target_point); diff --git a/libspu/version.h b/libspu/version.h index 60d0de1d..d4a7a9fe 100644 --- a/libspu/version.h +++ b/libspu/version.h @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#define SPU_VERSION "0.9.1.dev$$DATE$$" +#define SPU_VERSION "0.9.2.dev$$DATE$$" #include diff --git a/spu/experimental/drop_cached_var_impl.py b/spu/experimental/drop_cached_var_impl.py index 4237bd81..0222a27c 100644 --- a/spu/experimental/drop_cached_var_impl.py +++ b/spu/experimental/drop_cached_var_impl.py @@ -49,7 +49,7 @@ def _drop_cached_var_lowering(ctx, input, *dependences): dtype = mlir.ir.RankedTensorType(input.type) return custom_call( - "drop_cached_var", + "spu.drop_cached_var", # Output types result_types=[dtype], # The inputs: diff --git a/spu/experimental/make_cached_var_impl.py b/spu/experimental/make_cached_var_impl.py index e6e014ab..6d5eff0a 100644 --- a/spu/experimental/make_cached_var_impl.py +++ b/spu/experimental/make_cached_var_impl.py @@ -43,7 +43,7 @@ def _make_cached_var_lowering(ctx, input): dtype = mlir.ir.RankedTensorType(input.type) return custom_call( - "make_cached_var", + "spu.make_cached_var", # Output types result_types=[dtype], # The inputs: diff --git a/spu/intrinsic/README.md b/spu/intrinsic/README.md index a07cd95e..6595e1cb 100644 --- a/spu/intrinsic/README.md +++ b/spu/intrinsic/README.md @@ -27,4 +27,5 @@ Open `libspu/device/pphlo/pphlo_intrinsic_executor.cc`, find the if branch that ### (Optional) Add compile time visibility inference rule By default, compiler uses a common inference rule to deduce output visibility. -If your new intrinsic requires some special treatment, please open `libspu/compiler/passes/visibility_inference.cc`, and update `inferIntrinsic` method. +If your new intrinsic requires some special treatment, please open `libspu/dialect/pphlo/transforms/visibility_inference.cc`, +and update `inferIntrinsic` method. 
diff --git a/spu/tests/distributed_test.py b/spu/tests/distributed_test.py index 14054fba..54415b09 100644 --- a/spu/tests/distributed_test.py +++ b/spu/tests/distributed_test.py @@ -210,12 +210,12 @@ def test_basic_spu_jax(self): self.assertTrue(y.device is ppd.current().devices["P2"]) npt.assert_equal(ppd.get(y), np.array([3, 5])) - def test_dump_pphlo(self): + def test_dump_ir(self): a, b = ppd.device("P1")(no_in_two_out)() x, y = ppd.device("SPU")(no_in_two_out)() # dump pphlo - text = ppd.device("SPU")(jnp.add).dump_pphlo(a, x) + text = ppd.device("SPU")(jnp.add).dump_ir(a, x) self.assertIn('pphlo.add', text) def test_basic_spu_tf(self): @@ -279,7 +279,7 @@ def suite(): suite = unittest.TestSuite() suite.addTest(UnitTests('test_basic_pyu')) suite.addTest(UnitTests('test_basic_spu_jax')) - suite.addTest(UnitTests('test_dump_pphlo')) + suite.addTest(UnitTests('test_dump_ir')) suite.addTest(UnitTests('test_basic_spu_tf')) return suite diff --git a/spu/tests/jnp_testbase.py b/spu/tests/jnp_testbase.py index f89e5cb8..ad106687 100644 --- a/spu/tests/jnp_testbase.py +++ b/spu/tests/jnp_testbase.py @@ -88,6 +88,10 @@ def rand_default(rng): return partial(_rand_dtype, rng) +def rand_extra_large(rng): + return partial(jtu._rand_dtype, rng.randn, scale=2**30) + + def rand_not_small_nonzero(rng): def post(x): x = np.where(x == 0, np.array(1, dtype=x.dtype), x) @@ -140,7 +144,7 @@ def post(x): REC("array_equiv", 2, number_dtypes, all_shapes, jtu.rand_some_equal), REC("reciprocal", 1, float_dtypes, all_shapes, rand_default), REC("subtract", 2, number_dtypes, all_shapes, rand_default), - REC("signbit", 1, number_dtypes, all_shapes, rand_default), + REC("signbit", 1, number_dtypes, all_shapes, rand_extra_large), REC("trunc", 1, number_dtypes, all_shapes, rand_default), REC("sin", 1, number_dtypes, all_shapes, rand_default), REC("cos", 1, number_dtypes, all_shapes, rand_default), @@ -302,6 +306,7 @@ def post(x): ] BITWISE_OP_RECORDS = [ + REC("bitwise_count", 1, int_dtypes, 
all_shapes, jtu.rand_default), REC("bitwise_and", 2, int_dtypes, all_shapes, jtu.rand_bool), REC("bitwise_not", 1, int_dtypes, all_shapes, jtu.rand_bool), REC("invert", 1, int_dtypes, all_shapes, jtu.rand_bool), diff --git a/spu/tests/spu_runtime_test.py b/spu/tests/spu_runtime_test.py index 38ca54bc..938b4672 100644 --- a/spu/tests/spu_runtime_test.py +++ b/spu/tests/spu_runtime_test.py @@ -39,7 +39,7 @@ def test_no_io(self): func.func @main(%arg0: tensor<2x2x!pphlo.secret>) -> (tensor<2x2x!pphlo.secret>) { %0 = pphlo.constant dense<[[1,2],[3,4]]> : tensor<2x2xi32> %1 = pphlo.add %arg0, %0 : (tensor<2x2x!pphlo.secret>, tensor<2x2xi32>) -> tensor<2x2x!pphlo.secret> - pphlo.custom_call @dbg_print (%1) {has_side_effect = true} : (tensor<2x2x!pphlo.secret>)->() + pphlo.custom_call @spu.dbg_print (%1) {has_side_effect = true} : (tensor<2x2x!pphlo.secret>)->() return %1 : tensor<2x2x!pphlo.secret> }""" executable = spu_pb2.ExecutableProto( diff --git a/spu/utils/distributed_impl.py b/spu/utils/distributed_impl.py index d48f14f6..79b13426 100644 --- a/spu/utils/distributed_impl.py +++ b/spu/utils/distributed_impl.py @@ -706,7 +706,7 @@ def get_share_ref(idx, obj): return tree_unflatten(out_tree, ret_flat) - def dump_pphlo(self, *args, **kwargs): + def dump_ir(self, *args, **kwargs): args, kwargs = self.device._place_arguments(*args, **kwargs) executable, *_ = self._compile_jax_func( self.pyfunc, self.static_argnums, self.copts, *args, **kwargs @@ -932,7 +932,7 @@ def get_share_ref(idx, obj): ret = pytree.tree_unflatten(ret, out_spec) return ret - def dump_pphlo(self, state_dict, *args, **kwargs): + def dump_ir(self, state_dict, *args, **kwargs): # place state_dict self.state_dict = self._place_state_dict(state_dict) args, kwargs = self.device._place_arguments(*args, **kwargs)