diff --git a/.dev_scripts/train_duo.sh b/.dev_scripts/train_duo.sh
new file mode 100755
index 0000000..b3f96d0
--- /dev/null
+++ b/.dev_scripts/train_duo.sh
@@ -0,0 +1,21 @@
+PARTITION=$1
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+log_dir="work_dirs/slurm_logs"
+mkdir -p "$log_dir"
+
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-atss_r50_1x configs/detection/duo_dataset/atss_r50_fpn_1x_duo-coco.py work_dirs/duo/atss_r50_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/atss_r50_fpn_1x_duo-coco/eval_result > "$log_dir/duo_atss_r50_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-cascade_r50_1x configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_duo-coco.py work_dirs/duo/cascade-rcnn_r50_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/cascade-rcnn_r50_fpn_1x_duo-coco/eval_result > "$log_dir/duo_cascade-rcnn_r50_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-faster_r50_1x configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_duo-coco.py work_dirs/duo/faster-rcnn_r50_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/faster-rcnn_r50_fpn_1x_duo-coco/eval_result > "$log_dir/duo_faster-rcnn_r50_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-faster_r101_1x configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_duo-coco.py work_dirs/duo/faster-rcnn_r101_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/faster-rcnn_r101_fpn_1x_duo-coco/eval_result > "$log_dir/duo_faster-rcnn_r101_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-faster_x101-32x4d_1x configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_duo-coco.py work_dirs/duo/faster-rcnn_x101-32x4d_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/faster-rcnn_x101-32x4d_fpn_1x_duo-coco/eval_result > "$log_dir/duo_faster-rcnn_x101-32x4d_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-faster_x101-64x4d_1x configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_duo-coco.py work_dirs/duo/faster-rcnn_x101-64x4d_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/faster-rcnn_x101-64x4d_fpn_1x_duo-coco/eval_result > "$log_dir/duo_faster-rcnn_x101-64x4d_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-fcos_r50_1x configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_duo-coco.py work_dirs/duo/fcos_r50-caffe_fpn_gn-head_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/fcos_r50-caffe_fpn_gn-head_1x_duo-coco/eval_result > "$log_dir/duo_fcos_r50-caffe_fpn_gn-head_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-paa_r50_1x configs/detection/duo_dataset/paa_r50_fpn_1x_duo-coco.py work_dirs/duo/paa_r50_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/paa_r50_fpn_1x_duo-coco/eval_result > "$log_dir/duo_paa_r50_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-retinanet_r50_1x configs/detection/duo_dataset/retinanet_r50_fpn_1x_duo-coco.py work_dirs/duo/retinanet_r50_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/retinanet_r50_fpn_1x_duo-coco/eval_result > "$log_dir/duo_retinanet_r50_fpn_1x_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-ssd300 configs/detection/duo_dataset/ssd300_120e_duo-coco.py work_dirs/duo/ssd300_120e_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/ssd300_120e_duo-coco/eval_result > "$log_dir/duo_ssd300_120e_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-ssd512 configs/detection/duo_dataset/ssd512_120e_duo-coco.py work_dirs/duo/ssd512_120e_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/ssd512_120e_duo-coco/eval_result > "$log_dir/duo_ssd512_120e_duo-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION duo-tood_r50_1x configs/detection/duo_dataset/tood_r50_fpn_1x_duo-coco.py work_dirs/duo/tood_r50_fpn_1x_duo-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/duo/tood_r50_fpn_1x_duo-coco/eval_result > "$log_dir/duo_tood_r50_fpn_1x_duo-coco.log" &
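
A minimal sketch of how these batch scripts are invoked (the partition name below is a placeholder; the exported variables fall back to the defaults defined at the top of each script):

```bash
# Launch every DUO baseline in parallel on a Slurm partition named
# "gpu-partition" (placeholder); per-job logs land in work_dirs/slurm_logs/.
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 bash .dev_scripts/train_duo.sh gpu-partition
```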
diff --git a/.dev_scripts/train_rtts.sh b/.dev_scripts/train_rtts.sh
new file mode 100755
index 0000000..23677da
--- /dev/null
+++ b/.dev_scripts/train_rtts.sh
@@ -0,0 +1,16 @@
+PARTITION=$1
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+log_dir="work_dirs/slurm_logs"
+mkdir -p "$log_dir"
+
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-atss_r50_1x configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py work_dirs/rtts/atss_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/atss_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_atss_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-cascade_r50_1x configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py work_dirs/rtts/cascade-rcnn_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/cascade-rcnn_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_cascade-rcnn_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-faster_r50_1x configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py work_dirs/rtts/faster-rcnn_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/faster-rcnn_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_faster-rcnn_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-fcos_r50_1x configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py work_dirs/rtts/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco/eval_result > "$log_dir/rtts_fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-paa_r50_1x configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py work_dirs/rtts/paa_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/paa_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_paa_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-retinanet_r50_1x configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py work_dirs/rtts/retinanet_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/retinanet_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_retinanet_r50_fpn_1x_rtts-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION rtts-tood_r50_1x configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py work_dirs/rtts/tood_r50_fpn_1x_rtts-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/rtts/tood_r50_fpn_1x_rtts-coco/eval_result > "$log_dir/rtts_tood_r50_fpn_1x_rtts-coco.log" &
diff --git a/.dev_scripts/train_urpc2020-train_all.sh b/.dev_scripts/train_urpc2020-train_all.sh
new file mode 100755
index 0000000..b2baa12
--- /dev/null
+++ b/.dev_scripts/train_urpc2020-train_all.sh
@@ -0,0 +1,21 @@
+PARTITION=$1
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+log_dir="work_dirs/slurm_logs"
+mkdir -p "$log_dir"
+
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-atss_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/atss_r50_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/atss_r50_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/atss_r50_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_atss_r50_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-cascade_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/cascade-rcnn_r50_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/cascade-rcnn_r50_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/cascade-rcnn_r50_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_cascade-rcnn_r50_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-faster_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/faster-rcnn_r50_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/faster-rcnn_r50_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_faster-rcnn_r50_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-faster_r101_1x configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/faster-rcnn_r101_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/faster-rcnn_r101_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_faster-rcnn_r101_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-faster_x101-32x4d_1x configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-faster_x101-64x4d_1x configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-fcos_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py work_dirs/urpc2020-train-all/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-paa_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/paa_r50_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/paa_r50_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/paa_r50_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_paa_r50_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-retinanet_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/retinanet_r50_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/retinanet_r50_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/retinanet_r50_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_retinanet_r50_fpn_1x_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-ssd300 configs/detection/urpc2020_dataset/train-all_test-A/ssd300_120e_urpc-coco.py work_dirs/urpc2020-train-all/ssd300_120e_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/ssd300_120e_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_ssd300_120e_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-ssd512 configs/detection/urpc2020_dataset/train-all_test-A/ssd512_120e_urpc-coco.py work_dirs/urpc2020-train-all/ssd512_120e_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/ssd512_120e_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_ssd512_120e_urpc-coco.log" &
+SRUN_ARGS=${SRUN_ARGS} GPUS=$GPUS GPUS_PER_NODE=$GPUS_PER_NODE CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION urpc2020-tood_r50_1x configs/detection/urpc2020_dataset/train-all_test-A/tood_r50_fpn_1x_urpc-coco.py work_dirs/urpc2020-train-all/tood_r50_fpn_1x_urpc-coco --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 randomness.seed=0 val_evaluator.outfile_prefix=work_dirs/urpc2020-train-all/tood_r50_fpn_1x_urpc-coco/eval_result > "$log_dir/urpc2020-train-all_tood_r50_fpn_1x_urpc-coco.log" &
diff --git a/.gitignore b/.gitignore
index f323d04..70e4220 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,3 +121,6 @@ work_dirs/
*.pth
*.py~
*.sh~
+
+# ignore lark config
+configs/lark/
diff --git a/configs/detection/_base_/datasets/duo_coco_detection.py b/configs/detection/_base_/datasets/duo_coco_detection.py
new file mode 100644
index 0000000..d750084
--- /dev/null
+++ b/configs/detection/_base_/datasets/duo_coco_detection.py
@@ -0,0 +1,60 @@
+# dataset settings
+dataset_type = 'lqit.DUODataset' # `lqit` means the scope
+data_root = 'data/DUO/'
+
+backend_args = None
+
+train_pipeline = [
+ dict(type='LoadImageFromFile', backend_args=backend_args),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile', backend_args=backend_args),
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    # If there is no ground-truth annotation, remove the following transform
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(
+ batch_size=2,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/instances_train.json',
+ data_prefix=dict(img='images/train/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline,
+ backend_args=backend_args))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/instances_test.json',
+ data_prefix=dict(img='images/test/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ backend_args=backend_args))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/instances_test.json',
+ metric='bbox',
+ format_only=False,
+ backend_args=backend_args)
+test_evaluator = val_evaluator
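
The `ann_file` and `data_prefix` values above imply a fixed on-disk layout; a sketch of the assumed structure (inferred from the config, not verified against the DUO release archive):

```bash
# Expected DUO layout:
# data/DUO/
#   annotations/instances_train.json
#   annotations/instances_test.json
#   images/train/   images/test/
ls data/DUO/annotations data/DUO/images/train data/DUO/images/test
```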
diff --git a/configs/detection/_base_/datasets/rtts_coco.py b/configs/detection/_base_/datasets/rtts_coco.py
index cbbb94e..f8b9ac3 100644
--- a/configs/detection/_base_/datasets/rtts_coco.py
+++ b/configs/detection/_base_/datasets/rtts_coco.py
@@ -1,6 +1,6 @@
# dataset settings
dataset_type = 'lqit.RTTSCocoDataset' # `lqit` means the scope
-data_root = 'data/RESIDE/'
+data_root = 'data/RESIDE/RTTS/'
backend_args = None
@@ -31,8 +31,8 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
- ann_file='RTTS/annotations_json/rtts_train.json',
- data_prefix=dict(img='RTTS/'),
+ ann_file='annotations_json/train.json',
+ data_prefix=dict(img='JPEGImages/'),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline,
backend_args=backend_args))
@@ -45,8 +45,8 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
- ann_file='RTTS/annotations_json/rtts_val.json',
- data_prefix=dict(img='RTTS/'),
+ ann_file='annotations_json/test.json',
+ data_prefix=dict(img='JPEGImages/'),
test_mode=True,
pipeline=test_pipeline,
backend_args=backend_args))
@@ -54,7 +54,7 @@
val_evaluator = dict(
type='CocoMetric',
- ann_file=data_root + 'RTTS/annotations_json/rtts_val.json',
+ ann_file=data_root + 'annotations_json/test.json',
metric='bbox',
format_only=False,
backend_args=backend_args)
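
After this change the dataset root moves from `data/RESIDE/` down to `data/RESIDE/RTTS/`; a sketch of the layout the new paths assume (inferred from the updated config):

```bash
# Expected RTTS layout:
# data/RESIDE/RTTS/
#   annotations_json/train.json
#   annotations_json/test.json
#   JPEGImages/
ls data/RESIDE/RTTS/annotations_json data/RESIDE/RTTS/JPEGImages
```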
diff --git a/configs/detection/_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py b/configs/detection/_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py
new file mode 100644
index 0000000..ef8593c
--- /dev/null
+++ b/configs/detection/_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py
@@ -0,0 +1,60 @@
+# dataset settings
+dataset_type = 'lqit.URPCCocoDataset' # `lqit` means the scope
+data_root = 'data/URPC2020/'
+
+backend_args = None
+
+train_pipeline = [
+ dict(type='LoadImageFromFile', backend_args=backend_args),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile', backend_args=backend_args),
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    # If there is no ground-truth annotation, remove the following transform
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(
+ batch_size=2,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/train_all.json',
+ data_prefix=dict(img='train-image/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline,
+ backend_args=backend_args))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/test-A.json',
+ data_prefix=dict(img='test-A-image/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ backend_args=backend_args))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/test-A.json',
+ metric='bbox',
+ format_only=False,
+ backend_args=backend_args)
+test_evaluator = val_evaluator
diff --git a/configs/detection/_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py b/configs/detection/_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py
new file mode 100644
index 0000000..f5fbbd5
--- /dev/null
+++ b/configs/detection/_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py
@@ -0,0 +1,60 @@
+# dataset settings
+dataset_type = 'lqit.URPCCocoDataset' # `lqit` means the scope
+data_root = 'data/URPC2020/'
+
+backend_args = None
+
+train_pipeline = [
+ dict(type='LoadImageFromFile', backend_args=backend_args),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile', backend_args=backend_args),
+ dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    # If there is no ground-truth annotation, remove the following transform
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(
+ batch_size=2,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/train_all.json',
+ data_prefix=dict(img='train-image/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline,
+ backend_args=backend_args))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ ann_file='annotations/test-B.json',
+ data_prefix=dict(img='test-B-image/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ backend_args=backend_args))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/test-B.json',
+ metric='bbox',
+ format_only=False,
+ backend_args=backend_args)
+test_evaluator = val_evaluator
diff --git a/configs/detection/_base_/datasets/urpc-2020_coco_detection.py b/configs/detection/_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py
similarity index 86%
rename from configs/detection/_base_/datasets/urpc-2020_coco_detection.py
rename to configs/detection/_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py
index 2363f59..3891837 100644
--- a/configs/detection/_base_/datasets/urpc-2020_coco_detection.py
+++ b/configs/detection/_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py
@@ -1,6 +1,6 @@
# dataset settings
dataset_type = 'lqit.URPCCocoDataset' # `lqit` means the scope
-data_root = 'data/URPC/'
+data_root = 'data/URPC2020/'
backend_args = None
@@ -30,8 +30,8 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
- ann_file='annotations_json/train.json',
- data_prefix=dict(img='JPEGImages/'),
+ ann_file='annotations/train.json',
+ data_prefix=dict(img='train-image/'),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline,
backend_args=backend_args))
@@ -44,8 +44,8 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
- ann_file='annotations_json/val.json',
- data_prefix=dict(img='JPEGImages/'),
+ ann_file='annotations/val.json',
+ data_prefix=dict(img='train-image/'),
test_mode=True,
pipeline=test_pipeline,
backend_args=backend_args))
@@ -53,7 +53,7 @@
val_evaluator = dict(
type='CocoMetric',
- ann_file=data_root + 'annotations_json/val.json',
+ ann_file=data_root + 'annotations/val.json',
metric='bbox',
format_only=False,
backend_args=backend_args)
diff --git a/configs/detection/_base_/datasets/urpc-2020_xml_detection.py b/configs/detection/_base_/datasets/urpc2020/urpc2020-validation_xml_detection.py
similarity index 82%
rename from configs/detection/_base_/datasets/urpc-2020_xml_detection.py
rename to configs/detection/_base_/datasets/urpc2020/urpc2020-validation_xml_detection.py
index c7825ad..950c40f 100644
--- a/configs/detection/_base_/datasets/urpc-2020_xml_detection.py
+++ b/configs/detection/_base_/datasets/urpc2020/urpc2020-validation_xml_detection.py
@@ -30,10 +30,10 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
- ann_file='ImageSets/train.txt',
- meta_file='ImageMetas/train-image-metas.pkl',
- img_subdir='JPEGImages',
- ann_subdir='annotations_xml',
+ ann_file='source_data/ImageSets/train.txt',
+ meta_file='source_data/ImageMetas/train-image-metas.pkl',
+ img_subdir='train-image',
+ ann_subdir='source_data/train-box',
data_prefix=dict(sub_data_root=''),
filter_cfg=dict(filter_empty_gt=True, min_size=32, bbox_min_size=32),
pipeline=train_pipeline,
@@ -47,10 +47,10 @@
dataset=dict(
type=dataset_type,
data_root=data_root,
- ann_file='ImageSets/val.txt',
- meta_file='ImageMetas/val-image-metas.pkl',
- img_subdir='JPEGImages',
- ann_subdir='annotations_xml',
+ ann_file='source_data/ImageSets/val.txt',
+ meta_file='source_data/ImageMetas/val-image-metas.pkl',
+ img_subdir='train-image',
+ ann_subdir='source_data/train-box',
data_prefix=dict(sub_data_root=''),
test_mode=True,
pipeline=test_pipeline,
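
Taken together, the URPC2020 configs above (COCO-style train-all/validation/test-A/test-B plus the XML variant) share one data root; a sketch of the implied layout, with file names taken from the configs rather than verified against the download:

```bash
# Implied URPC2020 layout:
# data/URPC2020/
#   annotations/{train_all,train,val,test-A,test-B}.json
#   train-image/  test-A-image/  test-B-image/
#   source_data/ImageSets/{train,val}.txt
#   source_data/ImageMetas/{train,val}-image-metas.pkl
#   source_data/train-box/        # XML annotations
ls data/URPC2020/annotations data/URPC2020/source_data
```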
diff --git a/configs/detection/_base_/default_runtime.py b/configs/detection/_base_/default_runtime.py
index 8e8c4ac..d2817fe 100644
--- a/configs/detection/_base_/default_runtime.py
+++ b/configs/detection/_base_/default_runtime.py
@@ -8,7 +8,6 @@
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='DetVisualizationHook'))
-# set seed = 0 as default
randomness = dict(seed=None, deterministic=False)
env_cfg = dict(
diff --git a/configs/detection/duo_dataset/README.md b/configs/detection/duo_dataset/README.md
index e074880..589f424 100644
--- a/configs/detection/duo_dataset/README.md
+++ b/configs/detection/duo_dataset/README.md
@@ -1,7 +1,6 @@
# Detecting Underwater Objects
> [Detecting Underwater Objects](https://arxiv.org/abs/2106.05681)
-> [Underwater Robot Professional Contest 2020](https://www.heywhale.com/home/competition/5e535a612537a0002ca864ac/content/0)
diff --git a/configs/detection/duo_dataset/atss_r50_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/atss_r50_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..b7875c0
--- /dev/null
+++ b/configs/detection/duo_dataset/atss_r50_fpn_1x_duo-coco.py
@@ -0,0 +1,86 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='ATSS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='ATSSHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(type='ATSSAssigner', topk=9),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='atss_r50_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
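
For a single config without Slurm, the standard MMEngine entry point works as well; a sketch assuming this repo ships the usual `tools/train.py` alongside `tools/slurm_train.sh`:

```bash
# Train one DUO baseline locally; the --cfg-options mirror what the
# .dev_scripts above pass through to slurm_train.sh.
python tools/train.py configs/detection/duo_dataset/atss_r50_fpn_1x_duo-coco.py \
    --cfg-options randomness.seed=0 default_hooks.checkpoint.max_keep_ckpts=1
```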
diff --git a/configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..a843236
--- /dev/null
+++ b/configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_duo-coco.py
@@ -0,0 +1,204 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='CascadeRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ]),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='cascade-rcnn_r50_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..a0fd4aa
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_duo-coco.py
@@ -0,0 +1,21 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_duo-coco.py'
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ init_cfg=dict(type='Pretrained',
+ checkpoint='torchvision://resnet101')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_r101_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_r101_fpn_2x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_r101_fpn_2x_duo-coco.py
new file mode 100644
index 0000000..9a051f9
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_r101_fpn_2x_duo-coco.py
@@ -0,0 +1,21 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_duo-coco.py'
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ init_cfg=dict(type='Pretrained',
+ checkpoint='torchvision://resnet101')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_r101_fpn_2x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..de11f4e
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_duo-coco.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_r50_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_urpc-xml.py b/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_urpc-xml.py
deleted file mode 100644
index 426038c..0000000
--- a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_urpc-xml.py
+++ /dev/null
@@ -1,7 +0,0 @@
-_base_ = [
- '../_base_/models/faster-rcnn_r50_fpn.py',
- '../_base_/datasets/urpc-2020_xml_detection.py',
- '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
-]
-
-model = dict(roi_head=dict(bbox_head=dict(num_classes=4)))
diff --git a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_2x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_r50_fpn_2x_duo-coco.py
new file mode 100644
index 0000000..6a7135f
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_r50_fpn_2x_duo-coco.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_r50_fpn_2x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..c8abe42
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_duo-coco.py
@@ -0,0 +1,29 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_duo-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=32,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_x101-32x4d_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_2x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_2x_duo-coco.py
new file mode 100644
index 0000000..57419be
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_2x_duo-coco.py
@@ -0,0 +1,29 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_duo-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=32,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+#             name='faster-rcnn_x101-32x4d_fpn_2x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..e9e9760
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_duo-coco.py
@@ -0,0 +1,29 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_duo-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_x101-64x4d_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_2x_duo-coco.py b/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_2x_duo-coco.py
new file mode 100644
index 0000000..1e9de74
--- /dev/null
+++ b/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_2x_duo-coco.py
@@ -0,0 +1,29 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_duo-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='faster-rcnn_x101-64x4d_fpn_2x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_duo-coco.py b/configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_duo-coco.py
new file mode 100644
index 0000000..248bfea
--- /dev/null
+++ b/configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_duo-coco.py
@@ -0,0 +1,91 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FCOS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[102.9801, 115.9465, 122.7717],
+ std=[1.0, 1.0, 1.0],
+ bgr_to_rgb=False,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='caffe',
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://detectron/resnet50_caffe')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output', # use P5
+ num_outs=5,
+ relu_before_extra_convs=True),
+ bbox_head=dict(
+ type='FCOSHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ strides=[8, 16, 32, 64, 128],
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # testing settings
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(lr=0.01),
+ paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+    clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='fcos_r50-caffe_fpn_gn-head_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/paa_r50_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/paa_r50_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..5f542cd
--- /dev/null
+++ b/configs/detection/duo_dataset/paa_r50_fpn_1x_duo-coco.py
@@ -0,0 +1,107 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='PAA',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='PAAHead',
+ reg_decoded_bbox=True,
+ score_voting=True,
+ topk=9,
+        num_classes=4,  # DUO has 4 categories, matching the other DUO configs
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.1,
+ neg_iou_thr=0.1,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='paa_r50_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/retinanet_r50_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/retinanet_r50_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..7c1d5f6
--- /dev/null
+++ b/configs/detection/duo_dataset/retinanet_r50_fpn_1x_duo-coco.py
@@ -0,0 +1,105 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='RetinaNet',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_input',
+ num_outs=5),
+ bbox_head=dict(
+ type='RetinaHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.4,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='PseudoSampler'), # Focal loss should use PseudoSampler
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
+    clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='retinanet_r50_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/ssd300_120e_duo-coco.py b/configs/detection/duo_dataset/ssd300_120e_duo-coco.py
new file mode 100644
index 0000000..0a0a857
--- /dev/null
+++ b/configs/detection/duo_dataset/ssd300_120e_duo-coco.py
@@ -0,0 +1,101 @@
+_base_ = [
+ '../_base_/models/ssd300.py', '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
+]
+# model settings
+model = dict(bbox_head=dict(num_classes=4))
+
+# dataset settings
+input_size = 300
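+# `{{_base_.xxx}}` below is MMEngine cross-config interpolation: the values
+# are read from the inherited base config when this file is parsed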
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='Expand',
+ mean={{_base_.model.data_preprocessor.mean}},
+ to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(
+ batch_size=8,
+ num_workers=2,
+ batch_sampler=None,
+ dataset=dict(
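+ # _delete_=True replaces the dataset settings inherited from the base
+ # config instead of merging into them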
+ _delete_=True,
+ type='RepeatDataset',
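+ # 5 repeats x 24 epochs (schedule_2x) = 120 effective epochs, matching
+ # the `120e` tag in the file name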
+ times=5,
+ dataset=dict(
+ type={{_base_.dataset_type}},
+ data_root={{_base_.data_root}},
+ ann_file='annotations/instances_train.json',
+ data_prefix=dict(img='images/train/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline)))
+val_dataloader = dict(batch_size=8, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4),
+ clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+custom_hooks = [
+ dict(type='NumClassCheckHook'),
+ dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')
+]
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=24,
+ by_epoch=True,
+ milestones=[16, 22],
+ gamma=0.1)
+]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
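+# To enable it, pass `--auto-scale-lr` to the train script (assuming LQIT
+# keeps MMDetection's entrypoint); lr is then scaled by actual batch size / 64.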
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='ssd300_120e_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/ssd512_120e_duo-coco.py b/configs/detection/duo_dataset/ssd512_120e_duo-coco.py
new file mode 100644
index 0000000..921cedc
--- /dev/null
+++ b/configs/detection/duo_dataset/ssd512_120e_duo-coco.py
@@ -0,0 +1,74 @@
+_base_ = 'ssd300_120e_duo-coco.py'
+
+# model settings
+input_size = 512
+model = dict(
+ neck=dict(
+ out_channels=(512, 1024, 512, 256, 256, 256, 256),
+ level_strides=(2, 2, 2, 2, 1),
+ level_paddings=(1, 1, 1, 1, 1),
+ last_kernel_size=4),
+ bbox_head=dict(
+ in_channels=(512, 1024, 512, 256, 256, 256, 256),
+ anchor_generator=dict(
+ type='SSDAnchorGenerator',
+ scale_major=False,
+ input_size=input_size,
+ basesize_ratio_range=(0.1, 0.9),
+ strides=[8, 16, 32, 64, 128, 256, 512],
+ ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]])))
+
+# dataset settings
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='Expand',
+ mean={{_base_.model.data_preprocessor.mean}},
+ to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='ssd512_120e_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/tood_r50_fpn_1x_duo-coco.py b/configs/detection/duo_dataset/tood_r50_fpn_1x_duo-coco.py
new file mode 100644
index 0000000..d105470
--- /dev/null
+++ b/configs/detection/duo_dataset/tood_r50_fpn_1x_duo-coco.py
@@ -0,0 +1,95 @@
+_base_ = [
+ '../_base_/datasets/duo_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='TOOD',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='TOODHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=6,
+ feat_channels=256,
+ anchor_type='anchor_free',
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ initial_loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_cls=dict(
+ type='QualityFocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ beta=2.0,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
+ train_cfg=dict(
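+ # TOOD trains with ATSSAssigner for the first `initial_epoch` epochs,
+ # then switches to TaskAlignedAssigner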
+ initial_epoch=4,
+ initial_assigner=dict(type='ATSSAssigner', topk=9),
+ assigner=dict(type='TaskAlignedAssigner', topk=13),
+ alpha=1,
+ beta=6,
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='DUO_detection',
+# name='tood_r50_fpn_1x_duo',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/lqit/edit/hooks/__init__.py b/configs/detection/rtts_dataset/README.md
similarity index 100%
rename from lqit/edit/hooks/__init__.py
rename to configs/detection/rtts_dataset/README.md
diff --git a/configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..a274ead
--- /dev/null
+++ b/configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py
@@ -0,0 +1,86 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='ATSS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='ATSSHead',
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(type='ATSSAssigner', topk=9),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='atss_r50_fpn_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..b575435
--- /dev/null
+++ b/configs/detection/rtts_dataset/cascade-rcnn_r50_fpn_1x_rtts-coco.py
@@ -0,0 +1,204 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='CascadeRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=5,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=5,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=5,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ]),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
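+ # each cascade stage trains with a progressively stricter IoU threshold
+ # (0.5 -> 0.6 -> 0.7), the core idea behind Cascade R-CNN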
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='cascade-rcnn_r50_fpn_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..02f15e5
--- /dev/null
+++ b/configs/detection/rtts_dataset/faster-rcnn_r50_fpn_1x_rtts-coco.py
@@ -0,0 +1,133 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=5,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='faster-rcnn_r50_fpn_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py b/configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py
new file mode 100644
index 0000000..b57a75a
--- /dev/null
+++ b/configs/detection/rtts_dataset/fcos_r50-caffe_fpn_gn-head_1x_rtts-coco.py
@@ -0,0 +1,91 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FCOS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
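+ # caffe-style ResNet checkpoints expect BGR inputs normalised with these
+ # per-channel means and std=1, hence bgr_to_rgb=False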
+ mean=[102.9801, 115.9465, 122.7717],
+ std=[1.0, 1.0, 1.0],
+ bgr_to_rgb=False,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='caffe',
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://detectron/resnet50_caffe')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output', # use P5
+ num_outs=5,
+ relu_before_extra_convs=True),
+ bbox_head=dict(
+ type='FCOSHead',
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ strides=[8, 16, 32, 64, 128],
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # testing settings
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(lr=0.01),
+ paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+ clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='fcos_r50-caffe_fpn_gn-head_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..ae5f7eb
--- /dev/null
+++ b/configs/detection/rtts_dataset/paa_r50_fpn_1x_rtts-coco.py
@@ -0,0 +1,107 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='PAA',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='PAAHead',
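+ # reg_decoded_bbox=True decodes the predicted deltas into boxes before
+ # computing the loss, as required by IoU-based losses such as GIoULoss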
+ reg_decoded_bbox=True,
+ score_voting=True,
+ topk=9,
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.1,
+ neg_iou_thr=0.1,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='paa_r50_fpn_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..5d6c8ba
--- /dev/null
+++ b/configs/detection/rtts_dataset/retinanet_r50_fpn_1x_rtts-coco.py
@@ -0,0 +1,105 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='RetinaNet',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_input',
+ num_outs=5),
+ bbox_head=dict(
+ type='RetinaHead',
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.4,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='PseudoSampler'), # Focal loss should use PseudoSampler
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
+ clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='retinanet_r50_fpn_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py b/configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py
new file mode 100644
index 0000000..7f41d2a
--- /dev/null
+++ b/configs/detection/rtts_dataset/tood_r50_fpn_1x_rtts-coco.py
@@ -0,0 +1,95 @@
+_base_ = [
+ '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py',
+ '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='TOOD',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='TOODHead',
+ num_classes=5,
+ in_channels=256,
+ stacked_convs=6,
+ feat_channels=256,
+ anchor_type='anchor_free',
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ initial_loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_cls=dict(
+ type='QualityFocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ beta=2.0,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
+ train_cfg=dict(
+ initial_epoch=4,
+ initial_assigner=dict(type='ATSSAssigner', topk=9),
+ assigner=dict(type='TaskAlignedAssigner', topk=13),
+ alpha=1,
+ beta=6,
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='rtts_detection',
+# name='tood_r50_fpn_1x_rtts',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/atss_r50_fpn_1x_ruod.py b/configs/detection/ruod_dataset/atss_r50_fpn_1x_ruod.py
index ac7fbf0..eaa603d 100644
--- a/configs/detection/ruod_dataset/atss_r50_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/atss_r50_fpn_1x_ruod.py
@@ -70,3 +70,17 @@
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='atss_r50_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/cascade-rcnn_r50_fpn_1x_ruod.py b/configs/detection/ruod_dataset/cascade-rcnn_r50_fpn_1x_ruod.py
index b124cec..4c4bbd9 100644
--- a/configs/detection/ruod_dataset/cascade-rcnn_r50_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/cascade-rcnn_r50_fpn_1x_ruod.py
@@ -188,3 +188,17 @@
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='cascade-rcnn_r50_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py b/configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py
index 4b766c4..825c5c3 100644
--- a/configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py
@@ -5,3 +5,17 @@
depth=101,
init_cfg=dict(type='Pretrained',
checkpoint='torchvision://resnet101')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='faster-rcnn_r101_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_1x_ruod.py b/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_1x_ruod.py
index 9a89a75..72c8951 100644
--- a/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_1x_ruod.py
@@ -117,3 +117,17 @@
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='faster-rcnn_r50_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_2x_ruod.py b/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_2x_ruod.py
index 9a89a75..21801b4 100644
--- a/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_2x_ruod.py
+++ b/configs/detection/ruod_dataset/faster-rcnn_r50_fpn_2x_ruod.py
@@ -117,3 +117,17 @@
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='faster-rcnn_r50_fpn_2x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/faster-rcnn_x101-32x4d_fpn_1x_ruod.py b/configs/detection/ruod_dataset/faster-rcnn_x101-32x4d_fpn_1x_ruod.py
index d3cd0fe..2d3e31a 100644
--- a/configs/detection/ruod_dataset/faster-rcnn_x101-32x4d_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/faster-rcnn_x101-32x4d_fpn_1x_ruod.py
@@ -13,3 +13,17 @@
style='pytorch',
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='faster-rcnn_x101-32x4d_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/faster-rcnn_x101-64x4d_fpn_1x_ruod.py b/configs/detection/ruod_dataset/faster-rcnn_x101-64x4d_fpn_1x_ruod.py
index 0e12094..f60c5af 100644
--- a/configs/detection/ruod_dataset/faster-rcnn_x101-64x4d_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/faster-rcnn_x101-64x4d_fpn_1x_ruod.py
@@ -13,3 +13,17 @@
style='pytorch',
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='faster-rcnn_x101-64x4d_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/fcos_r50-caffe_fpn_gn-head_1x_ruod.py b/configs/detection/ruod_dataset/fcos_r50-caffe_fpn_gn-head_1x_ruod.py
index 259f4ec..5205176 100644
--- a/configs/detection/ruod_dataset/fcos_r50-caffe_fpn_gn-head_1x_ruod.py
+++ b/configs/detection/ruod_dataset/fcos_r50-caffe_fpn_gn-head_1x_ruod.py
@@ -75,3 +75,17 @@
optimizer=dict(lr=0.01),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
clip_grad=dict(max_norm=35, norm_type=2)) # loss may NaN without clip_grad
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='fcos_r50-caffe_fpn_gn-head_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/paa_r50_fpn_1x_ruod.py b/configs/detection/ruod_dataset/paa_r50_fpn_1x_ruod.py
new file mode 100644
index 0000000..002b790
--- /dev/null
+++ b/configs/detection/ruod_dataset/paa_r50_fpn_1x_ruod.py
@@ -0,0 +1,107 @@
+_base_ = [
+ '../_base_/datasets/ruod_coco_detection.py',
+ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='PAA',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='PAAHead',
+ reg_decoded_bbox=True,
+ score_voting=True,
+ topk=9,
+ num_classes=10,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.1,
+ neg_iou_thr=0.1,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='paa_r50_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/retinanet_r50_fpn_1x_ruod.py b/configs/detection/ruod_dataset/retinanet_r50_fpn_1x_ruod.py
index faba249..89405fc 100644
--- a/configs/detection/ruod_dataset/retinanet_r50_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/retinanet_r50_fpn_1x_ruod.py
@@ -89,3 +89,17 @@
milestones=[8, 11],
gamma=0.1)
]
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='retinanet_r50_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/ssd300_120e_ruod.py b/configs/detection/ruod_dataset/ssd300_120e_ruod.py
index f189bf8..75cfa32 100644
--- a/configs/detection/ruod_dataset/ssd300_120e_ruod.py
+++ b/configs/detection/ruod_dataset/ssd300_120e_ruod.py
@@ -85,3 +85,17 @@
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='ssd300_120e_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/ssd512_120e_ruod.py b/configs/detection/ruod_dataset/ssd512_120e_ruod.py
index ae6db55..6a9590a 100644
--- a/configs/detection/ruod_dataset/ssd512_120e_ruod.py
+++ b/configs/detection/ruod_dataset/ssd512_120e_ruod.py
@@ -58,3 +58,17 @@
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='ssd512_120e_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/ruod_dataset/tood_r50_fpn_1x_ruod.py b/configs/detection/ruod_dataset/tood_r50_fpn_1x_ruod.py
index 0ed81d5..57b44f0 100644
--- a/configs/detection/ruod_dataset/tood_r50_fpn_1x_ruod.py
+++ b/configs/detection/ruod_dataset/tood_r50_fpn_1x_ruod.py
@@ -79,3 +79,17 @@
# optimizer
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# add WandbVisBackend
+# vis_backends = [
+# dict(type='LocalVisBackend'),
+# dict(type='WandbVisBackend',
+# init_kwargs=dict(
+# project='RUOD_detection',
+# name='tood_r50_fpn_1x_ruod',
+# entity='lqit',
+# )
+# )
+# ]
+# visualizer = dict(
+# type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/uod_air/faster-rcnn_r50_fpn_1x_urpc-coco.py
similarity index 98%
rename from configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_urpc-coco.py
rename to configs/detection/uod_air/faster-rcnn_r50_fpn_1x_urpc-coco.py
index c9baaa3..2cb93f6 100644
--- a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_1x_urpc-coco.py
+++ b/configs/detection/uod_air/faster-rcnn_r50_fpn_1x_urpc-coco.py
@@ -1,5 +1,5 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
+ '../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
diff --git a/configs/detection/duo_dataset/retinanet_r50_fpn_1x_urpc-coco.py b/configs/detection/uod_air/retinanet_r50_fpn_1x_urpc-coco.py
similarity index 97%
rename from configs/detection/duo_dataset/retinanet_r50_fpn_1x_urpc-coco.py
rename to configs/detection/uod_air/retinanet_r50_fpn_1x_urpc-coco.py
index 1113e34..eef8d7a 100644
--- a/configs/detection/duo_dataset/retinanet_r50_fpn_1x_urpc-coco.py
+++ b/configs/detection/uod_air/retinanet_r50_fpn_1x_urpc-coco.py
@@ -1,5 +1,5 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
+ '../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
diff --git a/configs/detection/urpc2020_dataset/README.md b/configs/detection/urpc2020_dataset/README.md
new file mode 100644
index 0000000..e074880
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/README.md
@@ -0,0 +1,69 @@
+# Detecting Underwater Objects
+
+> [Detecting Underwater Objects](https://arxiv.org/abs/2106.05681)
+> [Underwater Robot Professional Contest 2020](https://www.heywhale.com/home/competition/5e535a612537a0002ca864ac/content/0)
+
+
+
+Underwater object detection for robot picking has attracted a lot of interest, but it remains an unsolved problem due to several challenges, which we take steps towards addressing here. First, the currently available datasets largely lack test-set annotations, forcing researchers to compare their method with other SOTAs on a self-divided test set (split from the training set). Retraining the other methods increases the workload, and because different researchers split the data differently, there is no unified benchmark on which to compare the performance of different algorithms. Second, these datasets have further shortcomings, e.g., many near-duplicate images and incomplete labels. To address these challenges we introduce a dataset, Detecting Underwater Objects (DUO), and a corresponding benchmark, based on the collection and re-annotation of all relevant datasets. DUO contains a collection of diverse underwater images with more rational annotations. The corresponding benchmark provides indicators of both the efficiency and the accuracy of SOTAs (under the MMDetection framework) for academic research and industrial applications, where a JETSON AGX XAVIER is used to assess detector speed and thereby simulate the robot-embedded environment.
+
+
+
+
+
+
+
+**Note:** DUO contains URPC2020, and the two datasets share the same categories. The DUO paper introduces URPC2020 alongside the other underwater object detection datasets it is built from.
+
+**TODO:**
+
+- [ ] Support the DUO dataset and release models.
+- [ ] Unify dataset names in `LQIT`.
+
+## Results and Models
+
+### URPC2020
+
+| Architecture | Backbone | Style | Lr schd | box AP | Config | Download |
+| :-----------: | :---------: | :-----: | :-----: | :----: | :----------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| Faster R-CNN | R-50 | pytorch | 1x | 43.5 | [config](./faster-rcnn_r50_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_r50_fpn_1x_urpc-coco_20220226_105840-09ef8403.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_r50_fpn_1x_urpc-coco_20220226_105840.log.json) |
+| Faster R-CNN | R-101 | pytorch | 1x | 44.8 | [config](./faster-rcnn_r101_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_r101_fpn_1x_urpc-coco_20220227_182523-de4a666c.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_r101_fpn_1x_urpc-coco_20220227_182523.log.json) |
+| Faster R-CNN | X-101-32x4d | pytorch | 1x | 44.6 | [config](./faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco_20230511_190905-7074a9f7.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco_20230511_190905.log.json) |
+| Faster R-CNN | X-101-64x4d | pytorch | 1x | 45.3 | [config](./faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco_20220405_193758-5d2a37e4.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco_20220405_193758.log.json) |
+| Cascade R-CNN | R-50 | pytorch | 1x | 44.3 | [config](./cascade-rcnn_r50_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/cascade-rcnn_r50_fpn_1x_urpc-coco_20220405_160342-044e6858.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/cascade-rcnn_r50_fpn_1x_urpc-coco_20220405_160342.log.json) |
+| RetinaNet | R-50 | pytorch | 1x | 40.7 | [config](./retinanet_r50_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/retinanet_r50_fpn_1x_urpc-coco_20220405_214951-a39f054e.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/retinanet_r50_fpn_1x_urpc-coco_20220405_214951.log.json) |
+| FCOS | R-50 | caffe | 1x | 41.4 | [config](./fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco_20220227_204555-305ab6aa.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco_20220227_204555.log.json) |
+| ATSS | R-50 | pytorch | 1x | 44.8 | [config](./atss_r50_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/atss_r50_fpn_1x_urpc-coco_20220405_160345-cf776917.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/atss_r50_fpn_1x_urpc-coco_20220405_160345.log.json) |
+| TOOD | R-50 | pytorch | 1x | 45.4 | [config](./tood_r50_fpn_1x_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/tood_r50_fpn_1x_urpc-coco_20220405_164450-1fbf815b.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/tood_r50_fpn_1x_urpc-coco_20220405_164450.log.json) |
+| SSD300 | VGG16 | - | 120e | 35.1 | [config](./ssd300_120e_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/ssd300_120e_urpc-coco_20230426_122625-b6f0b01e.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/ssd512_120e_urpc-coco_20220405_185511.log.json) |
+| SSD512 | VGG16 | - | 120e | 38.6 | [config](./ssd512_120e_urpc-coco.py) | [model](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/ssd512_120e_urpc-coco_20220405_185511-88c18764.pth) \| [log](https://github.com/BIGWangYuDong/lqit/releases/download/v0.0.1rc1/ssd512_120e_urpc-coco_20220405_185511.log.json) |
+
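+Configs in this folder follow the standard MMEngine workflow. As a minimal
+sketch (assuming a repo-root checkout with the URPC2020 data prepared as the
+base dataset configs expect), any config from the table above can be trained
+directly from Python:
+
+```python
+from mmengine.config import Config
+from mmengine.runner import Runner
+
+# pick one of the configs listed above (path relative to the repo root)
+cfg = Config.fromfile(
+    'configs/detection/urpc2020_dataset/train-all_test-A/'
+    'faster-rcnn_r50_fpn_1x_urpc-coco.py')
+cfg.work_dir = 'work_dirs/faster-rcnn_r50_fpn_1x_urpc-coco'
+
+runner = Runner.from_cfg(cfg)
+runner.train()  # validation runs at the interval defined in the config
+```
+
+`tools/train.py` wraps the same logic and adds CLI conveniences such as
+`--cfg-options`.
+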
+### DUO
+
+Coming soon
+
+## Citation
+
+- If you use `URPC2020` or another `URPC` series dataset in your research, please cite it as below:
+
+ **Note:** The URL may no longer be valid, but it is the link cited by many papers.
+
+ ```latex
+ @online{urpc,
+ title = {Underwater Robot Professional Contest},
+ url = {http://uodac.pcl.ac.cn/},
+ }
+ ```
+
+- If you use the `DUO` dataset in your research, please cite it as below:
+
+ ```latex
+ @inproceedings{liu2021dataset,
+ title={A dataset and benchmark of underwater object detection for robot picking},
+ author={Liu, Chongwei and Li, Haojie and Wang, Shuchang and Zhu, Ming and Wang, Dong and Fan, Xin and Wang, Zhihui},
+ booktitle={2021 IEEE International Conference on Multimedia \& Expo Workshops (ICMEW)},
+ pages={1--6},
+ year={2021},
+ organization={IEEE}
+ }
+ ```
diff --git a/configs/detection/duo_dataset/atss_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/atss_r50_fpn_1x_urpc-coco.py
similarity index 93%
rename from configs/detection/duo_dataset/atss_r50_fpn_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/atss_r50_fpn_1x_urpc-coco.py
index 1a4d929..6626294 100644
--- a/configs/detection/duo_dataset/atss_r50_fpn_1x_urpc-coco.py
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/atss_r50_fpn_1x_urpc-coco.py
@@ -1,6 +1,6 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
- '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]
# model settings
diff --git a/configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/cascade-rcnn_r50_fpn_1x_urpc-coco.py
similarity index 97%
rename from configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/cascade-rcnn_r50_fpn_1x_urpc-coco.py
index bb1524b..4193877 100644
--- a/configs/detection/duo_dataset/cascade-rcnn_r50_fpn_1x_urpc-coco.py
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/cascade-rcnn_r50_fpn_1x_urpc-coco.py
@@ -1,6 +1,6 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
- '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]
# model settings
diff --git a/configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_1x_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_1x_urpc-coco.py
diff --git a/configs/detection/duo_dataset/faster-rcnn_r101_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_2x_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/faster-rcnn_r101_fpn_2x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_2x_urpc-coco.py
diff --git a/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..aff4439
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,119 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
diff --git a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_2x_urpc-coco.py
similarity index 96%
rename from configs/detection/duo_dataset/faster-rcnn_r50_fpn_2x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_2x_urpc-coco.py
index c9baaa3..aff4439 100644
--- a/configs/detection/duo_dataset/faster-rcnn_r50_fpn_2x_urpc-coco.py
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r50_fpn_2x_urpc-coco.py
@@ -1,6 +1,6 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
- '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]
# model settings
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
diff --git a/configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
diff --git a/configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
similarity index 93%
rename from configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
index 01212bb..6a6cd1e 100644
--- a/configs/detection/duo_dataset/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
@@ -1,6 +1,6 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
- '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]
# model settings
diff --git a/configs/detection/urpc2020_dataset/train-all_test-A/paa_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/paa_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..97d05e5
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/paa_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,93 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='PAA',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='PAAHead',
+ reg_decoded_bbox=True,
+ score_voting=True,
+ topk=9,
+ num_classes=4,  # URPC2020 has 4 classes, matching the other configs
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),
+ # training and testing settings
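+ # PAA deliberately assigns with a loose IoU threshold (0.1); the candidates
+ # are then re-split into positives/negatives by fitting a GMM to their scores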
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.1,
+ neg_iou_thr=0.1,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
diff --git a/configs/detection/urpc2020_dataset/train-all_test-A/retinanet_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/retinanet_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..baa88cc
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/retinanet_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,91 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='RetinaNet',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
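+ # extra pyramid levels (P6, P7) are convolved from the backbone C5 feature
+ # ('on_input'), following the original RetinaNet design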
+ add_extra_convs='on_input',
+ num_outs=5),
+ bbox_head=dict(
+ type='RetinaHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.4,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='PseudoSampler'), # focal loss handles the pos/neg imbalance itself, so no real sampling is needed
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
+ clip_grad=dict(max_norm=35, norm_type=2)) # the loss may become NaN without gradient clipping
+
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
diff --git a/configs/detection/urpc2020_dataset/train-all_test-A/ssd300_120e_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/ssd300_120e_urpc-coco.py
new file mode 100644
index 0000000..f150e29
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/ssd300_120e_urpc-coco.py
@@ -0,0 +1,88 @@
+_base_ = [
+ '../../_base_/models/ssd300.py',
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_2x.py', '../../_base_/default_runtime.py'
+]
+# model settings
+model = dict(bbox_head=dict(num_classes=4))
+
+# dataset settings
+input_size = 300
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
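+ # {{_base_.xxx}} is MMEngine syntax that pulls the value from the base config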
+ dict(
+ type='Expand',
+ mean={{_base_.model.data_preprocessor.mean}},
+ to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
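+# RepeatDataset iterates the training annotations 5 times per epoch, the usual
+# MMDetection recipe for long SSD schedules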
+train_dataloader = dict(
+ batch_size=8,
+ num_workers=2,
+ batch_sampler=None,
+ dataset=dict(
+ _delete_=True,
+ type='RepeatDataset',
+ times=5,
+ dataset=dict(
+ type={{_base_.dataset_type}},
+ data_root={{_base_.data_root}},
+ ann_file='annotations/train_all.json',
+ data_prefix=dict(img='train-image/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline)))
+val_dataloader = dict(batch_size=8, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4),
+ clip_grad=dict(max_norm=35, norm_type=2)) # the loss may become NaN without gradient clipping
+
+custom_hooks = [
+ dict(type='NumClassCheckHook'),
+ dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')
+]
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=24,
+ by_epoch=True,
+ milestones=[16, 22],
+ gamma=0.1)
+]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
diff --git a/configs/detection/duo_dataset/ssd512_120e_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/ssd512_120e_urpc-coco.py
similarity index 100%
rename from configs/detection/duo_dataset/ssd512_120e_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/ssd512_120e_urpc-coco.py
diff --git a/configs/detection/duo_dataset/tood_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-A/tood_r50_fpn_1x_urpc-coco.py
similarity index 93%
rename from configs/detection/duo_dataset/tood_r50_fpn_1x_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-A/tood_r50_fpn_1x_urpc-coco.py
index 8d55456..ea58839 100644
--- a/configs/detection/duo_dataset/tood_r50_fpn_1x_urpc-coco.py
+++ b/configs/detection/urpc2020_dataset/train-all_test-A/tood_r50_fpn_1x_urpc-coco.py
@@ -1,6 +1,6 @@
_base_ = [
- '../_base_/datasets/urpc-2020_coco_detection.py',
- '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
+ '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]
# model settings
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/atss_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/atss_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..0e2407d
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/atss_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,72 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='ATSS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='ATSSHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # training and testing settings
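+ # ATSSAssigner takes the topk=9 closest anchors per pyramid level and sets
+ # the positive IoU threshold adaptively as the mean + std of their IoUs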
+ train_cfg=dict(
+ assigner=dict(type='ATSSAssigner', topk=9),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/cascade-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/cascade-rcnn_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..4a8df85
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/cascade-rcnn_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,190 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='CascadeRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
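+ # three refinement stages trained at increasing IoU thresholds
+ # (0.5/0.6/0.7, see train_cfg) with correspondingly tighter target_stds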
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ]),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..8ac8ab8
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_1x_urpc-coco.py
@@ -0,0 +1,7 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ init_cfg=dict(type='Pretrained',
+ checkpoint='torchvision://resnet101')))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..54e3908
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_2x_urpc-coco.py
@@ -0,0 +1,7 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ init_cfg=dict(type='Pretrained',
+ checkpoint='torchvision://resnet101')))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..193a9e7
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,119 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r50_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r50_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..193a9e7
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r50_fpn_2x_urpc-coco.py
@@ -0,0 +1,119 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_2x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..dbf5e42
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=32,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..db90e14
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=32,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..9b76afd
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..a183df6
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
new file mode 100644
index 0000000..f493b77
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
@@ -0,0 +1,77 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FCOS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
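+ # caffe-style ResNet weights expect BGR input with mean subtraction only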
+ mean=[102.9801, 115.9465, 122.7717],
+ std=[1.0, 1.0, 1.0],
+ bgr_to_rgb=False,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='caffe',
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://detectron/resnet50_caffe')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output', # use P5
+ num_outs=5,
+ relu_before_extra_convs=True),
+ bbox_head=dict(
+ type='FCOSHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ strides=[8, 16, 32, 64, 128],
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # testing settings
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(lr=0.01),
+ paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+ clip_grad=dict(max_norm=35, norm_type=2)) # the loss may become NaN without gradient clipping
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/paa_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/paa_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..305a4e0
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/paa_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,93 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='PAA',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='PAAHead',
+ reg_decoded_bbox=True,
+ score_voting=True,
+ topk=9,
+ num_classes=4,  # URPC2020 has 4 classes, matching the other configs
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.1,
+ neg_iou_thr=0.1,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/retinanet_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/retinanet_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..1455853
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/retinanet_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,91 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='RetinaNet',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_input',
+ num_outs=5),
+ bbox_head=dict(
+ type='RetinaHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.4,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='PseudoSampler'), # focal loss handles the pos/neg imbalance itself, so no real sampling is needed
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
+ clip_grad=dict(max_norm=35, norm_type=2)) # the loss may become NaN without gradient clipping
+
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
diff --git a/configs/detection/duo_dataset/ssd300_120e_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/ssd300_120e_urpc-coco.py
similarity index 89%
rename from configs/detection/duo_dataset/ssd300_120e_urpc-coco.py
rename to configs/detection/urpc2020_dataset/train-all_test-B/ssd300_120e_urpc-coco.py
index 7d2658e..38f4c90 100644
--- a/configs/detection/duo_dataset/ssd300_120e_urpc-coco.py
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/ssd300_120e_urpc-coco.py
@@ -1,7 +1,7 @@
_base_ = [
- '../_base_/models/ssd300.py',
- '../_base_/datasets/urpc-2020_coco_detection.py',
- '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
+ '../../_base_/models/ssd300.py',
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_2x.py', '../../_base_/default_runtime.py'
]
# model settings
model = dict(bbox_head=dict(num_classes=4))
@@ -50,8 +50,8 @@
dataset=dict(
type={{_base_.dataset_type}},
data_root={{_base_.data_root}},
- ann_file='annotations_json/train.json',
- data_prefix=dict(img='JPEGImages/'),
+ ann_file='annotations/train.json',
+ data_prefix=dict(img='train-image/'),
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=train_pipeline)))
val_dataloader = dict(batch_size=8, dataset=dict(pipeline=test_pipeline))
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/ssd512_120e_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/ssd512_120e_urpc-coco.py
new file mode 100644
index 0000000..3aa042e
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/ssd512_120e_urpc-coco.py
@@ -0,0 +1,60 @@
+_base_ = 'ssd300_120e_urpc-coco.py'
+
+# model settings
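+# SSD512 adds a seventh feature map over SSD300, hence the extra entries in
+# out_channels/strides and the 4x4 kernel of the last extra layer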
+input_size = 512
+model = dict(
+ neck=dict(
+ out_channels=(512, 1024, 512, 256, 256, 256, 256),
+ level_strides=(2, 2, 2, 2, 1),
+ level_paddings=(1, 1, 1, 1, 1),
+ last_kernel_size=4),
+ bbox_head=dict(
+ in_channels=(512, 1024, 512, 256, 256, 256, 256),
+ anchor_generator=dict(
+ type='SSDAnchorGenerator',
+ scale_major=False,
+ input_size=input_size,
+ basesize_ratio_range=(0.1, 0.9),
+ strides=[8, 16, 32, 64, 128, 256, 512],
+ ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]])))
+
+# dataset settings
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='Expand',
+ mean={{_base_.model.data_preprocessor.mean}},
+ to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
diff --git a/configs/detection/urpc2020_dataset/train-all_test-B/tood_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train-all_test-B/tood_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..6f35645
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train-all_test-B/tood_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,81 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='TOOD',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='TOODHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=6,
+ feat_channels=256,
+ anchor_type='anchor_free',
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ initial_loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_cls=dict(
+ type='QualityFocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ beta=2.0,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
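+ # TOOD warms up with ATSS assignment for the first `initial_epoch` epochs,
+ # then switches to TaskAlignedAssigner; alpha/beta weight the classification
+ # score vs the IoU in the task-alignment metric (t = s^alpha * u^beta)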
+ train_cfg=dict(
+ initial_epoch=4,
+ initial_assigner=dict(type='ATSSAssigner', topk=9),
+ assigner=dict(type='TaskAlignedAssigner', topk=13),
+ alpha=1,
+ beta=6,
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
diff --git a/configs/detection/urpc2020_dataset/train_validation/atss_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/atss_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..190eb71
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/atss_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,72 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='ATSS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='ATSSHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(type='ATSSAssigner', topk=9),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
diff --git a/configs/detection/urpc2020_dataset/train_validation/cascade-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/cascade-rcnn_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..2be7089
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/cascade-rcnn_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,190 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='CascadeRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+ roi_head=dict(
+ type='CascadeRoIHead',
+ num_stages=3,
+ stage_loss_weights=[1, 0.5, 0.25],
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=[
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.05, 0.05, 0.1, 0.1]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+ loss_weight=1.0)),
+ dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.033, 0.033, 0.067, 0.067]),
+ reg_class_agnostic=True,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+ ]),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=0,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=2000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=[
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.6,
+ neg_iou_thr=0.6,
+ min_pos_iou=0.6,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False),
+ dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.7,
+ min_pos_iou=0.7,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)
+ ]),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..8ac8ab8
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_1x_urpc-coco.py
@@ -0,0 +1,7 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ init_cfg=dict(type='Pretrained',
+ checkpoint='torchvision://resnet101')))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..54e3908
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_2x_urpc-coco.py
@@ -0,0 +1,7 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ depth=101,
+ init_cfg=dict(type='Pretrained',
+ checkpoint='torchvision://resnet101')))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..a28ff6f
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,119 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-xml.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-xml.py
new file mode 100644
index 0000000..17500d4
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-xml.py
@@ -0,0 +1,7 @@
+_base_ = [
+ '../../_base_/models/faster-rcnn_r50_fpn.py',
+ '../../_base_/datasets/urpc2020/urpc2020-validation_xml_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+model = dict(roi_head=dict(bbox_head=dict(num_classes=4)))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..a28ff6f
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_2x_urpc-coco.py
@@ -0,0 +1,119 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_2x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FasterRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=4,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0., 0., 0., 0.],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=False,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100)
+ # soft-nms is also supported for rcnn testing
+ # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
+ ))
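As the trailing comment notes, soft-NMS can replace hard NMS for R-CNN testing. A minimal sketch of a derived config (a hypothetical file, not part of this changeset):

```python
_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'

# swap the R-CNN test-time NMS for soft-NMS; the thresholds follow the
# commented example in the base config
model = dict(
    test_cfg=dict(
        rcnn=dict(
            nms=dict(
                type='soft_nms', iou_threshold=0.5, min_score=0.05))))
```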
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..dbf5e42
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=32,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..db90e14
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=32,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..9b76afd
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
diff --git a/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
new file mode 100644
index 0000000..a183df6
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
@@ -0,0 +1,15 @@
+_base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
+
+model = dict(
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ groups=64,
+ base_width=4,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
diff --git a/configs/detection/urpc2020_dataset/train_validation/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
new file mode 100644
index 0000000..8f27672
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
@@ -0,0 +1,77 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='FCOS',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[102.9801, 115.9465, 122.7717],
+ std=[1.0, 1.0, 1.0],
+ bgr_to_rgb=False,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=False),
+ norm_eval=True,
+ style='caffe',
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://detectron/resnet50_caffe')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output', # use P5
+ num_outs=5,
+ relu_before_extra_convs=True),
+ bbox_head=dict(
+ type='FCOSHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ strides=[8, 16, 32, 64, 128],
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='IoULoss', loss_weight=1.0),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
+ # testing settings
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(lr=0.01),
+ paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+ clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
diff --git a/configs/detection/urpc2020_dataset/train_validation/paa_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/paa_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..a229916
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/paa_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,93 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='PAA',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='PAAHead',
+ reg_decoded_bbox=True,
+ score_voting=True,
+ topk=9,
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=1.3),
+ loss_centerness=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),
+ # training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.1,
+ neg_iou_thr=0.1,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
diff --git a/configs/detection/urpc2020_dataset/train_validation/retinanet_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/retinanet_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..bfc825d
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/retinanet_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,91 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='RetinaNet',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_input',
+ num_outs=5),
+ bbox_head=dict(
+ type='RetinaHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.4,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='PseudoSampler'), # Focal loss should use PseudoSampler
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
+ clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=12,
+ by_epoch=True,
+ milestones=[8, 11],
+ gamma=0.1)
+]
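The single-stage configs here (FCOS, PAA, RetinaNet, TOOD) all use sigmoid focal loss with `gamma=2.0` and `alpha=0.25`. For reference, a minimal PyTorch sketch of that loss, not the mmdet implementation itself:

```python
import torch
import torch.nn.functional as F


def sigmoid_focal_loss(logits, targets, gamma=2.0, alpha=0.25):
    """Mean sigmoid focal loss; ``targets`` are 0/1 floats like ``logits``."""
    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
    p = torch.sigmoid(logits)
    p_t = p * targets + (1 - p) * (1 - targets)  # prob. of the true class
    alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
    return (alpha_t * (1 - p_t)**gamma * ce).mean()
```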
diff --git a/configs/detection/urpc2020_dataset/train_validation/ssd300_120e_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/ssd300_120e_urpc-coco.py
new file mode 100644
index 0000000..94c1806
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/ssd300_120e_urpc-coco.py
@@ -0,0 +1,88 @@
+_base_ = [
+ '../../_base_/models/ssd300.py',
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_2x.py', '../../_base_/default_runtime.py'
+]
+# model settings
+model = dict(bbox_head=dict(num_classes=4))
+
+# dataset settings
+input_size = 300
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='Expand',
+ mean={{_base_.model.data_preprocessor.mean}},
+ to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(
+ batch_size=8,
+ num_workers=2,
+ batch_sampler=None,
+ dataset=dict(
+ _delete_=True,
+ type='RepeatDataset',
+ times=5,
+ dataset=dict(
+ type={{_base_.dataset_type}},
+ data_root={{_base_.data_root}},
+ ann_file='annotations/train.json',
+ data_prefix=dict(img='train-image/'),
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=train_pipeline)))
+val_dataloader = dict(batch_size=8, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4),
+ clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad
+
+custom_hooks = [
+ dict(type='NumClassCheckHook'),
+ dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')
+]
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+ end=1000),
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=24,
+ by_epoch=True,
+ milestones=[16, 22],
+ gamma=0.1)
+]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
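The `auto_scale_lr` note encodes the linear scaling rule: the optimizer LR is multiplied by the ratio of the actual total batch size to `base_batch_size`. A worked sketch, where the 4-GPU setup is an assumed example:

```python
base_lr = 2e-3        # optimizer lr in this config
base_batch_size = 64  # 8 GPUs x 8 samples per GPU

actual_batch_size = 4 * 8  # assumed: 4 GPUs x 8 samples per GPU
scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)  # 0.001
```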
diff --git a/configs/detection/urpc2020_dataset/train_validation/ssd512_120e_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/ssd512_120e_urpc-coco.py
new file mode 100644
index 0000000..3aa042e
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/ssd512_120e_urpc-coco.py
@@ -0,0 +1,60 @@
+_base_ = 'ssd300_120e_urpc-coco.py'
+
+# model settings
+input_size = 512
+model = dict(
+ neck=dict(
+ out_channels=(512, 1024, 512, 256, 256, 256, 256),
+ level_strides=(2, 2, 2, 2, 1),
+ level_paddings=(1, 1, 1, 1, 1),
+ last_kernel_size=4),
+ bbox_head=dict(
+ in_channels=(512, 1024, 512, 256, 256, 256, 256),
+ anchor_generator=dict(
+ type='SSDAnchorGenerator',
+ scale_major=False,
+ input_size=input_size,
+ basesize_ratio_range=(0.1, 0.9),
+ strides=[8, 16, 32, 64, 128, 256, 512],
+ ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]])))
+
+# dataset settings
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='Expand',
+ mean={{_base_.model.data_preprocessor.mean}},
+ to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}},
+ ratio_range=(1, 4)),
+ dict(
+ type='MinIoURandomCrop',
+ min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+ min_crop_size=0.3),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='RandomFlip', prob=0.5),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='PackDetInputs')
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='Resize', scale=(input_size, input_size), keep_ratio=False),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
diff --git a/configs/detection/urpc2020_dataset/train_validation/tood_r50_fpn_1x_urpc-coco.py b/configs/detection/urpc2020_dataset/train_validation/tood_r50_fpn_1x_urpc-coco.py
new file mode 100644
index 0000000..66cd58f
--- /dev/null
+++ b/configs/detection/urpc2020_dataset/train_validation/tood_r50_fpn_1x_urpc-coco.py
@@ -0,0 +1,81 @@
+_base_ = [
+ '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py',
+ '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+model = dict(
+ type='TOOD',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_output',
+ num_outs=5),
+ bbox_head=dict(
+ type='TOODHead',
+ num_classes=4,
+ in_channels=256,
+ stacked_convs=6,
+ feat_channels=256,
+ anchor_type='anchor_free',
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ ratios=[1.0],
+ octave_base_scale=8,
+ scales_per_octave=1,
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ initial_loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_cls=dict(
+ type='QualityFocalLoss',
+ use_sigmoid=True,
+ activated=True, # use probability instead of logit as input
+ beta=2.0,
+ loss_weight=1.0),
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
+ train_cfg=dict(
+ initial_epoch=4,
+ initial_assigner=dict(type='ATSSAssigner', topk=9),
+ assigner=dict(type='TaskAlignedAssigner', topk=13),
+ alpha=1,
+ beta=6,
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.6),
+ max_per_img=100))
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
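TOOD's `TaskAlignedAssigner` ranks candidates by the alignment metric `t = s**alpha * u**beta`, where `s` is the classification score and `u` the IoU; with `alpha=1` and `beta=6` as in `train_cfg` above, well-localized boxes dominate the ranking. A one-function sketch:

```python
def task_alignment_metric(cls_score: float, iou: float,
                          alpha: float = 1.0, beta: float = 6.0) -> float:
    # t = s**alpha * u**beta, with alpha/beta from train_cfg above
    return cls_score**alpha * iou**beta


print(task_alignment_metric(0.8, 0.9))  # ~0.425
```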
diff --git a/configs/lark/README.md b/configs/lark/README.md
new file mode 100644
index 0000000..5ccb4ed
--- /dev/null
+++ b/configs/lark/README.md
@@ -0,0 +1,27 @@
+# Feishu robot
+
+## Config
+
+Put the webhook URL of your Feishu (Lark) robot into `lark.py` with the following setting:
+
+```python
+lark = 'https://open.feishu.cn/open-apis/bot/v2/hook/XXXX-XXXX-XXXX-XXXX'
+```
+
+**Note:** Keep the webhook URL private!
+
+For more details about the Feishu robot, please refer to the [official documentation](https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot).
+
+## Running command
+
+If you want to use the Feishu robot during training and testing, add `-l` or `--lark` to the running command.
+
+Examples:
+
+```bash
+# training script
+python tools/train.py ${CONFIG_FILE} -l ${Other setting}
+
+# testing script
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT} -l ${Other setting}
+```
diff --git a/docs/en/prepare_data/urpc_2020.md b/docs/en/prepare_data/urpc_2020.md
index 32ca6df..4046bc3 100644
--- a/docs/en/prepare_data/urpc_2020.md
+++ b/docs/en/prepare_data/urpc_2020.md
@@ -11,14 +11,13 @@
}
```
-The dataset contains 5,543 underwater images, covering four categories: holothurian, echinus, scallop, and starfish.
+The dataset contains 5,543 underwater images for training and two test sets (test-A with 800 images and test-B with 1,200 images), covering four categories: holothurian, echinus, scallop, and starfish.
## Download URPC2020 Dataset
-The Underwater Robot Professional Contest (URPC) 2020 dataset can be downloaded from [here](https://drive.google.com/file/d/1PgP7gY1FkcpQ1D6XW_lPzTYCgsMhItbw/view?usp=sharing).
+The Underwater Robot Professional Contest (URPC) 2020 dataset, including the training set, test-A set, and test-B set, can be downloaded from [here](https://openi.pcl.ac.cn/OpenOrcinus_orca/URPC_opticalimage_dataset/datasets). You can also download the processed data from [here](https://drive.google.com/file/d/1PgP7gY1FkcpQ1D6XW_lPzTYCgsMhItbw/view?usp=sharing).
-We randomly divides the URPC2020 dataset into training and testing groups with 4,434 and 1,019 images, respectively.
-If users want to divide by their own, `tools/misc/write_txt.py` should be used to split the train and val set first.
+For validation, we randomly divide the URPC2020 training set into training and validation groups with 4,434 and 1,019 images, respectively. If users want to make their own split, `tools/misc/write_txt.py` should be used to split the train and val sets first.
Then `tools/dataset_converters/xml_to_json.py` can be used to convert XML-style annotations to COCO format.
The data structure is as follows:
@@ -30,21 +29,48 @@ lqit
├── configs
├── data
│ ├── URPC
-│ │ ├── ImageSets
-│ │ │ ├── train.txt
-│ │ │ ├── val.txt
-│ │ ├── ImageMetas # get image meta information from scripts
-│ │ │ ├── train-image-metas.pkl
-│ │ │ ├── val-image-metas.pkl
-│ │ ├── annotations_xml # pascal voc style annotations
-│ │ │ ├── 000001.xml
-│ │ │ ├── 000002.xml
-│ │ │ ├── ...
-│ │ ├── annotations_json # coco style annotations
-│ │ │ ├── train.json
-│ │ │ ├── val.json
-│ │ ├── JPEGImages # Raw images
-│ │ │ ├── 000001.jpg
-│ │ │ ├── 000002.jpg
-│ │ │ ├── ...
+│ │ ├── annotations_json # coco style annotations
+│ │ │ ├── train.json # training group from training set, with 4,434 images
+│ │ │ ├── val.json # validation group from training set, with 1,019 images
+│ │ │ ├── train_all.json # training set, with all 5,543 images
+│ │ │ ├── test-A.json # testing-A set, with 800 images
+│ │ │ ├── test-B.json # testing-B set, with 1,200 images
+│ │ │ ├── train-image # training images
+│ │ │ │ ├── 000001.jpg
+│ │ │ │ ├── 000002.jpg
+│ │ │ │ ├── ...
+│ │ │ ├── test-A-image # test-A images
+│ │ │ │ ├── 000001.jpg
+│ │ │ │ ├── 000002.jpg
+│ │ │ │ ├── ...
+│ │ │ ├── test-B-image # test-B images
+│ │ │ │ ├── 000001.jpg
+│ │ │ │ ├── 000002.jpg
+│ │ │ │ ├── ...
+│ │ ├── source_data # source data downloaded from https://openi.pcl.ac.cn/OpenOrcinus_orca/URPC_opticalimage_dataset/datasets
+│ │ │ ├── ImageSets # training, validation, and testing image names generated by scripts
+│ │ │ │ ├── train.txt
+│ │ │ │ ├── val.txt
+│ │ │ │ ├── train_all.txt
+│ │ │ │ ├── test-A.txt
+│ │ │ │ ├── test-B.txt
+│ │ │ ├── ImageMetas # get image meta information from scripts
+│ │ │ │ ├── train-image-metas.pkl
+│ │ │ │ ├── val-image-metas.pkl
+│ │ │ │ ├── train_all-image-metas.pkl
+│ │ │ │ ├── test-A-image-metas.pkl
+│ │ │ │ ├── test-B-image-metas.pkl
+│ │ │ ├── train-box # pascal voc style annotations for the training set
+│ │ │ │ ├── 000001.xml
+│ │ │ │ ├── 000002.xml
+│ │ │ │ ├── ...
+│ │ │ ├── test-A-box # pascal voc style annotations for the test-A set
+│ │ │ │ ├── 000001.xml
+│ │ │ │ ├── 000002.xml
+│ │ │ │ ├── ...
+│ │ │ ├── test-B-box # pascal voc style annotations for the test-B set
+│ │ │ │ ├── 000001.xml
+│ │ │ │ ├── 000002.xml
+│ │ │ │ ├── ...
+
```
diff --git a/docs/zh_cn/prepare_data/urpc_2020.md b/docs/zh_cn/prepare_data/urpc_2020.md
index 1a70d7a..1183eb1 100644
--- a/docs/zh_cn/prepare_data/urpc_2020.md
+++ b/docs/zh_cn/prepare_data/urpc_2020.md
@@ -11,13 +11,13 @@
}
```
-该数据集包含 5,543 张水下图像,涵盖四类:海参 (holothurian)、海胆 (echinus)、扇贝 (scallop)和海星 (starfish)。
+该数据集包含 5,543 张用于训练的水下图像,以及 800 和 1,200 张用于测试的水下图像(测试集 A 和测试集 B),涵盖四类:海参 (holothurian)、海胆 (echinus)、扇贝 (scallop)和海星 (starfish)。
## 下载 URPC2020 数据集
-水下机器人专业竞赛 (Underwater Robot Professional Contest, URPC) 2020 数据集可从[此处](https://drive.google.com/file/d/1PgP7gY1FkcpQ1D6XW_lPzTYCgsMhItbw/view?usp=sharing)下载。
+水下机器人专业竞赛 (Underwater Robot Professional Contest, URPC) 2020 数据集,包括训练集、测试集-A 和测试集-B 可以从[此处](https://openi.pcl.ac.cn/OpenOrcinus_orca/URPC_opticalimage_dataset/datasets)下载。你也可以从[这里](https://drive.google.com/file/d/1PgP7gY1FkcpQ1D6XW_lPzTYCgsMhItbw/view?usp=sharing)下载我们处理好的数据。
-我们将 URPC2020 数据集随机分为训练组和测试组,分别有 4,434 和 1,019 张图像。
+我们将 URPC2020 训练数据集随机分为训练组和验证组,分别有 4,434 和 1,019 张图像。
如果用户想自己划分,应该先使用`tools/misc/write_txt.py`来划分train和val集合。
然后 `tools/dataset_converters/xml_to_json.py` 可以用来将 xml 样式的注释转换为 coco 格式。
@@ -25,26 +25,53 @@
```text
lqit
├── lqit
├── tools
├── configs
├── data
│ ├── URPC
-│ │ ├── ImageSets
-│ │ │ ├── train.txt
-│ │ │ ├── val.txt
-│ │ ├── ImageMetas # get image meta information from scripts
-│ │ │ ├── train-image-metas.pkl
-│ │ │ ├── val-image-metas.pkl
-│ │ ├── annotations_xml # pascal voc style annotations
-│ │ │ ├── 000001.xml
-│ │ │ ├── 000002.xml
-│ │ │ ├── ...
-│ │ ├── annotations_json # coco style annotations
-│ │ │ ├── train.json
-│ │ │ ├── val.json
-│ │ ├── JPEGImages # Raw image
-│ │ │ ├── 000001.jpg
-│ │ │ ├── 000002.jpg
-│ │ │ ├── ...
+│ │ ├── annotations_json # coco 风格的标注文件夹
+│ │ │ ├── train.json # 从训练数据中划分的训练组标注文件,包括 4,434 张图片
+│ │ │ ├── val.json # 从训练数据中划分的验证组标注文件,包括 1,019 张图片
+│ │ │ ├── train_all.json # 训练集标注文件,包括 5,543 张图片
+│ │ │ ├── test-A.json # 测试集 A 标注文件,包含 800 张图片
+│ │ │ ├── test-B.json # 测试集 B 标注文件,包含 1,200 张图片
+│ │ │ ├── train-image # 训练图片
+│ │ │ │ ├── 000001.jpg
+│ │ │ │ ├── 000002.jpg
+│ │ │ │ ├── ...
+│ │ │ ├── test-A-image # 测试集 A 图片
+│ │ │ │ ├── 000001.jpg
+│ │ │ │ ├── 000002.jpg
+│ │ │ │ ├── ...
+│ │ │ ├── test-B-image # 测试集 B 图片
+│ │ │ │ ├── 000001.jpg
+│ │ │ │ ├── 000002.jpg
+│ │ │ │ ├── ...
+│ │ ├── source_data # 原始数据文件,下载地址:https://openi.pcl.ac.cn/OpenOrcinus_orca/URPC_opticalimage_dataset/datasets
+│ │ │ ├── ImageSets # 从脚本获得的训练、验证、测试图像名文件
+│ │ │ │ ├── train.txt
+│ │ │ │ ├── val.txt
+│ │ │ │ ├── train_all.txt
+│ │ │ │ ├── test-A.txt
+│ │ │ │ ├── test-B.txt
+│ │ │ ├── ImageMetas # 从脚本获得的图像信息文件
+│ │ │ │ ├── train-image-metas.pkl
+│ │ │ │ ├── val-image-metas.pkl
+│ │ │ │ ├── train_all-image-metas.pkl
+│ │ │ │ ├── test-A-image-metas.pkl
+│ │ │ │ ├── test-B-image-metas.pkl
+│ │ │ ├── train-box # pascal voc 风格的训练集标注文件
+│ │ │ │ ├── 000001.xml
+│ │ │ │ ├── 000002.xml
+│ │ │ │ ├── ...
+│ │ │ ├── test-A-box # pascal voc 风格的测试集 A 标注文件
+│ │ │ │ ├── 000001.xml
+│ │ │ │ ├── 000002.xml
+│ │ │ │ ├── ...
+│ │ │ ├── test-B-box # pascal voc 风格的测试集 B 标注文件
+│ │ │ │ ├── 000001.xml
+│ │ │ │ ├── 000002.xml
+│ │ │ │ ├── ...
```
diff --git a/lqit/common/__init__.py b/lqit/common/__init__.py
index bea6524..aad5e77 100644
--- a/lqit/common/__init__.py
+++ b/lqit/common/__init__.py
@@ -1,3 +1,4 @@
from .datasets import * # noqa: F401,F403
+from .engine import * # noqa: F401,F403
from .models import * # noqa: F401,F403
from .structures import * # noqa: F401,F403
diff --git a/lqit/common/engine/__init__.py b/lqit/common/engine/__init__.py
new file mode 100644
index 0000000..24906aa
--- /dev/null
+++ b/lqit/common/engine/__init__.py
@@ -0,0 +1 @@
+from .hooks import * # noqa: F401,F403
diff --git a/lqit/common/engine/hooks/__init__.py b/lqit/common/engine/hooks/__init__.py
new file mode 100644
index 0000000..3e74c27
--- /dev/null
+++ b/lqit/common/engine/hooks/__init__.py
@@ -0,0 +1,3 @@
+from .lark_hook import LarkHook
+
+__all__ = ['LarkHook']
diff --git a/lqit/common/engine/hooks/lark_hook.py b/lqit/common/engine/hooks/lark_hook.py
new file mode 100644
index 0000000..f9a19a0
--- /dev/null
+++ b/lqit/common/engine/hooks/lark_hook.py
@@ -0,0 +1,217 @@
+# Modified from https://github.com/InternLM/opencompass/
+# Modified from https://github.com/InternLM/InternLM/
+import datetime
+import os
+import time
+from typing import Dict, Optional
+
+from mmengine.hooks import Hook
+from mmengine.hooks.hook import DATA_BATCH
+
+from lqit.common.utils.lark_manager import get_user_name, send_alert_message
+from lqit.registry import HOOKS
+
+
+def set_env_var(key, value):
+ os.environ[str(key)] = str(value)
+
+
+@HOOKS.register_module()
+class LarkHook(Hook):
+ """Hook that sends message to Lark.
+
+ Args:
+ url (str): The url of Lark webhook.
+ interval (int): The interval of sending message. Default: 1.
+ """
+
+ priority = 'BELOW_NORMAL'
+
+ def __init__(
+ self,
+ url: str,
+ cfg_file: str,
+ user_name: Optional[str] = None,
+ interval: int = 1,
+ by_epoch: bool = True,
+ silent: bool = True,
+ first_eta_iter: int = 200,
+ ):
+ self.url = url
+ self.interval = interval
+ self.by_epoch = by_epoch
+ self.cfg_file = cfg_file
+ if user_name is None:
+ user_name = get_user_name()
+ if user_name is None:
+ user_name = 'lqit'
+ self.user_name = user_name
+ self.silent = silent
+ # sent eta message after `first_eta_iter` iterations
+ self.first_eta_log = True
+ self.first_eta_iter = first_eta_iter
+ self.metrics_str = None
+
+ @staticmethod
+ def get_eta_time(runner) -> Optional[str]:
+ eta = runner.message_hub.get_info('eta')
+ if eta is None:
+ return None
+ else:
+ eta_str = str(datetime.timedelta(seconds=int(eta)))
+ return eta_str
+
+ def get_metric_results(self, metrics) -> str:
+ if not metrics: # handles both None and an empty dict
+ metrics_str = 'Empty metrics'
+ else:
+ metrics_str = 'Results:\n'
+ for key, value in metrics.items():
+ metrics_str += f'{key}: {value}\n'
+ self.metrics_str = metrics_str
+ return metrics_str
+
+ def get_train_msg(self, runner) -> tuple:
+ if self.by_epoch:
+ progress = f'Finished {runner.epoch + 1} / ' \
+ f'{runner.max_epochs} epochs'
+ else:
+ progress = f'Finished {runner.iter + 1} / ' \
+ f'{runner.max_iters} iterations'
+ eta_str = self.get_eta_time(runner=runner)
+
+ title = 'Task Progress Report'
+ msg = f"{self.user_name}'s task\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Training progress: {progress}\n'
+ if eta_str is not None:
+ msg += f'Estimated time of completion: {eta_str}\n'
+ return msg, title
+
+ def get_val_msg(self, runner, metrics_str) -> tuple:
+ if self.by_epoch:
+ # the epoch-based runner increments the epoch before running validation
+ progress = f'Finished {runner.epoch} / ' \
+ f'{runner.max_epochs} epochs'
+ else:
+ progress = f'Finished {runner.iter + 1} / ' \
+ f'{runner.max_iters} iterations'
+
+ title = 'Task Progress Report'
+ msg = f"{self.user_name}'s task\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Training progress: {progress}\n'
+ msg += metrics_str
+ return msg, title
+
+ def get_test_msg(self, runner, metrics_str) -> tuple:
+
+ title = 'Task Progress Report'
+ msg = f"{self.user_name}'s task\n" \
+ f'Config file: {self.cfg_file}\n'
+
+ msg += metrics_str
+ return msg, title
+
+ def get_first_eta_msg(self, runner) -> tuple:
+ eta_str = self.get_eta_time(runner=runner)
+ if eta_str is None:
+ return None, None
+ else:
+ title = 'Task Progress Report'
+ msg = f"{self.user_name}'s Training task\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Estimated time of completion: {eta_str}\n'
+ return msg, title
+
+ def before_train(self, runner) -> None:
+ if self.silent:
+ return
+ title = 'Task Initiation Report'
+ content = f"{self.user_name}'s task has started training!\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Output path: {runner.work_dir}\n' \
+ f'Total epoch: {runner.max_epochs}\n' \
+ f'Total iter: {runner.max_iters}'
+
+ send_alert_message(url=self.url, content=content, title=title)
+
+ def before_test(self, runner) -> None:
+ if self.silent:
+ return
+ # TODO: Check
+ title = 'Task Initiation Report'
+ content = f"{self.user_name}'s task has started testing!\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Output path: {runner.work_dir}'
+
+ send_alert_message(url=self.url, content=content, title=title)
+
+ def after_train_epoch(self, runner):
+ if not self.by_epoch:
+ return
+ if self.silent:
+ return
+ if self.every_n_epochs(runner, self.interval):
+ msg, title = self.get_train_msg(runner)
+ if msg is not None:
+ send_alert_message(url=self.url, content=msg, title=title)
+
+ def after_val_epoch(self,
+ runner,
+ metrics: Optional[Dict[str, float]] = None) -> None:
+ metrics_str = self.get_metric_results(metrics)
+ if self.silent:
+ return
+
+ msg, title = self.get_val_msg(runner, metrics_str)
+ if msg is not None:
+ send_alert_message(url=self.url, content=msg, title=title)
+
+ def after_test_epoch(self,
+ runner,
+ metrics: Optional[Dict[str, float]] = None) -> None:
+ metrics_str = self.get_metric_results(metrics)
+ if self.silent:
+ return
+ msg, title = self.get_test_msg(runner, metrics_str)
+ if msg is not None:
+ send_alert_message(url=self.url, content=msg, title=title)
+
+ def after_train_iter(self,
+ runner,
+ batch_idx: int,
+ data_batch: DATA_BATCH = None,
+ outputs: Optional[dict] = None) -> None:
+ # set LAST_ACTIVE_TIMESTAMP in the environ, so that the monitor
+ # manager can check if the process is stuck
+ set_env_var(key='LAST_ACTIVE_TIMESTAMP', value=int(time.time()))
+
+ if self.first_eta_log:
+ if self.every_n_train_iters(runner, self.first_eta_iter):
+ msg, title = self.get_first_eta_msg(runner)
+ self.first_eta_log = False
+ if msg is not None:
+ send_alert_message(url=self.url, content=msg, title=title)
+ if self.by_epoch:
+ return
+ if self.silent:
+ return
+ if self.every_n_iters(runner, self.interval):
+ msg, title = self.get_train_msg(runner)
+ if msg is not None:
+ send_alert_message(url=self.url, content=msg, title=title)
+
+ def after_val_iter(self, *args, **kwargs) -> None:
+ # set LAST_ACTIVE_TIMESTAMP in the environ, so that the monitor
+ # manager can check if the process is stuck
+ set_env_var(key='LAST_ACTIVE_TIMESTAMP', value=int(time.time()))
+
+ def after_test_iter(self, *args, **kwargs) -> None:
+ # set LAST_ACTIVE_TIMESTAMP in the environ, so that the monitor
+ # manager can check if the process is stuck
+ set_env_var(key='LAST_ACTIVE_TIMESTAMP', value=int(time.time()))
+
+ def after_run(self, runner) -> None:
+ if self.metrics_str is not None:
+ set_env_var(key='LAST_METRIC_RESULTS', value=self.metrics_str)
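For reference, the hook can also be registered by hand in a config instead of being injected by the launcher; a sketch, where the webhook URL and config path are placeholders:

```python
custom_hooks = [
    dict(
        type='lqit.LarkHook',
        url='https://open.feishu.cn/open-apis/bot/v2/hook/XXXX',  # placeholder
        cfg_file='path/to/your_config.py',  # placeholder
        user_name=None,  # read from the environment when None
        interval=1,
        by_epoch=True,
        silent=False,  # False also sends periodic progress messages
        first_eta_iter=200)
]
```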
diff --git a/lqit/common/models/data_preprocessor/gt_pixel_preprocessor.py b/lqit/common/models/data_preprocessor/gt_pixel_preprocessor.py
index 23cbc5a..06cb228 100644
--- a/lqit/common/models/data_preprocessor/gt_pixel_preprocessor.py
+++ b/lqit/common/models/data_preprocessor/gt_pixel_preprocessor.py
@@ -11,6 +11,12 @@
@MODELS.register_module()
class GTPixelPreprocessor(ImgDataPreprocessor):
+ """Preprocess the gt pixel data. This usually used in the detector with
+ enhance head.
+
+ Note: The setting should be same as the setting in
+ `detector.data_processor`.
+ """
def __init__(self,
mean: Sequence[Number] = None,
@@ -37,7 +43,9 @@ def __init__(self,
self.register_buffer('outputs_std',
torch.tensor(std).view(batched_output_view),
False)
- self.norm_input_flag = None # If input is normalized to [0, 1]
+ # If input is not normalized to [0, 1],
+ # the input will be divided by 255
+ self.norm_input_flag = None
def forward(self, batch_data_samples, training=True):
data = {}
diff --git a/lqit/common/utils/__init__.py b/lqit/common/utils/__init__.py
new file mode 100644
index 0000000..d30586f
--- /dev/null
+++ b/lqit/common/utils/__init__.py
@@ -0,0 +1,8 @@
+from .lark_manager import (MonitorManager, MonitorTracker,
+ context_monitor_manager, get_user_name,
+ initialize_monitor_manager, send_alert_message)
+
+__all__ = [
+ 'send_alert_message', 'get_user_name', 'initialize_monitor_manager',
+ 'context_monitor_manager', 'MonitorTracker', 'MonitorManager'
+]
diff --git a/lqit/common/utils/lark_manager.py b/lqit/common/utils/lark_manager.py
new file mode 100644
index 0000000..bc57499
--- /dev/null
+++ b/lqit/common/utils/lark_manager.py
@@ -0,0 +1,348 @@
+# Modified from https://github.com/InternLM/opencompass/
+# Modified from https://github.com/InternLM/InternLM/
+import json
+import os
+import signal
+import time
+import traceback
+from contextlib import contextmanager
+from threading import Thread
+from typing import Dict, List, Optional, Union
+
+import requests
+from func_timeout import FunctionTimedOut, func_set_timeout
+from mmengine.dist import master_only
+from mmengine.logging import print_log
+
+
+def get_user_name():
+ for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
+ user = os.environ.get(name)
+ if user:
+ return user
+ return None
+
+
+def get_rank():
+ rank = os.getenv('RANK')
+ if rank is None:
+ rank = os.getenv('SLURM_PROCID')
+ return rank
+
+
+@master_only
+def send_alert_message(url: str,
+ content: Union[str, List[List[Dict]]],
+ title: Optional[str] = None):
+ """Post a message to Lark.
+
+ When ``title`` is None, ``content`` must be a str.
+ Otherwise, ``content`` can be in rich text format (see
+ https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/im-v1/message/create_json#45e0953e
+ for details).
+ """
+ if title is None:
+ assert isinstance(content, str)
+ msg = {'msg_type': 'text', 'content': {'text': content}}
+ else:
+ if isinstance(content, str):
+ content = [[{'tag': 'text', 'text': content}]]
+ msg = {
+ 'msg_type': 'post',
+ 'content': {
+ 'post': {
+ 'zh_cn': {
+ 'title': title,
+ 'content': content
+ }
+ }
+ }
+ }
+ try:
+ # avoid connection timeout
+ func_set_timeout(5)(requests.post)(url, data=json.dumps(msg))
+ except FunctionTimedOut as e:
+ print(e)
+
+
+class MonitorTracker(Thread):
+ """Track job status and alert to Feishu during job training.
+
+ Args:
+ user_name (str): The user name of the job.
+ cfg_file (str): The config file of the job.
+ url (str): The Feishu webhook address for sending alerting messages.
+ task_type (str): The type of the task, 'train' or 'test'.
+ Defaults to 'train'.
+ check_interval (int): The interval in seconds for monitoring checks.
+ Defaults to 300.
+ """
+
+ def __init__(self,
+ user_name: str,
+ cfg_file: str,
+ url: str,
+ task_type: str = 'train',
+ check_interval: int = 300):
+ super().__init__()
+ self.user_name = user_name
+ self.cfg_file = cfg_file
+ self.url = url
+ assert isinstance(check_interval, int) and check_interval > 0
+ self.check_interval = check_interval
+
+ assert task_type in ['train', 'test']
+ if task_type == 'train':
+ self.task_type = 'Training'
+ elif task_type == 'test':
+ self.task_type = 'Testing'
+ else:
+ raise NotImplementedError
+
+ self.last_active_time = -1
+ self.last_loss_value = -1
+ self.stopped = False
+ self.start()
+
+ def run(self):
+ """start the monitor tracker."""
+
+ while not self.stopped:
+ try:
+ self._check_stuck()
+ except Exception:
+ # fall through to the sleep below instead of busy-retrying
+ pass
+ # time.sleep(self.check_interval)
+ for _ in range(self.check_interval):
+ time.sleep(1)
+ if self.stopped:
+ break
+
+ def _check_stuck(self):
+ """Check training status for potential stuck condition."""
+
+ new_active_time = -1
+ # LAST_ACTIVE_TIMESTAMP will be added in `LarkHook.after_XXX_iter`
+ # using
+ # `set_env_var(key="LAST_ACTIVE_TIMESTAMP", value=int(time.time()))`
+ # to set LAST_ACTIVE_TIMESTAMP
+ if os.getenv('LAST_ACTIVE_TIMESTAMP') is not None:
+ new_active_time = os.getenv('LAST_ACTIVE_TIMESTAMP')
+ if int(new_active_time) <= int(self.last_active_time) and \
+ new_active_time != -1:
+ title = 'Task Progress Report'
+ content = f"{self.user_name}'s {self.task_type} task\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Task may be in stuck status, please check it.'
+
+ # the process is not main, cannot directly use `send_alert_message`
+ # send_alert_message(
+ # url=self.url,
+ # content=content,
+ # title=title)
+ msg = {
+ 'msg_type': 'post',
+ 'content': {
+ 'post': {
+ 'zh_cn': {
+ 'title': title,
+ 'content': [[{
+ 'tag': 'text',
+ 'text': content
+ }]]
+ }
+ }
+ }
+ }
+
+ try:
+ # avoid connection timeout
+ func_set_timeout(5)(requests.post)(
+ self.url, data=json.dumps(msg))
+ except FunctionTimedOut as e:
+ print(e)
+ self.last_active_time = new_active_time
+
+ def stop(self):
+ """Stop the monitor tracker."""
+
+ self.stopped = True
+
+
+class SingletonMeta(type):
+ """Singleton Meta."""
+
+ _instances = {}
+
+ def __call__(cls, *args, **kwargs):
+ if cls not in cls._instances:
+ cls._instances[cls] = super().__call__(*args, **kwargs)
+ else:
+ assert (
+ len(args) == 0 and len(kwargs) == 0
+ ), f'{cls.__name__} is a singleton class and ' \
+ 'an instance has been created.'
+ return cls._instances[cls]
+
+
+class MonitorManager(metaclass=SingletonMeta):
+ """Monitor Manager for managing monitor thread and monitoring training
+ status."""
+
+ def __init__(self) -> None:
+ self.monitor_thread = None
+ self.user_name = None
+ self.cfg_file = None
+ self.task_type = None
+ self.url = None
+
+ def monitor_exception(self) -> None:
+ """Catch and format exception information, send alert message to
+ Feishu."""
+
+ assert self.url is not None, \
+ 'Please run `MonitorManager.start_monitor` first.'
+
+ filtered_trace = traceback.format_exc().split('\n')[-15:]
+ format_trace = ''
+ for line in filtered_trace:
+ format_trace += '\n' + line
+
+ # try to add error message into logger else directly print message
+ try:
+ print_log(format_trace, logger='current')
+ except Exception:
+ print(format_trace)
+ title = 'Task Error Report'
+ content = f"{self.user_name}'s {self.task_type} task\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Task got exception: {format_trace}.\n' \
+ 'Please check it.'
+ send_alert_message(url=self.url, content=content, title=title)
+
+ def handle_sigterm(self):
+ """Catch SIGTERM signal, and send alert message to Feishu."""
+ assert self.url is not None, \
+ 'Please run `MonitorManager.start_monitor` first.'
+
+ def sigterm_handler(sys_signal, frame):
+ print('receive frame: ', frame)
+ print('receive signal: ', sys_signal)
+ title = 'Task Report'
+ content = f"{self.user_name}'s {self.task_type} task\n" \
+ f'Config file: {self.cfg_file}\n' \
+ f'Process received signal {sys_signal} and exited.'
+ send_alert_message(url=self.url, content=content, title=title)
+
+ signal.signal(signal.SIGTERM, sigterm_handler)
+
+ def start_monitor(self,
+ user_name: str,
+ cfg_file: str,
+ url: str,
+ task_type: str,
+ monitor_interval_seconds: int = 300,
+ ckpt_path: Optional[str] = None) -> None:
+ """Initialize and start monitor thread for checking training job status
+ and other task.
+
+ Args:
+ user_name (str): The user name of the job.
+ cfg_file (str): The config file of the job.
+ url (str): The Feishu webhook address for sending alert messages.
+ task_type (str): The type of the task, 'train' or 'test'.
+ monitor_interval_seconds (int): The time of monitor interval
+ in seconds. Defaults to 300.
+ ckpt_path (str, optional): The checkpoint path for testing tasks.
+ Defaults to None.
+ """
+ # start a monitor thread, periodically check the training status
+ self.monitor_thread = MonitorTracker(
+ user_name=user_name,
+ cfg_file=cfg_file,
+ url=url,
+ task_type=task_type,
+ check_interval=monitor_interval_seconds,
+ )
+ # start a monitor thread, set the important information of the task
+ if task_type == 'train':
+ self.task_type = 'Training'
+ elif task_type == 'test':
+ self.task_type = 'Testing'
+ else:
+ raise NotImplementedError
+ self.user_name = user_name
+ self.cfg_file = cfg_file
+ self.url = url
+ title = 'Task Initiation Report'
+ content = f"{self.user_name}'s {self.task_type} task has started!\n" \
+ f'Config file: {self.cfg_file}\n'
+ if ckpt_path is not None:
+ content += f'Checkpoint file: {ckpt_path}'
+ rank = get_rank()
+ if rank == '0' or rank == 0 or rank is None:
+ send_alert_message(url=url, content=content, title=title)
+
+ def stop_monitor(self) -> None:
+ """Stop the monitor and alert thread."""
+ assert self.url is not None, \
+ 'Please run `MonitorManager.start_monitor` first.'
+
+ if self.monitor_thread is not None:
+ self.monitor_thread.stop()
+ title = 'Task Finish Report'
+
+ content = f"{self.user_name}'s {self.task_type} task completed!\n" \
+ f'Config file: {self.cfg_file}\n'
+ if os.getenv('LAST_METRIC_RESULTS') is not None:
+ metric_content = os.getenv('LAST_METRIC_RESULTS')
+ content += metric_content
+
+ rank = get_rank()
+ if rank == '0' or rank == 0 or rank is None:
+ send_alert_message(url=self.url, content=content, title=title)
+
+
+def initialize_monitor_manager(cfg_file: str,
+ url: str,
+ task_type: str,
+ user_name: Optional[str] = None,
+ monitor_interval_seconds: int = 300,
+ ckpt_path: Optional[str] = None) -> MonitorManager:
+ """Initialize and start monitor thread for checking training job status and
+ other task.
+
+ Args:
+ user_name (str): The user name of the job.
+ cfg_file (str): The config file of the job.
+ url (str): The Feishu webhook address for sending alert messages.
+ task_type (str): The type of the task, 'train' or 'test'.
+ monitor_interval_seconds (int): The time of monitor interval
+ in seconds. Defaults to 300.
+ ckpt_path (str, optional): The checkpoint path for testing tasks.
+ Defaults to None.
+
+ Returns:
+ MonitorManager: The started monitor manager.
+ """
+ if user_name is None:
+ user_name = get_user_name()
+ if user_name is None:
+ user_name = 'lqit'
+ monitor_manager = MonitorManager()
+ monitor_manager.start_monitor(
+ user_name=user_name,
+ cfg_file=cfg_file,
+ url=url,
+ task_type=task_type,
+ monitor_interval_seconds=monitor_interval_seconds,
+ ckpt_path=ckpt_path)
+ return monitor_manager
+
+
+@contextmanager
+def context_monitor_manager(monitor_manager: Optional[MonitorManager] = None):
+ # `monitor_manager.start_monitor` should be called outside of the context
+ if monitor_manager is not None and monitor_manager.url is not None:
+ try:
+ # start monitor should be called outside of the context
+ # monitor_manager.start_monitor(job_name=job_name, url=url)
+ monitor_manager.handle_sigterm()
+ yield
+ finally:
+ monitor_manager.stop_monitor()
+ else:
+ yield
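A usage sketch of these utilities; the config path and webhook URL are placeholders, and the actual task (e.g. an mmengine `Runner` built elsewhere) is elided:

```python
from lqit.common.utils import (context_monitor_manager,
                               initialize_monitor_manager)

monitor_manager = initialize_monitor_manager(
    cfg_file='path/to/config.py',  # placeholder
    url='https://open.feishu.cn/open-apis/bot/v2/hook/XXXX',  # placeholder
    task_type='train')
with context_monitor_manager(monitor_manager):
    # run the task here (e.g. runner.train()); SIGTERM is reported to
    # Lark and stop_monitor() is called when the block exits
    pass
```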
diff --git a/lqit/common/utils/process_lark_hook.py b/lqit/common/utils/process_lark_hook.py
new file mode 100644
index 0000000..9e5e9f6
--- /dev/null
+++ b/lqit/common/utils/process_lark_hook.py
@@ -0,0 +1,91 @@
+import os.path as osp
+import warnings
+
+from mmengine.config import Config
+from mmengine.runner import EpochBasedTrainLoop, IterBasedTrainLoop
+
+from ..engine.hooks.lark_hook import LarkHook
+
+
+def process_lark_hook(cfg: Config, lark_file: str) -> list:
+ """Process LarkHook in custom_hooks.
+
+ Here are three cases:
+ 1. If `custom_hooks` is None, add a LarkHook.
+ 2. If `custom_hooks` has LarkHook, update it.
+ 3. If `custom_hooks` does not have LarkHook, add a LarkHook.
+
+ Args:
+ cfg (:obj:`Config`): Full config.
+ lark_file (str): Lark config file.
+
+ Returns:
+ list[dict]: Custom hooks with processed `LarkHook`.
+ """
+ custom_hooks = cfg.get('custom_hooks', None)
+
+ process_lark = True
+ if not osp.exists(lark_file):
+ warnings.warn(f'{lark_file} does not exist, skip processing lark hook.')
+ process_lark = False
+ else:
+ lark_url = Config.fromfile(lark_file).get('lark', None)
+ if lark_url is None:
+ warnings.warn(f'{lark_file} does not have `lark`, '
+ 'skip processing lark hook.')
+ process_lark = False
+
+ if not process_lark:
+ return custom_hooks
+
+ train_cfg = cfg['train_cfg']
+ train_cfg_type = cfg['train_cfg']['type']
+
+ # `type` may be given as a string name or as the loop class itself
+ if train_cfg_type == 'EpochBasedTrainLoop' or \
+ train_cfg_type is EpochBasedTrainLoop:
+ by_epoch = True
+ # max_epoch = train_cfg['max_epochs']
+ val_interval = train_cfg['val_interval']
+
+ elif train_cfg_type == 'IterBasedTrainLoop' or \
+ train_cfg_type is IterBasedTrainLoop:
+ by_epoch = False
+ # max_iters = train_cfg['max_iters']
+ val_interval = train_cfg['val_interval']
+
+ else:
+ raise NotImplementedError
+
+ base_lark_hook = dict(
+ type='lqit.LarkHook',
+ url=lark_url,
+ cfg_file=cfg.filename,
+ user_name=None,
+ interval=val_interval,
+ by_epoch=by_epoch,
+ silent=True,
+ first_eta_iter=200,
+ )
+ if custom_hooks is None:
+ # `custom_hooks` is not set, add the Lark hook directly
+ new_custom_hooks = [base_lark_hook]
+ else:
+ assert isinstance(custom_hooks, list)
+ has_lark_hook = False
+ new_custom_hooks = []
+ for hook in custom_hooks:
+ hook_type = hook['type']
+ if hook_type in ('LarkHook', 'lqit.LarkHook') or \
+ hook_type is LarkHook:
+ has_lark_hook = True
+ if by_epoch != hook.get('by_epoch', by_epoch):
+ warnings.warn('LarkHook `by_epoch` is different from '
+ 'train_cfg, this may cause error!')
+ base_lark_hook.update(hook)
+ new_custom_hooks.append(base_lark_hook)
+ else:
+ new_custom_hooks.append(hook)
+
+ if not has_lark_hook:
+ new_custom_hooks.append(base_lark_hook)
+ return new_custom_hooks
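A sketch of how a launcher script could apply this helper before building the runner; the config path is a placeholder:

```python
from mmengine.config import Config

from lqit.common.utils.process_lark_hook import process_lark_hook

cfg = Config.fromfile('path/to/config.py')  # placeholder
# inject or update the LarkHook according to configs/lark/lark.py
cfg.custom_hooks = process_lark_hook(cfg, 'configs/lark/lark.py')
```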
diff --git a/lqit/detection/__init__.py b/lqit/detection/__init__.py
index f64ecc3..54f7891 100644
--- a/lqit/detection/__init__.py
+++ b/lqit/detection/__init__.py
@@ -9,7 +9,7 @@
HAS_MMDET = False
mmdet_minimum_version = '3.0.0'
-mmdet_maximum_version = '3.1.0'
+mmdet_maximum_version = '3.2.0'
if HAS_MMDET:
mmdet_version = digit_version(mmdet.__version__)
assert (mmdet_version >= digit_version(mmdet_minimum_version)
diff --git a/lqit/detection/datasets/__init__.py b/lqit/detection/datasets/__init__.py
index d0f56e1..7862a9d 100644
--- a/lqit/detection/datasets/__init__.py
+++ b/lqit/detection/datasets/__init__.py
@@ -1,4 +1,5 @@
from .class_names import * # noqa: F401,F403
+from .duo import DUODataset
from .rtts import RTTSCocoDataset
from .ruod import RUODDataset
from .urpc import URPCCocoDataset, URPCXMLDataset
@@ -6,5 +7,5 @@
__all__ = [
'XMLDatasetWithMetaFile', 'URPCCocoDataset', 'URPCXMLDataset',
- 'RTTSCocoDataset', 'RUODDataset'
+ 'RTTSCocoDataset', 'RUODDataset', 'DUODataset'
]
diff --git a/lqit/detection/datasets/duo.py b/lqit/detection/datasets/duo.py
new file mode 100644
index 0000000..e4ee00a
--- /dev/null
+++ b/lqit/detection/datasets/duo.py
@@ -0,0 +1,17 @@
+from mmdet.datasets import CocoDataset
+
+from lqit.registry import DATASETS
+
+DUO_METAINFO = {
+ 'classes': ('holothurian', 'echinus', 'scallop', 'starfish'),
+ 'palette': [(235, 211, 70), (106, 90, 205), (160, 32, 240), (176, 23, 31)]
+}
+
+
+@DATASETS.register_module()
+class DUODataset(CocoDataset):
+ """Detecting Underwater Objects dataset `DUO.
+
+ `_
+ """
+ METAINFO = DUO_METAINFO
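Once registered, the dataset's class metadata can be inspected without touching any data on disk; a minimal check:

```python
from lqit.detection.datasets import DUODataset

print(DUODataset.METAINFO['classes'])
# ('holothurian', 'echinus', 'scallop', 'starfish')
```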
diff --git a/lqit/detection/datasets/urpc.py b/lqit/detection/datasets/urpc.py
index e89023f..7c1125b 100644
--- a/lqit/detection/datasets/urpc.py
+++ b/lqit/detection/datasets/urpc.py
@@ -13,7 +13,7 @@
class URPCCocoDataset(CocoDataset):
"""Underwater Robot Professional Contest dataset `URPC.
- `_
+ `_
"""
METAINFO = URPC_METAINFO
diff --git a/lqit/detection/models/detectors/detector_with_enhance_head.py b/lqit/detection/models/detectors/detector_with_enhance_head.py
new file mode 100644
index 0000000..6bcf7bc
--- /dev/null
+++ b/lqit/detection/models/detectors/detector_with_enhance_head.py
@@ -0,0 +1,320 @@
+import copy
+from typing import Optional
+
+import torch
+from mmdet.models import SingleStageDetector, TwoStageDetector
+from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig
+from torch import Tensor
+
+from lqit.common.structures import SampleList
+from lqit.edit.models import add_pixel_pred_to_datasample
+from lqit.registry import MODELS
+
+
+@MODELS.register_module()
+class SingleStageWithEnhanceHead(SingleStageDetector):
+ """Base class for two-stage detectors with enhance head.
+
+ Two-stage detectors typically consisting of a region proposal network and a
+ task-specific regression head.
+ """
+
+ def __init__(self,
+ backbone: ConfigType,
+ neck: OptConfigType = None,
+ bbox_head: OptConfigType = None,
+ enhance_head: OptConfigType = None,
+ vis_enhance: Optional[bool] = False,
+ train_cfg: OptConfigType = None,
+ test_cfg: OptConfigType = None,
+ data_preprocessor: OptConfigType = None,
+ init_cfg: OptMultiConfig = None) -> None:
+ super().__init__(
+ backbone=backbone,
+ neck=neck,
+ bbox_head=bbox_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ data_preprocessor=data_preprocessor,
+ init_cfg=init_cfg)
+
+ if enhance_head is not None:
+ self.enhance_head = MODELS.build(enhance_head)
+ self.vis_enhance = vis_enhance
+
+ @property
+ def with_enhance_head(self) -> bool:
+ """bool: whether the detector has a RoI head"""
+ return hasattr(self, 'enhance_head') and self.enhance_head is not None
+
+ def _forward(self, batch_inputs: Tensor,
+ batch_data_samples: SampleList) -> tuple:
+ """Network forward process. Usually includes backbone, neck and head
+ forward without any post-processing.
+
+ Args:
+ batch_inputs (Tensor): Inputs with shape (N, C, H, W).
+
+ Returns:
+ tuple: A tuple of outputs from ``bbox_head`` (and
+ ``enhance_head`` if present) forward.
+ """
+ x = self.extract_feat(batch_inputs)
+ results = self.bbox_head.forward(x)
+ if self.with_enhance_head:
+ enhance_outs = self.enhance_head.forward(x)
+ results = results + (enhance_outs, )
+ return results
+
+ def loss(self, batch_inputs: Tensor,
+ batch_data_samples: SampleList) -> dict:
+ """Calculate losses from a batch of inputs and data samples.
+
+ Args:
+ batch_inputs (Tensor): Input images of shape (N, C, H, W).
+ These should usually be mean centered and std scaled.
+ batch_data_samples (List[:obj:`DetDataSample`]): The batch
+ data samples. It usually includes information such
+ as `gt_instance` or `gt_panoptic_seg` or `gt_sem_seg`.
+
+ Returns:
+ dict: A dictionary of loss components
+ """
+ x = self.extract_feat(batch_inputs)
+
+ losses = dict()
+ if self.with_enhance_head:
+
+ enhance_loss = self.enhance_head.loss(x, batch_data_samples)
+ # avoid loss override
+ assert not set(enhance_loss.keys()) & set(losses.keys())
+ losses.update(enhance_loss)
+
+ det_losses = self.bbox_head.loss(x, batch_data_samples)
+ losses.update(det_losses)
+ return losses
+
+ def predict(self,
+ batch_inputs: Tensor,
+ batch_data_samples: SampleList,
+ rescale: bool = True) -> SampleList:
+ """Predict results from a batch of inputs and data samples with post-
+ processing.
+
+ Args:
+ batch_inputs (Tensor): Inputs with shape (N, C, H, W).
+ batch_data_samples (List[:obj:`DetDataSample`]): The Data
+ Samples. It usually includes information such as
+ `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
+ rescale (bool): Whether to rescale the results.
+ Defaults to True.
+
+ Returns:
+ list[:obj:`DetDataSample`]: The detection results of the
+ input images. Each returned value is a DetDataSample,
+ which usually contains 'pred_instances'. The
+ ``pred_instances`` usually contains the following keys.
+
+ - scores (Tensor): Classification scores, has a shape
+ (num_instance, )
+ - labels (Tensor): Labels of bboxes, has a shape
+ (num_instances, ).
+ - bboxes (Tensor): Has a shape (num_instances, 4),
+ the last dimension 4 arrange as (x1, y1, x2, y2).
+ - masks (Tensor): Has a shape (num_instances, H, W).
+ """
+ x = self.extract_feat(batch_inputs)
+ results_list = self.bbox_head.predict(
+ x, batch_data_samples, rescale=rescale)
+
+ if self.vis_enhance and self.with_enhance_head:
+ enhance_list = self.enhance_head.predict(
+ x, batch_data_samples, rescale=rescale)
+ batch_data_samples = add_pixel_pred_to_datasample(
+ data_samples=batch_data_samples, pixel_list=enhance_list)
+
+ batch_data_samples = self.add_pred_to_datasample(
+ batch_data_samples, results_list)
+ return batch_data_samples
+
+
+@MODELS.register_module()
+class TwoStageWithEnhanceHead(TwoStageDetector):
+ """Base class for two-stage detectors with enhance head.
+
+ Two-stage detectors typically consist of a region proposal network and
+ a task-specific regression head, plus an optional enhance head here.
+ """
+
+ def __init__(self,
+ backbone: ConfigType,
+ neck: OptConfigType = None,
+ rpn_head: OptConfigType = None,
+ roi_head: OptConfigType = None,
+ enhance_head: OptConfigType = None,
+ vis_enhance: Optional[bool] = False,
+ train_cfg: OptConfigType = None,
+ test_cfg: OptConfigType = None,
+ data_preprocessor: OptConfigType = None,
+ init_cfg: OptMultiConfig = None) -> None:
+ super().__init__(
+ backbone=backbone,
+ neck=neck,
+ rpn_head=rpn_head,
+ roi_head=roi_head,
+ train_cfg=train_cfg,
+ test_cfg=test_cfg,
+ data_preprocessor=data_preprocessor,
+ init_cfg=init_cfg)
+
+ if enhance_head is not None:
+ self.enhance_head = MODELS.build(enhance_head)
+ self.vis_enhance = vis_enhance
+
+ @property
+ def with_enhance_head(self) -> bool:
+ """bool: whether the detector has a RoI head"""
+ return hasattr(self, 'enhance_head') and self.enhance_head is not None
+
+ def _forward(self, batch_inputs: Tensor,
+ batch_data_samples: SampleList) -> tuple:
+ """Network forward process. Usually includes backbone, neck and head
+ forward without any post-processing.
+
+ Args:
+ batch_inputs (Tensor): Inputs with shape (N, C, H, W).
+
+ Returns:
+ tuple: A tuple of outputs from ``enhance_head`` (if present)
+ and ``roi_head`` forward.
+ """
+ results = ()
+ x = self.extract_feat(batch_inputs)
+
+ if self.with_rpn:
+ rpn_results_list = self.rpn_head.predict(
+ x, batch_data_samples, rescale=False)
+ else:
+ assert batch_data_samples[0].get('proposals', None) is not None
+ rpn_results_list = [
+ data_sample.proposals for data_sample in batch_data_samples
+ ]
+
+ if self.with_enhance_head:
+ enhance_outs = self.enhance_head.forward(x)
+ results = results + (enhance_outs, )
+
+ roi_outs = self.roi_head.forward(x, rpn_results_list)
+ results = results + (roi_outs, )
+ return results
+
+ def loss(self, batch_inputs: Tensor,
+ batch_data_samples: SampleList) -> dict:
+ """Calculate losses from a batch of inputs and data samples.
+
+ Args:
+ batch_inputs (Tensor): Input images of shape (N, C, H, W).
+ These should usually be mean centered and std scaled.
+ batch_data_samples (List[:obj:`DetDataSample`]): The batch
+ data samples. It usually includes information such
+ as `gt_instance` or `gt_panoptic_seg` or `gt_sem_seg`.
+
+ Returns:
+ dict: A dictionary of loss components
+ """
+ x = self.extract_feat(batch_inputs)
+
+ losses = dict()
+
+        if self.with_enhance_head:
+            enhance_loss = self.enhance_head.loss(x, batch_data_samples)
+ # avoid loss override
+ assert not set(enhance_loss.keys()) & set(losses.keys())
+ losses.update(enhance_loss)
+
+ # RPN forward and loss
+ if self.with_rpn:
+ proposal_cfg = self.train_cfg.get('rpn_proposal',
+ self.test_cfg.rpn)
+ rpn_data_samples = copy.deepcopy(batch_data_samples)
+ # set cat_id of gt_labels to 0 in RPN
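+            # (RPN is class-agnostic, so every gt box is treated as class 0)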
+ for data_sample in rpn_data_samples:
+ data_sample.gt_instances.labels = \
+ torch.zeros_like(data_sample.gt_instances.labels)
+
+ rpn_losses, rpn_results_list = self.rpn_head.loss_and_predict(
+ x, rpn_data_samples, proposal_cfg=proposal_cfg)
+            # avoid reusing the same loss names as roi_head
+            keys = list(rpn_losses.keys())
+            for key in keys:
+ if 'loss' in key and 'rpn' not in key:
+ rpn_losses[f'rpn_{key}'] = rpn_losses.pop(key)
+ losses.update(rpn_losses)
+ else:
+            # TODO: not supported currently; add a check for Fast R-CNN
+ assert batch_data_samples[0].get('proposals', None) is not None
+ # use pre-defined proposals in InstanceData for the second stage
+ # to extract ROI features.
+ rpn_results_list = [
+ data_sample.proposals for data_sample in batch_data_samples
+ ]
+ roi_losses = self.roi_head.loss(x, rpn_results_list,
+ batch_data_samples)
+ losses.update(roi_losses)
+
+ return losses
+
+ def predict(self,
+ batch_inputs: Tensor,
+ batch_data_samples: SampleList,
+ rescale: bool = True) -> SampleList:
+ """Predict results from a batch of inputs and data samples with post-
+ processing.
+
+ Args:
+ batch_inputs (Tensor): Inputs with shape (N, C, H, W).
+ batch_data_samples (List[:obj:`DetDataSample`]): The Data
+ Samples. It usually includes information such as
+ `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
+ rescale (bool): Whether to rescale the results.
+ Defaults to True.
+
+ Returns:
+            list[:obj:`DetDataSample`]: Return the detection results of the
+                input images. The return value is a list of DetDataSample,
+                each of which usually contains 'pred_instances'. The
+                ``pred_instances`` usually contains the following keys.
+
+                - scores (Tensor): Classification scores, has a shape
+                    (num_instances, ).
+                - labels (Tensor): Labels of bboxes, has a shape
+                    (num_instances, ).
+                - bboxes (Tensor): Has a shape (num_instances, 4),
+                    the last dimension 4 arranged as (x1, y1, x2, y2).
+ - masks (Tensor): Has a shape (num_instances, H, W).
+ """
+ assert self.with_bbox, 'Bbox head must be implemented.'
+ x = self.extract_feat(batch_inputs)
+
+ # If there are no pre-defined proposals, use RPN to get proposals
+ if batch_data_samples[0].get('proposals', None) is None:
+ rpn_results_list = self.rpn_head.predict(
+ x, batch_data_samples, rescale=False)
+ else:
+ rpn_results_list = [
+ data_sample.proposals for data_sample in batch_data_samples
+ ]
+
+ if self.vis_enhance and self.with_enhance_head:
+ enhance_list = self.enhance_head.predict(
+ x, batch_data_samples, rescale=rescale)
+ batch_data_samples = add_pixel_pred_to_datasample(
+ data_samples=batch_data_samples, pixel_list=enhance_list)
+
+ results_list = self.roi_head.predict(
+ x, rpn_results_list, batch_data_samples, rescale=rescale)
+
+ batch_data_samples = self.add_pred_to_datasample(
+ batch_data_samples, results_list)
+ return batch_data_samples
diff --git a/lqit/detection/models/detectors/single_stage_enhance_head.py b/lqit/detection/models/detectors/single_stage_enhance_head.py
index f118d17..a86e1e3 100644
--- a/lqit/detection/models/detectors/single_stage_enhance_head.py
+++ b/lqit/detection/models/detectors/single_stage_enhance_head.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
from mmdet.models import SingleStageDetector
from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig
from torch import Tensor
@@ -11,7 +9,7 @@
@MODELS.register_module()
class SingleStageWithEnhanceHead(SingleStageDetector):
- """Base class for two-stage detectors with enhance head.
+ """Base class for single-stage detectors with enhance head.
-    Two-stage detectors typically consisting of a region proposal network and a
-    task-specific regression head.
+    Single-stage detectors directly predict on the output features of the
+    backbone and neck, with an additional enhance head for pixel enhancement.
@@ -22,7 +20,7 @@ def __init__(self,
neck: OptConfigType = None,
bbox_head: OptConfigType = None,
enhance_head: OptConfigType = None,
- vis_enhance: Optional[bool] = False,
+ vis_enhance: bool = False,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
diff --git a/lqit/edit/engine/__init__.py b/lqit/edit/engine/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index cb186f3..b42b56c 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,3 +1,4 @@
+func_timeout
matplotlib
numpy
scipy
diff --git a/tools/dataset_converters/xml_to_json.py b/tools/dataset_converters/xml_to_json.py
index d3f8910..41112c8 100644
--- a/tools/dataset_converters/xml_to_json.py
+++ b/tools/dataset_converters/xml_to_json.py
@@ -13,13 +13,14 @@
"""
import argparse
import os.path as osp
+import sys
import xml.etree.ElementTree as ET
from typing import List
import numpy as np
from mmcv import imread
from mmengine.fileio import dump, isdir, isfile, list_from_file
-from mmengine.utils import mkdir_or_exist, track_progress
+from mmengine.utils import ProgressBar, mkdir_or_exist
from lqit.detection.datasets import get_classes
@@ -34,7 +35,7 @@ def parse_args():
'img_path', help='The path of directory that saving images.')
parser.add_argument('ann_file', help='Annotation file path')
parser.add_argument('-o', '--out-dir', help='output path')
- parser.add_argument('--img-suffix', default='jpg', help='The image suffix')
+ parser.add_argument('--img-suffix', default='png', help='The image suffix')
args = parser.parse_args()
return args
@@ -60,12 +61,21 @@ def cvt_annotations(xml_path: str, img_path: str, ann_file: str,
img_paths = [
f'{img_path}/{img_name}.{img_suffix}' for img_name in img_names
]
- part_annotations = track_progress(
- parse_xml,
- list(
- zip(xml_paths, img_paths,
- [dataset_name for _ in range(len(xml_paths))])))
- annotations.extend(part_annotations)
+    dataset_names = [dataset_name] * len(xml_paths)
+
+ assert len(xml_paths) == len(img_paths) == len(dataset_names)
+
+ prog_bar = ProgressBar(len(xml_paths), 50, sys.stdout)
+
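+    # parse_xml may return None (e.g. unreadable image or empty annotation);
+    # such entries are skipped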
+    for cur_xml_path, cur_img_path, cur_dataset_name in zip(
+            xml_paths, img_paths, dataset_names):
+        annotation = parse_xml(cur_xml_path, cur_img_path, cur_dataset_name)
+ if annotation is not None:
+ annotations.append(annotation)
+ prog_bar.update()
+ prog_bar.file.write('\n')
annotations = cvt_to_coco_json(
annotations=annotations, dataset_name=dataset_name)
diff --git a/tools/misc/gather_results.py b/tools/misc/gather_results.py
new file mode 100644
index 0000000..9709850
--- /dev/null
+++ b/tools/misc/gather_results.py
@@ -0,0 +1,130 @@
+import argparse
+import glob
+import json
+import os
+import os.path as osp
+from collections import defaultdict
+from datetime import datetime
+
+import pandas as pd
+from rich.console import Console
+from rich.table import Table
+
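+# Usage sketch (paths are illustrative):
+#   python tools/misc/gather_results.py work_dirs/duo \
+#       --keys bbox_mAP bbox_mAP_50 bbox_mAP_75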
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Gather Results')
+ parser.add_argument('root', help='saving root path of log and checkpoint')
+ parser.add_argument(
+ '--keys',
+ default=[
+ 'bbox_mAP', 'bbox_mAP_50', 'bbox_mAP_75', 'bbox_mAP_s',
+ 'bbox_mAP_m', 'bbox_mAP_l'
+ ],
+ nargs='+',
+ help='keys to be gathered from log file')
+ args = parser.parse_args()
+ return args
+
+
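+# load_json_log below expects mmengine-style JSON logs: one dict per line,
+# e.g. (keys are illustrative)
+#   {"lr": 0.02, "loss": 1.23, "epoch": 1, "iter": 50}
+#   {"coco/bbox_mAP": 0.405, "coco/bbox_mAP_50": 0.620, "step": 12}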
+def load_json_log(json_log):
+    # load and convert the json log to a log dict: the key is the epoch,
+    # the value is a sub dict whose keys are metrics (e.g. memory, bbox_mAP)
+    # and whose values are lists of the values over all logged iterations
+ log_dict = dict()
+ with open(json_log) as log_file:
+ epoch = 1
+ for line in log_file:
+ log = json.loads(line)
+            # skip lines that contain only one key
+            if len(log) <= 1:
+ continue
+ if epoch not in log_dict:
+ log_dict[epoch] = defaultdict(list)
+ for k, v in log.items():
+ if '/' in k:
+ log_dict[epoch][k.split('/')[-1]].append(v)
+ else:
+ log_dict[epoch][k].append(v)
+ if 'epoch' in log.keys():
+ epoch = log['epoch']
+ return log_dict
+
+
+def main():
+ args = parse_args()
+
+ root = args.root
+    sub_dirs = os.listdir(root)
+    sub_dirs.sort()
+
+ keys = args.keys
+
+ results_dict = defaultdict(list)
+
+    for ckpt_dir in sub_dirs:
+ ckpt_dir_path = osp.join(root, ckpt_dir)
+ if not osp.isdir(ckpt_dir_path):
+ continue
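+        # a ckpt dir may hold several timestamped runs; take the newest
+        # vis_data dir, then the newest json log inside it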
+        log_path = sorted(
+            glob.glob(osp.join(root, ckpt_dir, '*', 'vis_data')))[-1]
+        log_json_path = sorted(glob.glob(osp.join(log_path, '*_*.json')))[-1]
+
+ log_dict = load_json_log(log_json_path)
+ max_key = max(list(log_dict.keys()))
+ max_key_dict = log_dict.get(max_key, None)
+ if max_key_dict is None:
+ print(f'Warning: Cannot get results from {log_json_path}!')
+ continue
+ results_dict['ckpt_dir'].append(ckpt_dir)
+ for key in keys:
+ result = max_key_dict.get(key)
+ if result is None:
+ result = '-'
+ else:
+ if isinstance(result, list):
+ assert len(result) == 1
+ result = round(result[0] * 100, 2)
+ elif isinstance(result, float):
+ result = round(result * 100, 2)
+                else:
+                    raise TypeError(
+                        f'Unexpected result type: {type(result)}')
+ results_dict[key].append(result)
+
+ df = pd.DataFrame(results_dict)
+ now = datetime.now().strftime('%Y%m%d_%H%M%S')
+ save_name = f'gather_results_{now}.xlsx'
+ saving_path = osp.join(root, save_name)
+ df.to_excel(saving_path, index=False)
+
+ print(f'Results are saved to {saving_path}')
+
+ # print table
+ title = f'Results gather from {root}'
+ table = Table(title=title)
+ headers = ['ckpt_dir'] + keys
+ for header in headers:
+ table.add_column(header)
+ results_list = df.values.tolist()
+ for result in results_list:
+ new_result = []
+ for _res in result:
+ if isinstance(_res, float):
+ _res = str(_res)
+ new_result.append(_res)
+ table.add_row(*new_result)
+ console = Console()
+ console.print(table, end='')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/misc/gather_ruod_results.py b/tools/misc/gather_ruod_results.py
deleted file mode 100644
index 3217e55..0000000
--- a/tools/misc/gather_ruod_results.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import argparse
-import glob
-import json
-import os.path as osp
-from collections import defaultdict
-
-
-def parse_args():
- parser = argparse.ArgumentParser(
- description='Gather RUOD Detection Results')
- parser.add_argument('root', help='saving path of log and checkpoint')
- args = parser.parse_args()
- return args
-
-
-def load_json_log(json_log):
- # load and convert json_logs to log_dict, key is epoch, value is a sub dict
- # keys of sub dict is different metrics, e.g. memory, bbox_mAP
- # value of sub dict is a list of corresponding values of all iterations
- log_dict = dict()
- with open(json_log) as log_file:
- epoch = 1
- for line in log_file:
- log = json.loads(line)
- # skip lines only contains one key
- if not len(log) > 1:
- continue
- if epoch not in log_dict:
- log_dict[epoch] = defaultdict(list)
- for k, v in log.items():
- if '/' in k:
- log_dict[epoch][k.split('/')[-1]].append(v)
- else:
- log_dict[epoch][k].append(v)
- if 'epoch' in log.keys():
- epoch = log['epoch']
- return log_dict
-
-
-def main():
- args = parse_args()
-
- root = args.root
- default_ckpk_dir = [
- 'faster-rcnn_r50_fpn_1x_ruod',
- 'cascade-rcnn_r50_fpn_1x_ruod',
- 'retinanet_r50_fpn_1x_ruod',
- 'fcos_r50-caffe_fpn_gn-head_1x_ruod',
- 'atss_r50_fpn_1x_ruod',
- 'tood_r50_fpn_1x_ruod',
- 'ssd300_120e_ruod',
- ]
-
- map_dict = {
- 'atss_r50_fpn_1x_ruod': 'ATSS',
- 'cascade-rcnn_r50_fpn_1x_ruod': 'Cascade R-CNN',
- 'faster-rcnn_r50_fpn_1x_ruod': 'Faster R-CNN',
- 'fcos_r50-caffe_fpn_gn-head_1x_ruod': 'FCOS',
- 'retinanet_r50_fpn_1x_ruod': 'RetinaNet',
- 'ssd300_120e_ruod': 'SSD',
- 'tood_r50_fpn_1x_ruod': 'TOOD',
- }
-
- for ckpt_dir in default_ckpk_dir:
- ckpt_dir_path = osp.join(root, ckpt_dir)
- if not osp.exists(ckpt_dir_path):
- continue
- log_path = list(
- sorted(glob.glob(osp.join(root, ckpt_dir, '*', 'vis_data'))))[-1]
- log_json_path = list(glob.glob(osp.join(log_path, '*_*.json')))[-1]
-
- log_dict = load_json_log(log_json_path)
- max_key = max(list(log_dict.keys()))
- bbox_AP = log_dict[max_key].get('bbox_mAP')
- assert bbox_AP is not None and len(bbox_AP) == 1
- bbox_mAP = bbox_AP[0]
- name = map_dict[ckpt_dir]
- print(name, round(bbox_mAP * 100, 1))
-
-
-if __name__ == '__main__':
- main()
diff --git a/tools/misc/write_txt.py b/tools/misc/write_txt.py
index 5f0f19d..eafd213 100644
--- a/tools/misc/write_txt.py
+++ b/tools/misc/write_txt.py
@@ -95,18 +95,22 @@ def main():
total_size = len(filenames)
assert total_size > 0, 'Total size must larger than 0'
- shuffle_index = np.random.permutation(total_size)
- filenames = [filenames[i] for i in shuffle_index]
if args.split:
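+        # shuffle only when splitting so the train/val split is random;
+        # an unsplit file list keeps its original order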
+ shuffle_index = np.random.permutation(total_size)
+ filenames = [filenames[i] for i in shuffle_index]
assert 0 < args.proportion < 1, \
'Proportion of the validation to the total need ' \
f'between 0 and 1, but get {args.proportion}'
proportion = args.proportion
val_size = round(proportion * total_size)
val_filename = filenames[:val_size]
+ val_filename.sort()
train_size = int(total_size - val_size)
train_filename = filenames[val_size:]
+ train_filename.sort()
print('-' * 10)
print(f'Collect {total_size} files, proportion is {proportion}, '
f'split {train_size} for training and {val_size} for validation')
diff --git a/tools/test.py b/tools/test.py
index cddc22c..9b00051 100644
--- a/tools/test.py
+++ b/tools/test.py
@@ -1,10 +1,14 @@
import argparse
import os
import os.path as osp
+import warnings
from mmengine.config import Config, DictAction
from mmengine.runner import Runner
+from lqit.common.utils.lark_manager import (context_monitor_manager,
+ initialize_monitor_manager)
+from lqit.common.utils.process_lark_hook import process_lark_hook
from lqit.registry import RUNNERS
from lqit.utils import setup_cache_size_limit_of_dynamo
@@ -42,6 +46,17 @@ def parse_args():
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
+ parser.add_argument(
+ '-l',
+ '--lark',
+ help='Report the running status to lark bot',
+ action='store_true',
+ default=False)
+ parser.add_argument(
+ '--lark-file',
+ default='configs/lark/lark.py',
+ type=str,
+ help='lark bot config file path')
# When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
# will pass the `--local-rank` parameter to `tools/train.py` instead
# of `--local_rank`.
@@ -72,9 +87,7 @@ def trigger_visualization_hook(cfg, args):
return cfg
-def main():
- args = parse_args()
-
+def main(args):
# Reduce the number of repeated compilations and improve
# testing speed.
setup_cache_size_limit_of_dynamo()
@@ -99,6 +112,10 @@ def main():
if args.show or args.show_dir:
cfg = trigger_visualization_hook(cfg, args)
+ if args.lark:
+ custom_hooks = process_lark_hook(cfg=cfg, lark_file=args.lark_file)
+ cfg.custom_hooks = custom_hooks
+
# build the runner from config
if 'runner_type' not in cfg:
# build the default runner
@@ -113,4 +130,42 @@
if __name__ == '__main__':
- main()
+ args = parse_args()
+
+ monitor_manager = None
+
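+    # with --lark, the job status is periodically reported to a Lark bot
+    # webhook read from the lark config file (default: configs/lark/lark.py)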
+ if args.lark:
+ lark_file = args.lark_file
+ if not osp.exists(lark_file):
+            warnings.warn(f'{lark_file} does not exist, skip.')
+ lark_url = None
+ else:
+ lark = Config.fromfile(lark_file)
+ lark_url = lark.get('lark', None)
+ if lark_url is None:
+ warnings.warn(f'{lark_file} does not have `lark`, skip.')
+
+ monitor_interval_seconds = lark.get('monitor_interval_seconds',
+ None)
+ if monitor_interval_seconds is None:
+ monitor_interval_seconds = 300
+
+ user_name = lark.get('user_name', None)
+
+ monitor_manager = initialize_monitor_manager(
+ cfg_file=args.config,
+ url=lark_url,
+ task_type='test',
+ user_name=user_name,
+ monitor_interval_seconds=monitor_interval_seconds,
+ ckpt_path=args.checkpoint)
+ with context_monitor_manager(monitor_manager):
+ try:
+ main(args)
+        except Exception:
+            if monitor_manager is not None:
+                monitor_manager.monitor_exception()
+            # re-raise so the process still exits with a non-zero status
+            raise
diff --git a/tools/train.py b/tools/train.py
index 380682f..c8bc541 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -2,12 +2,16 @@
import logging
import os
import os.path as osp
+import warnings
from mmengine.config import Config, DictAction
from mmengine.logging import print_log
from mmengine.registry import RUNNERS
from mmengine.runner import Runner
+from lqit.common.utils.lark_manager import (context_monitor_manager,
+ initialize_monitor_manager)
+from lqit.common.utils.process_lark_hook import process_lark_hook
from lqit.utils import print_colored_log, setup_cache_size_limit_of_dynamo
@@ -47,6 +51,17 @@ def parse_args():
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
+ parser.add_argument(
+ '-l',
+ '--lark',
+ help='Report the running status to lark bot',
+ action='store_true',
+ default=False)
+ parser.add_argument(
+ '--lark-file',
+ default='configs/lark/lark.py',
+ type=str,
+ help='lark bot config file path')
# When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
# will pass the `--local-rank` parameter to `tools/train.py` instead
# of `--local_rank`.
@@ -58,9 +73,7 @@ def parse_args():
return args
-def main():
- args = parse_args()
-
+def main(args):
# Reduce the number of repeated compilations and improve
# training speed.
setup_cache_size_limit_of_dynamo()
@@ -115,6 +128,10 @@ def main():
cfg.resume = True
cfg.load_from = args.resume
+ if args.lark:
+ custom_hooks = process_lark_hook(cfg=cfg, lark_file=args.lark_file)
+ cfg.custom_hooks = custom_hooks
+
# build the runner from config
if 'runner_type' not in cfg:
# build the default runner
@@ -135,4 +152,41 @@
if __name__ == '__main__':
- main()
+ args = parse_args()
+
+ monitor_manager = None
+
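+    # same Lark reporting flow as tools/test.py; context_monitor_manager is
+    # assumed to be a no-op when monitor_manager is None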
+ if args.lark:
+ lark_file = args.lark_file
+ if not osp.exists(lark_file):
+            warnings.warn(f'{lark_file} does not exist, skip.')
+ lark_url = None
+ else:
+ lark = Config.fromfile(lark_file)
+ lark_url = lark.get('lark', None)
+ if lark_url is None:
+ warnings.warn(f'{lark_file} does not have `lark`, skip.')
+
+ monitor_interval_seconds = lark.get('monitor_interval_seconds',
+ None)
+ if monitor_interval_seconds is None:
+ monitor_interval_seconds = 300
+
+ user_name = lark.get('user_name', None)
+
+ monitor_manager = initialize_monitor_manager(
+ cfg_file=args.config,
+ url=lark_url,
+ task_type='train',
+ user_name=user_name,
+ monitor_interval_seconds=monitor_interval_seconds)
+ with context_monitor_manager(monitor_manager):
+ try:
+ main(args)
+        except Exception:
+            if monitor_manager is not None:
+                monitor_manager.monitor_exception()
+            # re-raise so the process still exits with a non-zero status
+            raise