diff --git a/chatlearn/models/vllm_module_v2.py b/chatlearn/models/vllm_module_v2.py
index 4c6ccea..4e36cb1 100644
--- a/chatlearn/models/vllm_module_v2.py
+++ b/chatlearn/models/vllm_module_v2.py
@@ -48,6 +48,7 @@ def __init__(self, *args, **kwargs):
         if 'worker_module_name' in kwargs and 'worker_class_name' in kwargs:
             RayWorkerWrapper.__init__(self, **kwargs) # pylint: disable=non-parent-init-called
         os.environ['VLLM_HOST_IP'] = self.get_address()
 
+        self.tokenizer = None
         self._model = None
         self.set_vllm_pp_layer_partition()
diff --git a/chatlearn/synchronizer/parameter_sync.py b/chatlearn/synchronizer/parameter_sync.py
index 135a560..6daa5a8 100644
--- a/chatlearn/synchronizer/parameter_sync.py
+++ b/chatlearn/synchronizer/parameter_sync.py
@@ -1185,7 +1185,7 @@ def setup_rank_mapping(self):
             and get_args().runtime_args.routed_expert_regrouping_comm_type == ROUTED_EXPERT_REGROUPING_COMM_TYPE.ALLTOALL
         ):
             raise NotImplementedError(
-                "All-to-all routed expert weight is only supported when src TP size * src EP size = dst TP size. "
+                "all-to-all routed expert weight is only supported when src TP size * src EP size = dst TP size. "
                 "Please consider setting `routed_expert_regrouping_comm_type` to allgather or adjusting the model's parallel size."
             )