diff --git a/python/flexflow/serve/serve.py b/python/flexflow/serve/serve.py
index 0b8a78e94e..42cee2aa80 100644
--- a/python/flexflow/serve/serve.py
+++ b/python/flexflow/serve/serve.py
@@ -31,11 +31,48 @@
 from peft import PeftModel, PeftConfig
 from huggingface_hub import HfApi, HfFolder, Repository
 import torch, shutil, hashlib, json, gc, os
+from huggingface_hub import HfApi, HfFolder, Repository
+import torch, shutil, hashlib, json, gc, os
 from typing import Union, List
 import tempfile
 
 
 
+class GenerationConfig:
+    """Container for the sampling configuration values."""
+
+    def __init__(
+        self,
+        do_sample: bool = False,
+        temperature: float = 0.9,
+        topp: float = 0.8,
+        topk: int = 1,
+    ):
+        """Record the sampling options on the new instance
+
+        :param do_sample: True to sample, False for greedy decoding, defaults to False
+        :type do_sample: bool, optional
+        :param temperature: The temperature value, defaults to 0.9
+        :type temperature: float, optional
+        :param topp: The top-p (top probabilities) value, defaults to 0.8
+        :type topp: float, optional
+        :param topk: The top-k value, defaults to 1
+        :type topk: int, optional
+        """
+        # The values are stored as given; nothing is validated or
+        # normalized at construction time
+        self.do_sample, self.temperature = do_sample, temperature
+        self.topp, self.topk = topp, topk
+
+
+class GenerationResult:
+    """Holds the text and the tokens produced by a generation request."""
+
+    def __init__(self, text: str = None, tokens: list = None):
+        """Keep the given text and token list; both default to None."""
+        self.output_text, self.output_tokens = text, tokens
+
+
 class _SupportedModels:
     def __init__(
         self,
@@ -816,6 +853,47 @@ def download_hf_weights_if_needed(self):
         else:
             print(f"Loading '{self.peft_model_id}' model weights from the cache...")
 
+    def upload_hf_model(self, new_model_id: str, model_path:str, private: bool = False):
+        """
+        Converts the FlexFlow weights back into a Hugging Face model and uploads it to the Hub.
+
+        :param new_model_id: The repo ID to create (if missing) and upload to on the Hugging Face Hub.
+        :param model_path: The path where the FlexFlow weights are stored.
+            NOTE(review): not read yet; weights are loaded from self.weights_path -- confirm intent.
+        :param private: Whether to create the Hub repo as private.
+        :return: None. A missing token or a weight loading error is printed and ends the call early.
+        """
+        print(f"Preparing model for upload to Hugging Face Hub: {new_model_id}")
+
+        # Check for a Hub token first, so a missing login fails fast instead of
+        # after the (expensive) weight conversion and temporary save
+        if not HfFolder.get_token():
+            print("Hugging Face token not found. Please login using `huggingface-cli login`.")
+            return
+
+        # Initialize a new Hugging Face model instance
+        hf_model = AutoModelForCausalLM.from_config(self.hf_config)
+
+        # Load FlexFlow weights into the Hugging Face model instance
+        try:
+            self.model_class.load_weights_into_hf_model(hf_model, self.weights_path)
+        except Exception as e:
+            print(f"Error loading weights into model: {e}")
+            return
+
+        # Save the converted model to a temporary directory and upload it; the context
+        # manager removes the directory even if saving or uploading raises
+        with tempfile.TemporaryDirectory() as temp_dir:
+            hf_model.save_pretrained(temp_dir)
+
+            # Upload the model
+            api = HfApi()
+            print(f"Uploading processed model to Hugging Face Hub: {new_model_id}")
+            api.create_repo(repo_id=new_model_id, private=private, exist_ok=True)
+            api.upload_folder(folder_path=temp_dir, repo_id=new_model_id)
+
+        print("Upload completed successfully.")
+
     def process_and_upload_hf_model(self, model_id: str, private: bool = False):
         """
         Processes the PEFT model and uploads it to the Hugging Face Hub.