diff --git a/apps/language_models/langchain/gpt_langchain.py b/apps/language_models/langchain/gpt_langchain.py
index a21529cadc..ada465de6a 100644
--- a/apps/language_models/langchain/gpt_langchain.py
+++ b/apps/language_models/langchain/gpt_langchain.py
@@ -436,7 +436,7 @@ class GradioInference(LLM):
     chat_client: bool = False
 
     return_full_text: bool = True
-    stream: bool = False
+    stream_output: bool = Field(False, alias="stream")
     sanitize_bot_response: bool = False
 
     prompter: Any = None
@@ -481,7 +481,7 @@ def _call(
         # so server should get prompt_type or '', not plain
         # This is good, so gradio server can also handle stopping.py conditions
         # this is different than TGI server that uses prompter to inject prompt_type prompting
-        stream_output = self.stream
+        stream_output = self.stream_output
         gr_client = self.client
         client_langchain_mode = "Disabled"
         client_langchain_action = LangChainAction.QUERY.value
@@ -596,7 +596,7 @@ class H2OHuggingFaceTextGenInference(HuggingFaceTextGenInference):
     inference_server_url: str = ""
     timeout: int = 300
     headers: dict = None
-    stream: bool = False
+    stream_output: bool = Field(False, alias="stream")
     sanitize_bot_response: bool = False
     prompter: Any = None
     tokenizer: Any = None
@@ -663,7 +663,7 @@ def _call(
         # lower bound because client is re-used if multi-threading
         self.client.timeout = max(300, self.timeout)
 
-        if not self.stream:
+        if not self.stream_output:
             res = self.client.generate(
                 prompt,
                 **gen_server_kwargs,
@@ -852,7 +852,7 @@ def get_llm(
             top_p=top_p,
             # typical_p=top_p,
             callbacks=callbacks if stream_output else None,
-            stream=stream_output,
+            stream_output=stream_output,
             prompter=prompter,
             tokenizer=tokenizer,
             client=hf_client,
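
The recurring change above renames the pydantic field `stream` to `stream_output` while declaring `alias="stream"`, so the code can use the new attribute name internally without breaking callers that still pass the old keyword (the diff assumes `Field` is already imported from pydantic in this module). Below is a minimal sketch of how that aliasing behaves, assuming the pydantic v1 API that langchain used at the time; the `Example` class is hypothetical and not part of the PR:

from pydantic import BaseModel, Field


class Example(BaseModel):
    # Renamed field; the alias keeps the old `stream=` keyword working.
    stream_output: bool = Field(False, alias="stream")

    class Config:
        # In pydantic v1, an aliased field is populated via the alias only,
        # unless population by field name is enabled (here or in a parent
        # Config). The get_llm hunk passes stream_output=stream_output, i.e.
        # the field name, so this setting (or an equivalent inherited one)
        # is what makes that call site validate.
        allow_population_by_field_name = True


print(Example(stream=True).stream_output)         # True, populated via the alias
print(Example(stream_output=True).stream_output)  # True, populated via the field name

Renaming the field without the alias would reject any existing caller still passing `stream=...`; the alias makes the rename backward compatible at construction time while freeing the bare name `stream` on the class.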