From 9277fd050762ba8da1401d524295b5ede3b634a9 Mon Sep 17 00:00:00 2001
From: one-lithe-rune
Date: Fri, 4 Aug 2023 21:29:24 +0100
Subject: [PATCH] gpt_langchain.py fixes for pydantic

* change 'stream' instance variables to 'stream_output' and alias them back
  to 'stream' to clear the clash with the Pydantic BaseModel.

---
 apps/language_models/langchain/gpt_langchain.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/apps/language_models/langchain/gpt_langchain.py b/apps/language_models/langchain/gpt_langchain.py
index a21529cadc..ada465de6a 100644
--- a/apps/language_models/langchain/gpt_langchain.py
+++ b/apps/language_models/langchain/gpt_langchain.py
@@ -436,7 +436,7 @@ class GradioInference(LLM):
     chat_client: bool = False
 
     return_full_text: bool = True
-    stream: bool = False
+    stream_output: bool = Field(False, alias="stream")
     sanitize_bot_response: bool = False
 
     prompter: Any = None
@@ -481,7 +481,7 @@ def _call(
         # so server should get prompt_type or '', not plain
         # This is good, so gradio server can also handle stopping.py conditions
         # this is different than TGI server that uses prompter to inject prompt_type prompting
-        stream_output = self.stream
+        stream_output = self.stream_output
         gr_client = self.client
         client_langchain_mode = "Disabled"
         client_langchain_action = LangChainAction.QUERY.value
@@ -596,7 +596,7 @@ class H2OHuggingFaceTextGenInference(HuggingFaceTextGenInference):
     inference_server_url: str = ""
     timeout: int = 300
     headers: dict = None
-    stream: bool = False
+    stream_output: bool = Field(False, alias="stream")
     sanitize_bot_response: bool = False
     prompter: Any = None
     tokenizer: Any = None
@@ -663,7 +663,7 @@ def _call(
         # lower bound because client is re-used if multi-threading
         self.client.timeout = max(300, self.timeout)
 
-        if not self.stream:
+        if not self.stream_output:
             res = self.client.generate(
                 prompt,
                 **gen_server_kwargs,
@@ -852,7 +852,7 @@ def get_llm(
             top_p=top_p,
             # typical_p=top_p,
             callbacks=callbacks if stream_output else None,
-            stream=stream_output,
+            stream_output=stream_output,
             prompter=prompter,
             tokenizer=tokenizer,
             client=hf_client,
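
Note (illustration only, not part of the patch): a minimal sketch of the
Field(..., alias=...) pattern the hunks above rely on, assuming a plain
pydantic BaseModel; 'ExampleLLM' is a hypothetical stand-in for the patched
classes, not code from gpt_langchain.py.

    # Sketch only: the attribute is renamed to 'stream_output' so it no longer
    # clashes with 'stream' on the base class, but the alias keeps the old
    # 'stream=' keyword working for callers.
    from pydantic import BaseModel, Field

    class ExampleLLM(BaseModel):
        stream_output: bool = Field(False, alias="stream")

    llm = ExampleLLM(stream=True)       # old keyword still accepted via the alias
    assert llm.stream_output is True    # internal code reads the renamed attribute

    # Assumption worth noting: with default pydantic config, construction by the
    # field name itself (ExampleLLM(stream_output=True)) also requires
    # population-by-field-name to be enabled (allow_population_by_field_name in
    # pydantic v1, populate_by_name in v2), unless the LLM base class config
    # already allows it.

In short, the alias keeps existing call sites that pass stream=... working
while the attribute itself no longer shadows the name used by the Pydantic
base model.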