This repository has been archived by the owner on Jan 5, 2025. It is now read-only.

Commit

Merge pull request #399 from openchatai/fix/socrates_is_mortal
Adding an extra LLM call to create a standalone, consolidated prompt
codebanesr authored Dec 15, 2023
2 parents a45a83e + 6b13f79 commit a49d602
Showing 13 changed files with 97 additions and 26 deletions.
14 changes: 10 additions & 4 deletions llm-server/app.py
@@ -16,6 +16,7 @@
from shared.models.opencopilot_db import create_database_schema
from flask import jsonify
from utils.db import Database

# from routes.uploads.celery_service import celery
db_instance = Database()
mongo = db_instance.get_db()
@@ -26,7 +27,9 @@

app = Flask(__name__)
app.url_map.strict_slashes = False
app.register_blueprint(workflow, url_prefix="/backend/flows")  # todo delete this one once the new flows are ready
app.register_blueprint(
    workflow, url_prefix="/backend/flows"
)  # todo delete this one once the new flows are ready
app.register_blueprint(flow, url_prefix="/backend/flows-new")
app.register_blueprint(_swagger, url_prefix="/backend/swagger_api")
app.register_blueprint(chat_workflow, url_prefix="/backend/chat")
@@ -38,13 +41,16 @@

app.config.from_object(Config)

@app.route('/healthcheck')

@app.route("/healthcheck")
def health_check():
    info = mongo.client
    # c_info = celery.connection()

    # print(c_info)
    return jsonify(status='OK', servers={"mongo": info.options.pool_options.max_pool_size})
    return jsonify(
        status="OK", servers={"mongo": info.options.pool_options.max_pool_size}
    )


init_qdrant_collections()
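
A quick smoke test of the healthcheck endpoint above, as a sketch: the base URL and port are assumptions, not taken from this diff.

# Hypothetical check; point the URL at wherever llm-server actually runs.
import requests

resp = requests.get("http://localhost:8002/healthcheck")
print(resp.json())  # expected shape: {"status": "OK", "servers": {"mongo": <max_pool_size>}}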
2 changes: 1 addition & 1 deletion llm-server/models/repository/flow_repo.py
@@ -1,6 +1,6 @@
from typing import Optional, Type

from opencopilot_db import engine
from shared.models.opencopilot_db import engine
from sqlalchemy.orm import sessionmaker

from shared.models.opencopilot_db.flow import Flow
2 changes: 1 addition & 1 deletion llm-server/presenters/flow_presenters.py
@@ -1,4 +1,4 @@
from opencopilot_db import engine
from shared.models.opencopilot_db import engine
from sqlalchemy.orm import sessionmaker

from shared.models.opencopilot_db.flow import Flow
51 changes: 51 additions & 0 deletions llm-server/prompts/consolidated_prompt.py
@@ -0,0 +1,51 @@
from typing import List, cast
from langchain.schema import BaseMessage, AIMessage, HumanMessage, SystemMessage
from utils.get_chat_model import get_chat_model


def get_last_4(arr):
    # Keep only the most recent four messages of the history.
    if arr is None or len(arr) == 0:
        return []
    return arr[-4:]  # slicing already handles lists shorter than four


async def get_consolidate_question(
    conversation_history: List[BaseMessage], user_input: str
) -> str:
    if len(conversation_history) == 0:
        return user_input

    conversation_str = ""
    for message in get_last_4(conversation_history):
        if message.type == "ai":
            conversation_str += f"Assistant: {message.content} \n"
        if message.type == "human":  # LangChain message types are lowercase
            conversation_str += f"Human: {message.content} \n"

    messages: List[BaseMessage] = []
    messages.append(
        SystemMessage(
            content="You are a chat inspector: every time you look at a new line of chat, you are able to connect references in the chat with information you saw earlier, and spell them out."
        )
    )
    messages.append(
        HumanMessage(
            content="""Given a conversation history, replace occurrences of references like "it," "they," etc. in the latest input with their actual values. Remember, you are not supposed to answer the user's question, merely enhance the latest user input.
Conversation history:
---
User: Can you recommend a good restaurant nearby?
Assistant: Sure, how about trying that Italian place on Main Street? It has great pasta.
User input: What do they serve?
---
Output: "What does the Italian place on Main Street serve?"
"""
        )
    )

    messages.append(
        HumanMessage(
            content=f"Here's the conversation history: ```{conversation_str}```"  # use the condensed history built above
        )
    )
    messages.append(HumanMessage(content=f"{user_input}"))

    chat = get_chat_model()
    result = await chat.ainvoke(messages)

    return cast(str, result.content)
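
A minimal usage sketch for the new helper: the history below is invented for illustration, and it assumes the chat-model environment variables are configured.

# Hypothetical driver; message contents are made up.
import asyncio
from langchain.schema import AIMessage, HumanMessage
from prompts.consolidated_prompt import get_consolidate_question

async def demo() -> None:
    history = [
        HumanMessage(content="Can you recommend a good restaurant nearby?"),
        AIMessage(content="Sure, how about the Italian place on Main Street?"),
    ]
    standalone = await get_consolidate_question(history, "What do they serve?")
    print(standalone)  # e.g. "What does the Italian place on Main Street serve?"

asyncio.run(demo())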
4 changes: 1 addition & 3 deletions llm-server/requirements.txt
@@ -86,8 +86,6 @@ onnxruntime==1.16.1
openai==0.28.1
openapi-schema-validator==0.6.2
openapi-spec-validator==0.7.1
opencopilot-db==6.0.0rc1
opencopilot-utils==2.1.2
opentelemetry-api==1.21.0
opentelemetry-exporter-otlp-proto-common==1.21.0
opentelemetry-exporter-otlp-proto-grpc==1.21.0
@@ -167,4 +165,4 @@ Werkzeug==3.0.1
wrapt==1.16.0
wsproto==1.2.0
yarl==1.9.2
zipp==3.17.0
\ No newline at end of file
zipp==3.17.0
1 change: 0 additions & 1 deletion llm-server/routes/prompt/prompt_controller.py
@@ -66,7 +66,6 @@ def delete_prompt(id: str) -> Response:

    return Response(status=204)


def get_validated_prompt_data(request) -> Optional[Prompt]:
    try:
        data = Prompt(**request.get_json())
19 changes: 14 additions & 5 deletions llm-server/routes/root_service.py
@@ -4,6 +4,7 @@
from langchain.schema import BaseMessage
from custom_types.api_operation import ApiOperation_vs
from models.repository.chat_history_repo import get_chat_message_as_llm_conversation
from prompts.consolidated_prompt import get_consolidate_question
from routes.workflow.typings.response_dict import ResponseDict
from routes.workflow.typings.run_workflow_input import WorkflowData
from routes.workflow.utils import (
@@ -97,16 +98,24 @@ async def handle_request(
    apis: List[ApiOperation_vs] = []
    flows: List[WorkflowFlowType] = []
    prev_conversations: List[BaseMessage] = []
    prev_conversations = await get_chat_message_as_llm_conversation(session_id)
    consolidated_question = await get_consolidate_question(
        conversation_history=prev_conversations, user_input=text
    )
    logger.info(
        "Comparing consolidated prompt with user input",
        consolidated_question=consolidated_question,
        text=text,
    )
    try:
        tasks = [
            get_relevant_docs(text, bot_id),
            get_relevant_apis_summaries(text, bot_id),
            get_relevant_flows(text, bot_id),
            get_chat_message_as_llm_conversation(session_id),
            get_relevant_docs(consolidated_question, bot_id),
            get_relevant_apis_summaries(consolidated_question, bot_id),
            get_relevant_flows(consolidated_question, bot_id),
        ]

        results = await asyncio.gather(*tasks)
        context, apis, flows, prev_conversations = results
        context, apis, flows = results
        # also provide a list of workflows here itself, the llm should be able to figure out if a workflow needs to be run
        step = process_conversation_step(
            user_requirement=text,
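
Design note: the history fetch was pulled out of the asyncio.gather because the consolidated question must exist before any retrieval starts. The resulting two-phase shape, reconstructed from the hunk above as a sketch:

    # Phase 1: sequential, since consolidation depends on the fetched history.
    prev_conversations = await get_chat_message_as_llm_conversation(session_id)
    consolidated_question = await get_consolidate_question(
        conversation_history=prev_conversations, user_input=text
    )

    # Phase 2: concurrent, since the three retrievals are independent of one another.
    context, apis, flows = await asyncio.gather(
        get_relevant_docs(consolidated_question, bot_id),
        get_relevant_apis_summaries(consolidated_question, bot_id),
        get_relevant_flows(consolidated_question, bot_id),
    )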
5 changes: 1 addition & 4 deletions llm-server/routes/workflow/extractors/extract_body.py
@@ -41,10 +41,7 @@ async def gen_body_from_schema(
HumanMessage(content="prev api responses: {}".format(prev_api_response)),
HumanMessage(content="current_state: {}".format(current_state)),
HumanMessage(
content="If the user is asking to generate values for some fields, likes product descriptions, jokes etc add them."
),
HumanMessage(
content="Given the provided information, generate the appropriate minified JSON payload to use as body for the API request. If a user doesn't provide a required parameter, use sensible defaults for required params, and leave optional params."
content="Generate the compact JSON payload for the API request based on the provided information, without adding commentary. If a user fails to provide a necessary parameter, default values for required parameters will be used, while optional parameters will be left unchanged."
),
]

5 changes: 1 addition & 4 deletions llm-server/routes/workflow/extractors/extract_param.py
@@ -27,13 +27,10 @@ async def gen_params_from_schema(
HumanMessage(content="prev api responses: {}.".format(prev_resp)),
HumanMessage(content="User's requirement: {}.".format(text)),
HumanMessage(content="Current state: {}.".format(current_state)),
HumanMessage(
content="If the user is asking to generate values for some fields, likes product descriptions, jokes etc add them."
),
HumanMessage(
content="Based on the information provided, construct a valid parameter object to be used with python requests library. In cases where user input doesnot contain information for a query, DO NOT add that specific query parameter to the output. If a user doesn't provide a required parameter, use sensible defaults for required params, and leave optional params."
),
HumanMessage(content="Your output must be a valid json"),
HumanMessage(content="Your output must be a valid json, without any commentary"),
]
result = chat(messages)
logger.info("[OpenCopilot] LLM Body Response: {}".format(result.content))
@@ -87,7 +87,7 @@ def process_conversation_step(
"bot_message": "your response based on the instructions provided at the beginning, this could also be clarification if the information provided by the user is not complete / accurate",
}
Don't add operation ids if you can reply by merely looking in the conversation history.
Don't add operation ids if you can reply by merely looking in the conversation history, also don't add any commentary.
"""
)
)
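
Since the prompt above demands bare JSON with a bot_message key, here is a sketch of the consuming side under that assumption (the helper name is hypothetical, not from this commit):

# Hypothetical parser for the reply format the prompt above requests.
import json

def parse_step_reply(raw: str) -> str:
    parsed = json.loads(raw)  # raises ValueError if the model added commentary
    return parsed["bot_message"]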
2 changes: 1 addition & 1 deletion llm-server/shared/models/opencopilot_db/flow.py
@@ -1,7 +1,7 @@
import datetime
import uuid

from opencopilot_db.database_setup import Base, engine
from shared.models.opencopilot_db.database_setup import Base, engine
from sqlalchemy import Column, String, DateTime, JSON, Text


2 changes: 1 addition & 1 deletion llm-server/shared/models/opencopilot_db/flow_variables.py
@@ -1,7 +1,7 @@
import datetime
import uuid

from opencopilot_db.database_setup import Base, engine
from shared.models.opencopilot_db.database_setup import Base, engine
from sqlalchemy import Column, String, DateTime


14 changes: 14 additions & 0 deletions llm-server/utils/get_chat_model.py
@@ -12,6 +12,19 @@
model_name = os.getenv(llm_consts.model_env_var, CHAT_MODELS.gpt_3_5_turbo_16k)


# To allow support for json responses, pass the modelKwargs
"""
const chat = new ChatOpenAI({
modelName: "gpt-4-1106-preview",
modelKwargs: {
"response_format": {
type: "json_object"
}
}
});
"""


@lru_cache(maxsize=1)
def get_chat_model() -> BaseChatModel:
    if model_name == CHAT_MODELS.gpt_3_5_turbo:
@@ -25,6 +38,7 @@ def get_chat_model() -> BaseChatModel:
elif model_name == "claude":
model = ChatAnthropic(
anthropic_api_key=os.getenv("CLAUDE_API_KEY"),
model_name="claude-instant-1.2"
)
elif model_name == "openchat":
model = ChatOllama(base_url=f"{localip}:11434", model="openchat", temperature=0)
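
The commented snippet near the top of this file shows the LangChain.js form of JSON mode; a rough Python-side equivalent for this era of langchain, offered as a sketch rather than part of the commit:

# Hypothetical Python counterpart of the modelKwargs example above.
from langchain.chat_models import ChatOpenAI

chat = ChatOpenAI(
    model_name="gpt-4-1106-preview",
    model_kwargs={"response_format": {"type": "json_object"}},
)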
