diff --git a/recipes/audio/audio_to_text/app/whisper_client.py b/recipes/audio/audio_to_text/app/whisper_client.py
index 93653304..6b1982ce 100644
--- a/recipes/audio/audio_to_text/app/whisper_client.py
+++ b/recipes/audio/audio_to_text/app/whisper_client.py
@@ -6,7 +6,8 @@
 st.set_page_config(page_title="Whisper Speech Recognition", page_icon=":studio_microphone:")
 st.title(":studio_microphone: Speech Recognition")
 st.markdown("Upload an audio file you wish to have translated")
-endpoint = os.getenv("MODEL_ENDPOINT", default="http://0.0.0.0:8001/inference")
+endpoint = os.getenv("MODEL_ENDPOINT", default="http://0.0.0.0:8001")
+endpoint = f"{endpoint}/inference"
 audio = st.file_uploader("", type=["wav"], accept_multiple_files=False)
 # read audio file
 if audio:
diff --git a/recipes/audio/audio_to_text/quadlet/audio-text.yaml b/recipes/audio/audio_to_text/quadlet/audio-text.yaml
index 2307c478..a254fe83 100644
--- a/recipes/audio/audio_to_text/quadlet/audio-text.yaml
+++ b/recipes/audio/audio_to_text/quadlet/audio-text.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/inference
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: quay.io/redhat-et/locallm-audio-to-text:latest
     name: audio-to-text
     ports:
diff --git a/recipes/common/Makefile.common b/recipes/common/Makefile.common
index d8b0175e..6800f651 100644
--- a/recipes/common/Makefile.common
+++ b/recipes/common/Makefile.common
@@ -153,7 +153,7 @@ quadlet:
 
 .PHONY: run
 run:
-	podman run -it -p $(PORT):$(PORT) -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 ${APP_IMAGE}
+	podman run -it -p $(PORT):$(PORT) -e MODEL_ENDPOINT=http://10.88.0.1:8001 ${APP_IMAGE}
 
 .PHONY: clean
 clean:
diff --git a/recipes/computer_vision/object_detection/client/object_detection_client.py b/recipes/computer_vision/object_detection/client/object_detection_client.py
index 990ca35d..e843145f 100644
--- a/recipes/computer_vision/object_detection/client/object_detection_client.py
+++ b/recipes/computer_vision/object_detection/client/object_detection_client.py
@@ -6,7 +6,7 @@ import io
 
 st.title("🕵️‍♀️ Object Detection")
 
-endpoint =os.getenv("MODEL_ENDPOINT", default = "http://0.0.0.0:8000/detection")
+endpoint =os.getenv("MODEL_ENDPOINT", default = "http://0.0.0.0:8000")
 headers = {"accept": "application/json", "Content-Type": "application/json"}
 
 image = st.file_uploader("Upload Image")
@@ -27,7 +27,7 @@
     img_bytes = bytes_io.getvalue()
     b64_image = base64.b64encode(img_bytes).decode('utf-8')
     data = {'image': b64_image}
-    response = requests.post(endpoint, headers=headers,json=data, verify=False)
+    response = requests.post(f'{endpoint}/detection', headers=headers,json=data, verify=False)
     # parse response and display outputs
     response_json = response.json()
     image = response_json["image"]
diff --git a/recipes/multimodal/image_understanding/README.md b/recipes/multimodal/image_understanding/README.md
index b1e19712..1c9a5e6f 100644
--- a/recipes/multimodal/image_understanding/README.md
+++ b/recipes/multimodal/image_understanding/README.md
@@ -30,7 +30,7 @@ podman run -it -p 8001:8001 -v /locallm/models:/locallm/models:Z -e
 ### Run AI Application Image Locally
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 image_understanding
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 image_understanding
 ```
 
 Interact with the application from your local browser at `localhost:8501`. You can upload an image file from your host machine and the app will provide a natural language description of the image.
diff --git a/recipes/multimodal/image_understanding/app/image_understanding.py b/recipes/multimodal/image_understanding/app/image_understanding.py
index 3fd7b351..20dda679 100644
--- a/recipes/multimodal/image_understanding/app/image_understanding.py
+++ b/recipes/multimodal/image_understanding/app/image_understanding.py
@@ -3,15 +3,15 @@
 import base64
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          default="http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          default="http://localhost:8001")
 
 st.title("📷 Image Analysis")
 image = st.file_uploader("Upload Image:",)
 top_container = st.container(border=True)
 if image is not None:
     b64_image = base64.b64encode(image.read()).decode("utf-8")
-    client = OpenAI(base_url=model_service,
+    client = OpenAI(base_url=f'{model_service}/v1',
                     api_key="sk-xxx")
     with st.spinner("Analyzing Image..."):
         st.image(image)
diff --git a/recipes/natural_language_processing/chatbot/README.md b/recipes/natural_language_processing/chatbot/README.md
index 644c1431..3d91a36b 100644
--- a/recipes/natural_language_processing/chatbot/README.md
+++ b/recipes/natural_language_processing/chatbot/README.md
@@ -88,7 +88,7 @@ If you wish to run this as a codesnippet instead of a make command checkout the
 
 ### Build and Deploy the AI Application
 
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To build and deploy the AI application use the following:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To build and deploy the AI application use the following:
 
 ```bash
 # Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)
diff --git a/recipes/natural_language_processing/chatbot/app/chatbot_ui.py b/recipes/natural_language_processing/chatbot/app/chatbot_ui.py
index 8ef92ff9..e366dffc 100644
--- a/recipes/natural_language_processing/chatbot/app/chatbot_ui.py
+++ b/recipes/natural_language_processing/chatbot/app/chatbot_ui.py
@@ -9,8 +9,9 @@
 import json
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
diff --git a/recipes/natural_language_processing/chatbot/provision/playbook.yml b/recipes/natural_language_processing/chatbot/provision/playbook.yml
index 3670e104..366cabe3 100644
--- a/recipes/natural_language_processing/chatbot/provision/playbook.yml
+++ b/recipes/natural_language_processing/chatbot/provision/playbook.yml
@@ -57,4 +57,4 @@
         ports:
          - 8501:8501
         env:
-          MODEL_SERVICE_ENDPOINT: http://10.88.0.1:8001/v1
+          MODEL_ENDPOINT: http://10.88.0.1:8001
diff --git a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
index e5304633..0ef386c9 100644
--- a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
+++ b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: chatbot-inference
     ports:
diff --git a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
index b7e2ff68..d024235f 100644
--- a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
+++ b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
@@ -42,7 +42,7 @@
 CB = pytest_container.Container(
         url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
         extra_environment_variables={
-            "MODEL_SERVICE_ENDPOINT": "http://10.88.0.1:8001/v1"
+            "MODEL_ENDPOINT": "http://10.88.0.1:8001"
         },
         forwarded_ports=[
             pytest_container.PortForwarding(
diff --git a/recipes/natural_language_processing/codegen/README.md b/recipes/natural_language_processing/codegen/README.md
index d7999f47..2b0f65c0 100644
--- a/recipes/natural_language_processing/codegen/README.md
+++ b/recipes/natural_language_processing/codegen/README.md
@@ -82,10 +82,10 @@ podman build -t codegen app
 ```
 
 ### Deploy the AI Application
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 codegen
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 codegen
 ```
 
 ### Interact with the AI Application
diff --git a/recipes/natural_language_processing/codegen/app/codegen-app.py b/recipes/natural_language_processing/codegen/app/codegen-app.py
index 741b7d34..89ba80b2 100644
--- a/recipes/natural_language_processing/codegen/app/codegen-app.py
+++ b/recipes/natural_language_processing/codegen/app/codegen-app.py
@@ -8,7 +8,8 @@
 import requests
 import time
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT", "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
diff --git a/recipes/natural_language_processing/codegen/quadlet/codegen.yaml b/recipes/natural_language_processing/codegen/quadlet/codegen.yaml
index 1de317b6..fe19f0b9 100644
--- a/recipes/natural_language_processing/codegen/quadlet/codegen.yaml
+++ b/recipes/natural_language_processing/codegen/quadlet/codegen.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
  - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: codegen-inference
     ports:
diff --git a/recipes/natural_language_processing/rag/README.md b/recipes/natural_language_processing/rag/README.md
index 2473a28d..f4832930 100644
--- a/recipes/natural_language_processing/rag/README.md
+++ b/recipes/natural_language_processing/rag/README.md
@@ -133,7 +133,7 @@ make APP_IMAGE=rag build
 ```
 
 ### Deploy the AI Application
 
-Make sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
+Make sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
 
 There also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database.
@@ -141,7 +141,7 @@ The following Podman command can be used to run your AI Application:
 
 ```bash
 podman run --rm -it -p 8501:8501 \
--e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 \
+-e MODEL_ENDPOINT=http://10.88.0.1:8001 \
 -e VECTORDB_HOST=10.88.0.1 \
 -v Local/path/to/locallm/models/:/rag/models \
 rag
diff --git a/recipes/natural_language_processing/rag/app/rag_app.py b/recipes/natural_language_processing/rag/app/rag_app.py
index 56e1615f..e71fd8a4 100644
--- a/recipes/natural_language_processing/rag/app/rag_app.py
+++ b/recipes/natural_language_processing/rag/app/rag_app.py
@@ -18,7 +18,8 @@
 import argparse
 import pathlib
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT","http://0.0.0.0:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT","http://0.0.0.0:8001")
+model_service = f"{model_service}/v1"
 chunk_size = os.getenv("CHUNK_SIZE", 150)
 embedding_model = os.getenv("EMBEDDING_MODEL","BAAI/bge-base-en-v1.5")
 vdb_host = os.getenv("VECTORDB_HOST", "0.0.0.0")
diff --git a/recipes/natural_language_processing/rag/quadlet/rag.yaml b/recipes/natural_language_processing/rag/quadlet/rag.yaml
index 00ff0b52..3e9ff41b 100644
--- a/recipes/natural_language_processing/rag/quadlet/rag.yaml
+++ b/recipes/natural_language_processing/rag/quadlet/rag.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: rag-inference
     ports:
diff --git a/recipes/natural_language_processing/summarizer/README.md b/recipes/natural_language_processing/summarizer/README.md
index cfa67086..748d9cdd 100644
--- a/recipes/natural_language_processing/summarizer/README.md
+++ b/recipes/natural_language_processing/summarizer/README.md
@@ -110,10 +110,10 @@ make APP_IMAGE=summarizer build
 ```
 
 ### Deploy the AI Application
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 summarizer
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 summarizer
 ```
 
 ### Interact with the AI Application
diff --git a/recipes/natural_language_processing/summarizer/app/summarizer.py b/recipes/natural_language_processing/summarizer/app/summarizer.py
index 741912ca..8c4df01f 100644
--- a/recipes/natural_language_processing/summarizer/app/summarizer.py
+++ b/recipes/natural_language_processing/summarizer/app/summarizer.py
@@ -7,8 +7,9 @@
 import time
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
index 8d38faf5..cc13ba3d 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: summarizer-inference
     ports:
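Not part of the patch: a minimal sketch of the convention these changes adopt, assuming an OpenAI-compatible model service listening on port 8001. `MODEL_ENDPOINT` now carries only the base URL of the model service, and each application appends the API path it needs (`/v1`, `/inference`, or `/detection`) in code; variable names below are illustrative.

```python
import os
from openai import OpenAI

# MODEL_ENDPOINT holds only the base URL; the API path is appended in code.
base_url = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")

# OpenAI-compatible client, mirroring the pattern used in the recipe apps.
client = OpenAI(base_url=f"{base_url}/v1", api_key="sk-xxx")
```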