diff --git a/recipes/audio/audio_to_text/app/whisper_client.py b/recipes/audio/audio_to_text/app/whisper_client.py
index 93653304..6b1982ce 100644
--- a/recipes/audio/audio_to_text/app/whisper_client.py
+++ b/recipes/audio/audio_to_text/app/whisper_client.py
@@ -6,7 +6,8 @@
 st.set_page_config(page_title="Whisper Speech Recognition", page_icon=":studio_microphone:")
 st.title(":studio_microphone: Speech Recognition")
 st.markdown("Upload an audio file you wish to have translated")
-endpoint = os.getenv("MODEL_ENDPOINT", default="http://0.0.0.0:8001/inference")
+endpoint = os.getenv("MODEL_ENDPOINT", default="http://0.0.0.0:8001")
+endpoint = f"{endpoint}/inference"
 audio = st.file_uploader("", type=["wav"], accept_multiple_files=False)
 # read audio file
 if audio:
diff --git a/recipes/audio/audio_to_text/quadlet/audio-text.yaml b/recipes/audio/audio_to_text/quadlet/audio-text.yaml
index 2307c478..a254fe83 100644
--- a/recipes/audio/audio_to_text/quadlet/audio-text.yaml
+++ b/recipes/audio/audio_to_text/quadlet/audio-text.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/inference
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: quay.io/redhat-et/locallm-audio-to-text:latest
     name: audio-to-text
     ports:
diff --git a/recipes/common/Makefile.common b/recipes/common/Makefile.common
index d8b0175e..6800f651 100644
--- a/recipes/common/Makefile.common
+++ b/recipes/common/Makefile.common
@@ -153,7 +153,7 @@ quadlet:
 
 .PHONY: run
 run:
-	podman run -it -p $(PORT):$(PORT) -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 ${APP_IMAGE}
+	podman run -it -p $(PORT):$(PORT) -e MODEL_ENDPOINT=http://10.88.0.1:8001 ${APP_IMAGE}
 
 .PHONY: clean
 clean:
diff --git a/recipes/computer_vision/object_detection/client/object_detection_client.py b/recipes/computer_vision/object_detection/client/object_detection_client.py
index 990ca35d..e843145f 100644
--- a/recipes/computer_vision/object_detection/client/object_detection_client.py
+++ b/recipes/computer_vision/object_detection/client/object_detection_client.py
@@ -6,7 +6,7 @@ import io
 
 st.title("🕵️‍♀️ Object Detection")
 
-endpoint =os.getenv("MODEL_ENDPOINT", default = "http://0.0.0.0:8000/detection")
+endpoint =os.getenv("MODEL_ENDPOINT", default = "http://0.0.0.0:8000")
 headers = {"accept": "application/json", "Content-Type": "application/json"}
 
 image = st.file_uploader("Upload Image")
@@ -27,7 +27,7 @@
     img_bytes = bytes_io.getvalue()
     b64_image = base64.b64encode(img_bytes).decode('utf-8')
     data = {'image': b64_image}
-    response = requests.post(endpoint, headers=headers,json=data, verify=False)
+    response = requests.post(f'{endpoint}/detection', headers=headers,json=data, verify=False)
     # parse response and display outputs
     response_json = response.json()
     image = response_json["image"]
diff --git a/recipes/multimodal/image_understanding/README.md b/recipes/multimodal/image_understanding/README.md
index b1e19712..1c9a5e6f 100644
--- a/recipes/multimodal/image_understanding/README.md
+++ b/recipes/multimodal/image_understanding/README.md
@@ -30,7 +30,7 @@ podman run -it -p 8001:8001 -v /locallm/models:/locallm/models:Z -e
 ### Run AI Application Image Locally
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 image_understanding
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 image_understanding
 ```
 
 Interact with the application from your local browser at `localhost:8501`. You can upload an image file from your host machine and the app will provide a natural language description of the image.
diff --git a/recipes/multimodal/image_understanding/app/image_understanding.py b/recipes/multimodal/image_understanding/app/image_understanding.py
index 3fd7b351..20dda679 100644
--- a/recipes/multimodal/image_understanding/app/image_understanding.py
+++ b/recipes/multimodal/image_understanding/app/image_understanding.py
@@ -3,15 +3,15 @@
 import base64
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          default="http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          default="http://localhost:8001")
 
 st.title("📷 Image Analysis")
 image = st.file_uploader("Upload Image:",)
 top_container = st.container(border=True)
 if image is not None:
     b64_image = base64.b64encode(image.read()).decode("utf-8")
-    client = OpenAI(base_url=model_service,
+    client = OpenAI(base_url=f'{model_service}/v1',
                     api_key="sk-xxx")
     with st.spinner("Analyzing Image..."):
         st.image(image)
diff --git a/recipes/natural_language_processing/chatbot/README.md b/recipes/natural_language_processing/chatbot/README.md
index 644c1431..3d91a36b 100644
--- a/recipes/natural_language_processing/chatbot/README.md
+++ b/recipes/natural_language_processing/chatbot/README.md
@@ -88,7 +88,7 @@ If you wish to run this as a codesnippet instead of a make command checkout the
 
 ### Build and Deploy the AI Application
 
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To build and deploy the AI application use the following:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To build and deploy the AI application use the following:
 
 ```bash
 # Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)
diff --git a/recipes/natural_language_processing/chatbot/app/chatbot_ui.py b/recipes/natural_language_processing/chatbot/app/chatbot_ui.py
index 8ef92ff9..e366dffc 100644
--- a/recipes/natural_language_processing/chatbot/app/chatbot_ui.py
+++ b/recipes/natural_language_processing/chatbot/app/chatbot_ui.py
@@ -9,8 +9,9 @@
 import json
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
diff --git a/recipes/natural_language_processing/chatbot/provision/playbook.yml b/recipes/natural_language_processing/chatbot/provision/playbook.yml
index 3670e104..366cabe3 100644
--- a/recipes/natural_language_processing/chatbot/provision/playbook.yml
+++ b/recipes/natural_language_processing/chatbot/provision/playbook.yml
@@ -57,4 +57,4 @@
         ports:
          - 8501:8501
         env:
-          MODEL_SERVICE_ENDPOINT: http://10.88.0.1:8001/v1
+          MODEL_ENDPOINT: http://10.88.0.1:8001
diff --git a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
index e5304633..0ef386c9 100644
--- a/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
+++ b/recipes/natural_language_processing/chatbot/quadlet/chatbot.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: chatbot-inference
     ports:
diff --git a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
index b7e2ff68..d024235f 100644
--- a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
+++ b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py
@@ -42,7 +42,7 @@
 CB = pytest_container.Container(
         url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
         extra_environment_variables={
-            "MODEL_SERVICE_ENDPOINT": "http://10.88.0.1:8001/v1"
+            "MODEL_ENDPOINT": "http://10.88.0.1:8001"
         },
         forwarded_ports=[
             pytest_container.PortForwarding(
diff --git a/recipes/natural_language_processing/codegen/README.md b/recipes/natural_language_processing/codegen/README.md
index d7999f47..2b0f65c0 100644
--- a/recipes/natural_language_processing/codegen/README.md
+++ b/recipes/natural_language_processing/codegen/README.md
@@ -82,10 +82,10 @@ podman build -t codegen app
 ```
 
 ### Deploy the AI Application
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 codegen
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 codegen
 ```
 
 ### Interact with the AI Application
diff --git a/recipes/natural_language_processing/codegen/app/codegen-app.py b/recipes/natural_language_processing/codegen/app/codegen-app.py
index 741b7d34..89ba80b2 100644
--- a/recipes/natural_language_processing/codegen/app/codegen-app.py
+++ b/recipes/natural_language_processing/codegen/app/codegen-app.py
@@ -8,7 +8,8 @@
 import requests
 import time
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT", "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
diff --git a/recipes/natural_language_processing/codegen/quadlet/codegen.yaml b/recipes/natural_language_processing/codegen/quadlet/codegen.yaml
index 1de317b6..fe19f0b9 100644
--- a/recipes/natural_language_processing/codegen/quadlet/codegen.yaml
+++ b/recipes/natural_language_processing/codegen/quadlet/codegen.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
  - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: codegen-inference
     ports:
diff --git a/recipes/natural_language_processing/rag/README.md b/recipes/natural_language_processing/rag/README.md
index 2473a28d..f4832930 100644
--- a/recipes/natural_language_processing/rag/README.md
+++ b/recipes/natural_language_processing/rag/README.md
@@ -133,7 +133,7 @@ make APP_IMAGE=rag build
 ```
 
 ### Deploy the AI Application
 
-Make sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
+Make sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
 
 There also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database.
@@ -141,7 +141,7 @@ The following Podman command can be used to run your AI Application:
 
 ```bash
 podman run --rm -it -p 8501:8501 \
--e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 \
+-e MODEL_ENDPOINT=http://10.88.0.1:8001 \
 -e VECTORDB_HOST=10.88.0.1 \
 -v Local/path/to/locallm/models/:/rag/models \
 rag
diff --git a/recipes/natural_language_processing/rag/app/rag_app.py b/recipes/natural_language_processing/rag/app/rag_app.py
index 56e1615f..e71fd8a4 100644
--- a/recipes/natural_language_processing/rag/app/rag_app.py
+++ b/recipes/natural_language_processing/rag/app/rag_app.py
@@ -18,7 +18,8 @@
 import argparse
 import pathlib
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT","http://0.0.0.0:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT","http://0.0.0.0:8001")
+model_service = f"{model_service}/v1"
 chunk_size = os.getenv("CHUNK_SIZE", 150)
 embedding_model = os.getenv("EMBEDDING_MODEL","BAAI/bge-base-en-v1.5")
 vdb_host = os.getenv("VECTORDB_HOST", "0.0.0.0")
diff --git a/recipes/natural_language_processing/rag/quadlet/rag.yaml b/recipes/natural_language_processing/rag/quadlet/rag.yaml
index 00ff0b52..3e9ff41b 100644
--- a/recipes/natural_language_processing/rag/quadlet/rag.yaml
+++ b/recipes/natural_language_processing/rag/quadlet/rag.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: rag-inference
     ports:
diff --git a/recipes/natural_language_processing/summarizer/README.md b/recipes/natural_language_processing/summarizer/README.md
index cfa67086..748d9cdd 100644
--- a/recipes/natural_language_processing/summarizer/README.md
+++ b/recipes/natural_language_processing/summarizer/README.md
@@ -110,10 +110,10 @@ make APP_IMAGE=summarizer build
 ```
 
 ### Deploy the AI Application
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 summarizer
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 summarizer
 ```
 
 ### Interact with the AI Application
diff --git a/recipes/natural_language_processing/summarizer/app/summarizer.py b/recipes/natural_language_processing/summarizer/app/summarizer.py
index 741912ca..8c4df01f 100644
--- a/recipes/natural_language_processing/summarizer/app/summarizer.py
+++ b/recipes/natural_language_processing/summarizer/app/summarizer.py
@@ -7,8 +7,9 @@
 import time
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
diff --git a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
index 8d38faf5..cc13ba3d 100644
--- a/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
+++ b/recipes/natural_language_processing/summarizer/quadlet/summarizer.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: summarizer-inference
     ports:
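Not part of the patch: a minimal sketch of the convention these changes adopt, assuming an OpenAI-compatible model service listening on port 8001. `MODEL_ENDPOINT` now carries only the base URL of the model service, and each application appends the API path it needs (`/v1`, `/inference`, or `/detection`) in code; variable names below are illustrative.

```python
import os
from openai import OpenAI

# MODEL_ENDPOINT holds only the base URL; the API path is appended in code.
base_url = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")

# OpenAI-compatible client, mirroring the pattern used in the recipe apps.
client = OpenAI(base_url=f"{base_url}/v1", api_key="sk-xxx")
```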