diff --git a/packages/phoenix-evals/src/phoenix/evals/default_audio_templates.py b/packages/phoenix-evals/src/phoenix/evals/default_audio_templates.py
index 2632b42946..c51b114677 100644
--- a/packages/phoenix-evals/src/phoenix/evals/default_audio_templates.py
+++ b/packages/phoenix-evals/src/phoenix/evals/default_audio_templates.py
@@ -15,54 +15,71 @@
 - Volume: Loudness of the speech (e.g., loud, soft, moderate).
 - Intensity: Emotional strength or expression (e.g., subdued, sharp, exaggerated).
 
+The classified emotion must be one of the following:
+['anger', 'happiness', 'excitement', 'sadness', 'neutral', 'frustration', 'fear', 'surprise',
+'disgust', 'other']
+
+IMPORTANT: Choose the most dominant emotion expressed in the audio. Neutral should only be used when
+no other emotion is clearly present, do your best to avoid this label.
+
 ************
 
-Here is the base64 encoded audio string:
+Here is the audio to classify:
 
 """
 
 EMOTION_AUDIO_BASE_TEMPLATE_PT_2 = """{audio}"""
 
 EMOTION_AUDIO_BASE_TEMPLATE_PT_3 = """
+RESPONSE FORMAT:
+
+Provide a single word from the list above representing the detected emotion.
+
 ************
 
-POSSIBLE EMOTIONS:
-['anger', 'happiness', 'excitement', 'sadness', 'neutral', 'frustration', 'fear', 'surprise',
-'disgust', 'other']
-IMPORTANT: Choose the most dominant emotion expressed in the audio. Neutral should only be used when
-no other emotion is clearly present, do your best to avoid this label.
+EXAMPLE RESPONSE: excitement
 
 ************
 
-RESPONSE FORMAT:
+Analyze the audio and respond in this format.
+"""
 
-Provide a single word from the list above representing the detected emotion.
+EMOTION_AUDIO_EXPLANATION_TEMPLATE_PT_1 = """
+You are an AI system designed to classify emotions in audio files.
 
-EXAMPLE RESPONSE: excitement
+### TASK:
+First, explain in a step-by-step manner how you analyzed the provided audio file based on these
+characteristics and how they indicate the emotion of the speaker:
+- Tone: General tone of the speaker (e.g., cheerful, tense, calm).
+- Pitch: Level and variability of the pitch (e.g., high, low, monotone).
+- Pace: Speed of speech (e.g., fast, slow, steady).
+- Volume: Loudness of the speech (e.g., loud, soft, moderate).
+- Intensity: Emotional strength or expression (e.g., subdued, sharp, exaggerated).
 
-Analyze the audio and respond in this format.
+Then, classify the primary emotion. The classified emotion must be one of the following:
+['anger', 'happiness', 'excitement', 'sadness', 'neutral', 'frustration', 'fear', 'surprise',
+'disgust', 'other']
+
+IMPORTANT: Choose the most dominant emotion expressed in the audio. Neutral should only be used when
+no other emotion is clearly present, do your best to avoid this label.
 
 ************
 
+Here is the audio to classify:
 """
 
-EMOTION_AUDIO_BASE_TEMPLATE_EXPLANATION = """
+EMOTION_AUDIO_EXPLANATION_TEMPLATE_PT_3 = """
+EXAMPLE RESPONSE FORMAT:
 
-Write out in a step by step manner
-an EXPLANATION to show how you determined the emotion of the audio considering the tone, pitch,
-pace, volume, and intensity.
+************
 
-EXAMPLE RESPONSE:
-1. Tone: The tone was enthusiastic and high-energy.
-2. Pitch: The pitch was elevated and varied significantly.
-3. Pace: The pace was fast, consistent with excitement.
-4. Volume: The volume was loud and dynamic.
-5. Intensity: The delivery was expressive and emotionally charged.
-6. Conclusion: Based on these features, the primary emotion is 'excitement.'
+EXPLANATION: An explanation of your reasoning based on the tone, pitch, pace, volume, and intensity
+    of the audio.
+LABEL: "excitement"
 
 ************
 
-EXPLANATION:
+Analyze the audio and respond in the format shown above.
 """
 
 EMOTION_AUDIO_RAILS = [
@@ -97,7 +114,7 @@
     explanation_template=[
         PromptPartTemplate(
             content_type=PromptPartContentType.TEXT,
-            template=EMOTION_AUDIO_BASE_TEMPLATE_PT_1,
+            template=EMOTION_AUDIO_EXPLANATION_TEMPLATE_PT_1,
         ),
         PromptPartTemplate(
             content_type=PromptPartContentType.AUDIO,
@@ -105,11 +122,7 @@
         ),
         PromptPartTemplate(
             content_type=PromptPartContentType.TEXT,
-            template=EMOTION_AUDIO_BASE_TEMPLATE_PT_3,
-        ),
-        PromptPartTemplate(
-            content_type=PromptPartContentType.TEXT,
-            template=EMOTION_AUDIO_BASE_TEMPLATE_EXPLANATION,
+            template=EMOTION_AUDIO_EXPLANATION_TEMPLATE_PT_3,
         ),
     ],
 )