Bugfixes and Testing #11

Merged: 8 commits merged on Oct 29, 2024
15 changes: 10 additions & 5 deletions code/backend/aispeechanalysis/function.py
@@ -4,7 +4,11 @@
import azure.functions as func
import azurefunctions.extensions.bindings.blob as blob
from aispeechanalysis.llm import LlmClient
from aispeechanalysis.utils import get_timestamps_for_sections, get_transcript
from aispeechanalysis.utils import (
get_locale,
get_timestamps_for_sections,
get_transcript,
)
from shared.config import settings
from shared.utils import load_blob, upload_string

@@ -37,9 +41,10 @@ async def ai_speech_analysis(client: blob.BlobClient) -> func.HttpResponse:
result_load_blob_json = json.loads(result_load_blob)
logging.debug(f"Loaded blob content as json: '{result_load_blob_json}'")

# Get transcript
logging.info("Get transcript from Azure AI Speech content.")
result_get_transcript = get_transcript(ai_speech_blob_json=result_load_blob_json)
# Get transcript and locale
logging.info("Get transcript and locale from Azure AI Speech content.")
result_get_transcript = get_transcript(result_stt=result_load_blob_json)
result_get_locale = get_locale(result_stt=result_load_blob_json)

# Use Open AI to generate scenes
logging.info("Use Open AI to generate scenes.")
@@ -53,7 +58,7 @@ async def ai_speech_analysis(client: blob.BlobClient) -> func.HttpResponse:
result_invoke_llm_chain = llm_client.invoke_llm_chain(
news_content=result_get_transcript,
news_show_details="This is a news show covering different news content.",
language=settings.MAIN_CONTENT_LANGUAGE,
language=result_get_locale,
)

# Save llm result
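Reviewer note: a minimal sketch of the updated orchestration in this function, using only the names visible in the diff; the Azure Functions trigger, blob loading and error handling are omitted, and the wrapper function below is illustrative.

# Sketch only: the detected locale now drives the LLM output language instead of
# the static settings.MAIN_CONTENT_LANGUAGE value.
from aispeechanalysis.llm import LlmClient
from aispeechanalysis.utils import get_locale, get_transcript


def run_analysis(result_load_blob_json: dict, llm_client: LlmClient):
    transcript = get_transcript(result_stt=result_load_blob_json)
    locale = get_locale(result_stt=result_load_blob_json)  # e.g. "es-ES", or "Unknown"
    return llm_client.invoke_llm_chain(
        news_content=transcript,
        news_show_details="This is a news show covering different news content.",
        language=locale,
    )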
83 changes: 71 additions & 12 deletions code/backend/aispeechanalysis/utils.py
@@ -1,22 +1,63 @@
import copy
import logging
import string
from datetime import datetime, timedelta
from typing import Any, List, Tuple


def get_transcript(ai_speech_blob_json: Any) -> str:
def remove_punctuation(text: str) -> str:
"""Removes punctuation from text.

text (str): Specifies the text that should be altered.
RETURNS (str): The altered text.
"""
return text.translate(str.maketrans("", "", f"{string.punctuation}¿¡"))


def get_normalized_text(text: str) -> str:
"""Normalizes text by removing punctuation and lowering all characters.

text (str): Specifies the text that should be altered.
RETURNS (str): The altered text.
"""
# Remove punctuation from the text
text_removed_punctuation = remove_punctuation(text=text)

# Lower text
return text_removed_punctuation.lower()
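Reviewer note: a quick sketch of what the two new helpers produce; the sample string is illustrative.

# Sketch only: demonstrates the helpers added above.
text = "¿Qué pasa, Madrid? ¡Buenos días!"
print(remove_punctuation(text=text))   # Qué pasa Madrid Buenos días
print(get_normalized_text(text=text))  # qué pasa madrid buenos días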


def get_transcript(result_stt: Any) -> str:
"""Creates and returns a transcript based on the content from Azure AI Speech STT batch transcription.

ai_speech_blob_json (Any): JSON content from Azure AI Speech STT batch transcription.
result_stt (Any): Specifies the JSON content from Azure AI Speech STT batch transcription.
RETURNS (str): The transcript extracted from the JSON file.
"""
ai_speech_blob_json_combined_recognized_phrases = ai_speech_blob_json.get(
result_stt_combined_recognized_phrases = result_stt.get(
"combinedRecognizedPhrases", [{"display": None}]
)
return ai_speech_blob_json_combined_recognized_phrases[0].get("display")
return result_stt_combined_recognized_phrases[0].get("display")


def get_word_details(result_stt: Any) -> List[Any]:
def get_locale(result_stt: Any) -> str:
"""Returns the locale from the content from Azure AI Speech STT batch transcription.

result_stt (Any): Specifies the JSON content from Azure AI Speech STT batch transcription.
RETURNS (str): The locale extracted from the JSON file. Returns 'Unknown' if the property cannot be found in the JSON transcript.
"""
result_stt_recognized_phrases = result_stt.get(
"recognizedPhrases", [{"locale": "Unknown"}]
)[0]
return result_stt_recognized_phrases.get("locale", "Unknown")
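Reviewer note: a hedged sketch of how get_transcript and get_locale behave on a trimmed-down payload; the sample dictionary below only contains the fields these helpers read and is not a full Azure AI Speech result.

# Sketch only: a minimal stand-in for the batch transcription output.
sample_result_stt = {
    "combinedRecognizedPhrases": [{"display": "Buenos días. Estas son las noticias."}],
    "recognizedPhrases": [{"locale": "es-ES"}],
}
print(get_transcript(result_stt=sample_result_stt))  # Buenos días. Estas son las noticias.
print(get_locale(result_stt=sample_result_stt))      # es-ES
print(get_locale(result_stt={}))                     # Unknown (fallback)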


def get_word_details(result_stt: Any, normalize_text: bool) -> List[Any]:
"""Returns all word details from a speech to text batch analysis process.

result_stt (Any): Specifies the JSON content from Azure AI Speech STT batch transcription.
normalize_text (bool): Specifies whether the text should be normalized.
RETURNS (List[Any]): The list of word detail entries, with display text normalized if requested.
"""
word_details = []
recognized_phrases = result_stt.get("recognizedPhrases", [])

@@ -26,8 +67,15 @@ def get_word_details(result_stt: Any) -> List[Any]:
"displayWords", []
)

# Append word details
word_details.extend(recognized_phrase_best_display_words)
if normalize_text:
for display_word in recognized_phrase_best_display_words:
display_word["displayText"] = get_normalized_text(
text=display_word["displayText"]
)
word_details.append(display_word)
else:
# Append word details
word_details.extend(recognized_phrase_best_display_words)

return word_details
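Reviewer note: a hedged sketch of the normalization path through get_word_details; it assumes the word entries sit under nBest[0]["displayWords"] as in the Azure batch transcription schema, since the exact lookup is in the collapsed part of this diff, and the offsets shown are illustrative.

# Sketch only: assumes recognizedPhrases[i]["nBest"][0]["displayWords"] holds the words.
sample_result_stt = {
    "recognizedPhrases": [
        {
            "nBest": [
                {
                    "displayWords": [
                        {"displayText": "¿Qué", "offset": "PT0.5S", "duration": "PT0.3S"},
                        {"displayText": "pasa?", "offset": "PT0.8S", "duration": "PT0.4S"},
                    ]
                }
            ]
        }
    ]
}
words = get_word_details(result_stt=sample_result_stt, normalize_text=True)
print([word["displayText"] for word in words])  # ['qué', 'pasa']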

@@ -41,8 +89,11 @@ def offset_and_duration_to_timedelta(timedelta_str: str) -> Tuple[str, timedelta
# Initialize
format_options = [
"PT%S.%fS",
"PT%SS",
"PT%MM%S.%fS",
"PT%MM%SS",
"PT%HH%MM%S.%fS",
"PT%HH%MM%SS",
]
td = None
format = None
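Reviewer note: the expanded format_options list now also covers offsets without fractional seconds and offsets with hour components. Below is a hedged, standalone sketch of the parsing approach; the helper name and the timedelta construction are illustrative, since the matching logic sits in the collapsed part of this function.

# Sketch only: maps offsets such as "PT3S", "PT1M2.5S" or "PT1H2M3S" to a timedelta.
from datetime import datetime, timedelta


def parse_offset(value: str) -> timedelta:
    formats = [
        "PT%S.%fS",
        "PT%SS",
        "PT%MM%S.%fS",
        "PT%MM%SS",
        "PT%HH%MM%S.%fS",
        "PT%HH%MM%SS",
    ]
    for fmt in formats:
        try:
            parsed = datetime.strptime(value, fmt)
        except ValueError:
            continue
        return timedelta(
            hours=parsed.hour,
            minutes=parsed.minute,
            seconds=parsed.second,
            microseconds=parsed.microsecond,
        )
    raise ValueError(f"Unsupported offset format: '{value}'")


print(parse_offset("PT1M2.5S"))  # 0:01:02.500000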
@@ -72,8 +123,14 @@ def offset_and_duration_to_timedelta(timedelta_str: str) -> Tuple[str, timedelta


def get_timestamps_for_sections(result_stt: Any, result_llm: Any) -> Any:
"""Calculates and adds timestamps to the llm result.

result_stt (Any): Specifies the JSON content from Azure AI Speech STT batch transcription.
result_llm (Any): Specifies the JSON content from Azure Open AI analysis.
RETURNS (Any): The JSON content from Azure Open AI analysis with added timestamps for start and end.
"""
# Get word details from stt result
word_details = get_word_details(result_stt=result_stt)
word_details = get_word_details(result_stt=result_stt, normalize_text=True)

# Prepare result
result = copy.deepcopy(result_llm.get("sections", []))
@@ -83,7 +140,9 @@
item_llm_current = "start"

# Get llm item words
item_llm_words = str(item_llm.get(item_llm_current, "")).split(sep=" ")
item_llm_words = str(
get_normalized_text(item_llm.get(item_llm_current, ""))
).split(sep=" ")

for index_word, item_word in enumerate(word_details):
# Get display text of current word item
@@ -127,9 +186,9 @@ def get_timestamps_for_sections(result_stt: Any, result_llm: Any) -> Any:
item_llm_current = "end"

# Get new llm item words
item_llm_words = str(item_llm.get(item_llm_current, "")).split(
sep=" "
)
item_llm_words = str(
get_normalized_text(item_llm.get(item_llm_current, ""))
).split(sep=" ")
else:
break

13 changes: 11 additions & 2 deletions code/backend/shared/utils.py
@@ -1,3 +1,4 @@
import asyncio
import hashlib
import logging
import os
@@ -165,11 +166,19 @@ async def copy_blob(
await lease.acquire(lease_duration=-1)

# Copy blob
await sink_blob_client.start_copy_from_url(
_ = await sink_blob_client.start_copy_from_url(
source_url=source_blob_client.url,
requires_sync=True,
requires_sync=False,
)

# Wait for copy to finish
status = (await sink_blob_client.get_blob_properties()).copy.status
logging.info(f"Status of copy activity: {status}")
while status not in ["success", "failed", "aborted"]:  # keep polling while the copy is still "pending"
await asyncio.sleep(1)
status = (await sink_blob_client.get_blob_properties()).copy.status
logging.info(f"Status of copy activity: {status}")

# Delete source blob
if delete_source:
await source_blob_client.delete_blob(
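Reviewer note on the copy change: with requires_sync=False the copy request returns immediately, so the new loop polls the copy status until it reaches a terminal state ("success", "failed" or "aborted"). Below is a hedged, standalone sketch of the same polling pattern with an added timeout guard; the helper name and timeout value are illustrative and not part of this PR.

# Sketch only: polls an asynchronous blob copy until it finishes or a timeout elapses.
import asyncio
import logging

from azure.storage.blob.aio import BlobClient


async def wait_for_copy(sink_blob_client: BlobClient, timeout_seconds: int = 300) -> str:
    elapsed = 0
    status = (await sink_blob_client.get_blob_properties()).copy.status
    while status not in ["success", "failed", "aborted"]:
        if elapsed >= timeout_seconds:
            raise TimeoutError(f"Blob copy did not finish within {timeout_seconds}s")
        await asyncio.sleep(1)
        elapsed += 1
        status = (await sink_blob_client.get_blob_properties()).copy.status
        logging.info(f"Status of copy activity: '{status}'")
    return status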
1 change: 1 addition & 0 deletions code/backend/videoupload/function.py
@@ -103,6 +103,7 @@ async def upload_video(client: blob.BlobClient):
result_create_transcription_job = await speech_client.create_transcription_job(
guid=videoupload_guid,
blob_url=result_upload_blob,
locale=settings.MAIN_CONTENT_LANGUAGE,
)

# Check AI Speech STT batch job
7 changes: 5 additions & 2 deletions code/backend/videoupload/speech.py
@@ -28,11 +28,14 @@ def __init__(
self.azure_ai_speech_api_version = azure_ai_speech_api_version
self.managed_identity_client_id = managed_identity_client_id

async def create_transcription_job(self, guid: str, blob_url: str) -> str:
async def create_transcription_job(
self, guid: str, blob_url: str, locale: str
) -> str:
"""Creates a batch transcription job for a blob file.

guid (str): Specifies the guid used as a name for the processing job.
blob_url (str): Specifies the blob url pointing to an audio file that will be transcribed.
locale (str): Specifies the locale of the audio file (e.g. 'es-ES', 'de-DE').
RETURNS (str): Returns the url of the transcription job.
"""
# Define url
@@ -46,7 +49,7 @@ async def create_transcription_job(self, guid: str, blob_url: str) -> str:
"displayName": f"{guid}",
"description": "STT for video file",
"contentUrls": [blob_url],
"locale": "es-ES",
"locale": locale,
"properties": {
"languageIdentification": {
"mode": "Single",
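Reviewer note: a hedged usage sketch of the updated method; it assumes an initialized speech client instance from this module and uses placeholder values for the guid and blob url.

# Sketch only: the locale is now passed through from configuration
# (settings.MAIN_CONTENT_LANGUAGE in videoupload/function.py) instead of the
# previously hard-coded "es-ES".
async def submit_transcription(speech_client, guid: str, blob_url: str) -> str:
    return await speech_client.create_transcription_job(
        guid=guid,
        blob_url=blob_url,
        locale="es-ES",  # or any supported locale, e.g. "de-DE"
    )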
1 change: 1 addition & 0 deletions code/infra/aiservice.tf
@@ -18,6 +18,7 @@ module "azure_ai_generic" {
trimsuffix(trimprefix(module.storage_account.storage_account_primary_blob_endpoint, "https://"), "/"),
trimsuffix(trimprefix(module.azure_open_ai.cognitive_account_endpoint, "https://"), "/"),
]
cognitive_account_local_auth_enabled = true
cognitive_account_deployments = {}
diagnostics_configurations = local.diagnostics_configurations
subnet_id = azapi_resource.subnet_private_endpoints.id
2 changes: 1 addition & 1 deletion code/infra/storage.tf
@@ -27,7 +27,7 @@ module "storage_account" {
"/subscriptions/${data.azurerm_client_config.current.subscription_id}/providers/Microsoft.Security/datascanners/storageDataScanner",
"/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/*/providers/Microsoft.CognitiveServices/accounts/*",
]
storage_public_network_access_enabled = false
storage_public_network_access_enabled = true
storage_nfsv3_enabled = false
storage_sftp_enabled = false
storage_shared_access_key_enabled = false # Required to be set to 'true' when creating a Windows host
2 changes: 1 addition & 1 deletion code/infra/terraform.tf
@@ -4,7 +4,7 @@ terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "4.5.0"
version = "4.7.0"
}
azapi = {
source = "azure/azapi"
1 change: 1 addition & 0 deletions config/PerfectThymeTech/vars.tfvars
@@ -11,6 +11,7 @@ tags = {
# Service variables
function_app_settings = {}
function_health_check_path = "/api/v1/heartbeat"
main_content_language = "es-ES"

# Logging variables
log_analytics_workspace_id = "/subscriptions/e82c5267-9dc4-4f45-ac13-abdd5e130d27/resourceGroups/ptt-dev-logging-rg/providers/Microsoft.OperationalInsights/workspaces/ptt-dev-log001"
8 changes: 3 additions & 5 deletions docs/SystemPrompt.txt
@@ -1,5 +1,4 @@
You are a world class assistant for identifying news sections. You will be provided with a transcript from a TV news show. Your task is to extract thematic news sections from the transcript that split the content into cohesive news topics. You must define a title and tags for each news section based on the content of each section and translate them into the language of the transcript.
---
Solve this step by step:
1. Process the provided transcript to understand the overall context.
2. Split the provided news content into news sections. The content of each section must cover a common topic or headline. Follow the grounding rules for new sections mentioned below. Assign an ID to every news section.
@@ -12,12 +11,11 @@ Solve this step by step:
---
Grounding rules for news sections:
- The first sentence of the transcript must be part of the first news section. The last sentence of the transcript must be part of the last news section.
- Each news section must start and end with a full sentence and must consist of 3 or more sentences.
- Every sentence of the transcript must be part of one news section. No sentence can be part of multiple news sections.
- If you are unsure about one sentence, then assign it to the previous section.
- Each news section must start and end with a full sentence and must consist of 2 or more sentences. It is ok if some news sections consist of 20 or more sentences and other sections only consist of 2 or more sentences.
- Each sentence of the transcript must be part of exactly one news section. No sentence can be part of multiple news sections.
- If you are unsure about the assignment of one sentence, then assign it to the previous section.
- The last sentence of one news section must be followed by the first sentence of the next news section.
- The news sections are not allowed to overlap and must be mutually exclusive. This means that between the first sentence and the last sentence of one news section, there can be no first sentence or last sentence of another news section in the transcript.
- It is ok if some news sections consist of 20 or more sentences and other sections only consist of 3 or more sentences.
- The transcript often starts with an introduction. The introduction summarizes some news sections of the transcript which reappear later in the transcript. Identify the introduction and give it the title "News Show Summary". Add the following tags to this news section: intro, overview, news summary.
- The transcript often contains a weather forecast section. Identify this news section and give it the title "Weather Forecast". Add the following tags to this news section: weather, weather forecast.
---