Add Speech + LLM Sample. #2704

Merged
52 changes: 52 additions & 0 deletions scenarios/python/console/premium_speech_demo/.vscode/tasks.json
@@ -0,0 +1,52 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "Configuration and Setup",
            "type": "shell",
            "command": "/bin/bash",
            "args": [
                "-c",
                "chmod u+x ${workspaceFolder}/app_manager.sh && ${workspaceFolder}/app_manager.sh configure"
            ],
            "group": {
                "kind": "build",
                "isDefault": false
            },
            "problemMatcher": [],
            "windows": {
                "command": "powershell",
                "args": [
                    "-ExecutionPolicy",
                    "Bypass",
                    "-File",
                    "${workspaceFolder}/app_manager.ps1",
                    "configure"
                ]
            }
        },
        {
            "label": "Run the App",
            "type": "shell",
            "command": "${workspaceFolder}/app_manager.sh",
            "args": [
                "run"
            ],
            "group": {
                "kind": "none",
                "isDefault": false
            },
            "problemMatcher": [],
            "windows": {
                "command": "powershell",
                "args": [
                    "-ExecutionPolicy",
                    "Bypass",
                    "-File",
                    "${workspaceFolder}/app_manager.ps1",
                    "run"
                ]
            }
        }
    ]
}
41 changes: 41 additions & 0 deletions scenarios/python/console/premium_speech_demo/README.md
@@ -0,0 +1,41 @@
# Instructions to run Speech + LLM Samples
This project integrates the Azure Cognitive Services Speech SDK with the Azure OpenAI Service to perform real-time speech recognition and refine the recognized text for improved grammar and readability.

## Features
1. Real-time speech-to-text transcription using Azure Cognitive Services Speech SDK.
2. Automatic refinement of recognized text using Azure OpenAI Service.
3. Grammar correction, minor rewrites for improved readability, and spelling fixes for predefined phrases.

## Run the Sample within VS Code
1. Install the "Azure AI Speech Toolkit" extension in VS Code.
2. Download this sample from the sample gallery to your local machine.
3. Trigger the "Azure AI Speech Toolkit: Configure Azure Speech Resources" command from the command palette to select an **Azure AI Service** resource.
4. Trigger the "Azure AI Speech Toolkit: Configure and Setup the Sample App" command from the command palette to configure and set up the sample. This command only needs to be run once.
5. Trigger the "Azure AI Speech Toolkit: Run the Sample App" command from the command palette to run the sample.

## Prerequisites
- Install [Python 3.7 or later](https://www.python.org/downloads/).

## Environment Setup
- The Azure AI Speech Toolkit sets these environment variables for you automatically. If you want to run the sample outside of VS Code, set the following environment variables manually (see the example after this list):

- `SPEECH_REGION`: Azure region for the Speech Service (e.g., `eastus`).
- `SPEECH_KEY`: Azure Cognitive Services Speech API key.
- `AZURE_OPENAI_ENDPOINT`: Endpoint for Azure OpenAI Service (e.g., `https://<your-resource-name>.openai.azure.com`).
- `AZURE_OPENAI_API_KEY`: API key for Azure OpenAI Service.
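
For example, in a bash shell you could export the variables and then launch the sample directly. The values below are placeholders; replace them with your own resource's region, keys, and endpoint:

```
# Placeholder values; replace them with your own resource details.
export SPEECH_REGION="eastus"
export SPEECH_KEY="<your-speech-api-key>"
export AZURE_OPENAI_ENDPOINT="https://<your-resource-name>.openai.azure.com"
export AZURE_OPENAI_API_KEY="<your-azure-openai-api-key>"

python premium_speech_demo.py
```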
---

## Example Output

When you say "how ar you" into the microphone:

### Raw Transcription:
```
RAW RECO: how ar you
```

### Refined Output:
```
REWRITE: How are you?
```
70 changes: 70 additions & 0 deletions scenarios/python/console/premium_speech_demo/app_manager.ps1
@@ -0,0 +1,70 @@
param(
    [string]$action
)

function Test-PythonInstalled {
    return Get-Command python -ErrorAction SilentlyContinue
}

function Test-PipInstalled {
    return Get-Command pip -ErrorAction SilentlyContinue
}

if ($action -eq "configure") {
    if (-not (Test-PythonInstalled)) {
        Write-Host "Python is not installed. Please install Python to proceed." -ForegroundColor Red
        exit 1
    }

    if (-not (Test-PipInstalled)) {
        Write-Host "pip is not installed. Please install pip to proceed." -ForegroundColor Red
        exit 1
    }

    Write-Host "Installing requirements packages..."
    try {
        pip install -r requirements.txt
        Write-Host "Requirements packages installation succeeded." -ForegroundColor Green
    }
    catch {
        Write-Host "Requirements packages installation failed. Please check your pip installation." -ForegroundColor Red
        exit 1
    }
}
elseif ($action -eq "run") {
    # Define the path to your .env file
    $envFilePath = ".env/.env.dev"

    if (Test-Path $envFilePath) {
        # Read each line of the file and process it
        Get-Content -Path $envFilePath | ForEach-Object {
            # Ignore empty lines and lines that start with `#` (comments)
            if ($_ -and $_ -notmatch '^\s*#') {
                # Split each line into key and value
                $parts = $_ -split '=', 2
                $key = $parts[0].Trim()
                $value = $parts[1].Trim()

                # Set the environment variable
                [System.Environment]::SetEnvironmentVariable($key, $value)
            }
        }

        # Map the toolkit-provided variables to the names the sample expects
        [System.Environment]::SetEnvironmentVariable("SPEECH_KEY", $env:SPEECH_RESOURCE_KEY)
        [System.Environment]::SetEnvironmentVariable("AZURE_OPENAI_API_KEY", $env:SPEECH_RESOURCE_KEY)
        [System.Environment]::SetEnvironmentVariable("SPEECH_REGION", $env:SERVICE_REGION)
        [System.Environment]::SetEnvironmentVariable("AZURE_OPENAI_ENDPOINT", "https://$env:CUSTOM_SUBDOMAIN_NAME.openai.azure.com/")

        Write-Host "Environment variables loaded from $envFilePath"
    }
    else {
        Write-Host "File not found: $envFilePath. You can create one to set environment variables or manually set secrets in environment variables."
    }

    Start-Process "python" -ArgumentList "premium_speech_demo.py"
}
else {
    Write-Host "Invalid action: $action" -ForegroundColor Red
    Write-Host "Usage: -action configure or -action run"
    exit 1
}
54 changes: 54 additions & 0 deletions scenarios/python/console/premium_speech_demo/app_manager.sh
@@ -0,0 +1,54 @@
#!/bin/bash

action=$1

function check_python_installed() {
    command -v python >/dev/null 2>&1
}

function check_pip_installed() {
    command -v pip >/dev/null 2>&1
}

if [ "$action" == "configure" ]; then
    echo "Installing Linux platform required dependencies..."
    sudo apt-get update
    sudo apt-get install -y build-essential libssl-dev libasound2 wget

    if ! check_python_installed; then
        echo -e "\e[31mPython is not installed. Please install Python to proceed.\e[0m"
        exit 1
    fi

    if ! check_pip_installed; then
        echo -e "\e[31mpip is not installed. Please install pip to proceed.\e[0m"
        exit 1
    fi

    echo "Installing requirements packages..."
    if ! pip install -r requirements.txt; then
        exit 1
    fi
elif [ "$action" == "run" ]; then

    # Load environment variables from .env file
    ENV_FILE=".env/.env.dev"
    if [ -f "$ENV_FILE" ]; then
        source "$ENV_FILE"

        # Ensure environment variables are available to the Python sample
        export SPEECH_KEY=$SPEECH_RESOURCE_KEY
        export AZURE_OPENAI_API_KEY=$SPEECH_RESOURCE_KEY
        export SPEECH_REGION=$SERVICE_REGION
        export AZURE_OPENAI_ENDPOINT="https://${CUSTOM_SUBDOMAIN_NAME}.openai.azure.com/"
        echo "Environment variables loaded from $ENV_FILE"

    else
        echo "Environment file $ENV_FILE not found. You can create one to set environment variables or manually set secrets in environment variables."
    fi
    python premium_speech_demo.py
else
    echo -e "\e[31mInvalid action: $action\e[0m"
    echo "Usage: $0 configure or $0 run"
    exit 1
fi
75 changes: 75 additions & 0 deletions scenarios/python/console/premium_speech_demo/premium_speech_demo.py
@@ -0,0 +1,75 @@
import os
import azure.cognitiveservices.speech as speechsdk
from openai import AzureOpenAI

# Initialize the speech recognition engine
service_region = os.environ.get('SPEECH_REGION')
speech_key=os.environ.get('SPEECH_KEY')
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, language="en-us")

# Initialize the Azure OpenAI (AOAI) client
client = AzureOpenAI(
    azure_endpoint=os.environ.get('AZURE_OPENAI_ENDPOINT'),
    api_key=os.environ.get('AZURE_OPENAI_API_KEY'),
    api_version="2024-10-21"
)


def rewrite_content(input_reco):

    my_messages = [
        {"role": "system", "content": "You are a helpful assistant that helps the user rewrite sentences."
            "Please fix the grammar errors in the user provided sentence and make it more readable."
            "You can do some minor rewriting, but DON'T make big changes, especially don't change the meaning of the sentence."
            "DON'T make up any new content. DON'T do question answering. The user is not asking you to answer questions."
            "You just need to copy and refine the sentences from the user."
            "Meanwhile, here is a list of phrases relevant to the sentences: \"{}\"."
            "If they appear in the sentence and are wrongly spelled, please fix them."
            "Here are some examples:\n"
            "User: how ar you\n"
            "Your response: How are you?\n\n"
            "User: what yur name?\n"
            "Your response: What's your name?\n\n"
            ""
        },
        {"role": "user", "content": ""}
    ]

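    # Fill the {} placeholder in the system prompt with domain-specific phrases so the model can fix their spelling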
    my_messages[0]["content"] = my_messages[0]["content"].format(
        "PAFE music festival, non-profit 501(c)(3), Changliang Liu"
    )
    my_messages[1]["content"] = input_reco

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=my_messages
    )

    return response.choices[0].message.content


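# Callback for each final recognition result: print the raw transcription and its LLM-refined rewrite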
def recognized_cb(evt: speechsdk.SpeechRecognitionEventArgs):
    current_sentence = evt.result.text
    if current_sentence == "":
        return

    print("RAW RECO: ", current_sentence)
    print("REWRITE : ", rewrite_content(current_sentence))


speech_recognizer.recognized.connect(recognized_cb)
result_future = speech_recognizer.start_continuous_recognition_async()
result_future.get()  # wait for the void future, so we know engine initialization is done.

print('Continuous Recognition is now running, say something.')
while True:
    # No real sample parallel work to do on this thread, so just wait for the user to type "stop".
    # Can't exit the function, or speech_recognizer will go out of scope and be destroyed while running.
    print('type "stop" then enter when done')
    stop = input()
    if stop.lower() == "stop":
        print('Stopping async recognition.')
        speech_recognizer.stop_continuous_recognition_async()
        break
2 changes: 2 additions & 0 deletions scenarios/python/console/premium_speech_demo/requirements.txt
@@ -0,0 +1,2 @@
azure-cognitiveservices-speech
openai