diff --git a/.github/workflows/android-browserstack.yml b/.github/workflows/android-browserstack.yml index d766f984..9dc7d97f 100644 --- a/.github/workflows/android-browserstack.yml +++ b/.github/workflows/android-browserstack.yml @@ -32,10 +32,10 @@ jobs: - run: pip3 install requests - - name: set up JDK 11 + - name: set up JDK 17 uses: actions/setup-java@v3 with: - java-version: '11' + java-version: '17' distribution: 'temurin' - name: Copy test_resources @@ -70,8 +70,8 @@ jobs: --access_key "${{secrets.BROWSERSTACK_ACCESS_KEY}}" --project_name "Cheetah-Android" --devices "android-min-max" - --app_path "cheetah-test-app/build/outputs/apk/debug/cheetah-test-app-debug.apk" - --test_path "cheetah-test-app/build/outputs/apk/androidTest/debug/cheetah-test-app-debug-androidTest.apk" + --app_path "cheetah-test-app/build/outputs/apk/en/debug/cheetah-test-app-en-debug.apk" + --test_path "cheetah-test-app/build/outputs/apk/androidTest/en/debug/cheetah-test-app-en-debug-androidTest.apk" build-integ: name: Run Android Integration Tests on BrowserStack @@ -90,10 +90,10 @@ jobs: - name: Install AppCenter CLI run: npm install -g appcenter-cli - - name: set up JDK 11 + - name: set up JDK 17 uses: actions/setup-java@v3 with: - java-version: '11' + java-version: '17' distribution: 'temurin' - name: Copy test_resources @@ -128,6 +128,6 @@ jobs: --access_key "${{secrets.BROWSERSTACK_ACCESS_KEY}}" --project_name "Cheetah-Android-Integration" --devices "android-min-max" - --app_path "cheetah-test-app/build/outputs/apk/release/cheetah-test-app-release.apk" - --test_path "cheetah-test-app/build/outputs/apk/androidTest/release/cheetah-test-app-release-androidTest.apk" + --app_path "cheetah-test-app/build/outputs/apk/en/release/cheetah-test-app-en-release.apk" + --test_path "cheetah-test-app/build/outputs/apk/androidTest/en/release/cheetah-test-app-en-release-androidTest.apk" diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index a7b56854..cd8c9163 
100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -42,10 +42,10 @@ jobs: - run: pip3 install requests - - name: set up JDK 11 + - name: set up JDK 17 uses: actions/setup-java@v3 with: - java-version: '11' + java-version: '17' distribution: 'temurin' - name: Copy test_resources @@ -89,6 +89,6 @@ jobs: --access_key "${{secrets.BROWSERSTACK_ACCESS_KEY}}" --project_name "Cheetah-Android-Performance" --devices "${{ matrix.device }}" - --app_path "cheetah-test-app/build/outputs/apk/debug/cheetah-test-app-debug.apk" - --test_path "cheetah-test-app/build/outputs/apk/androidTest/debug/cheetah-test-app-debug-androidTest.apk" + --app_path "cheetah-test-app/build/outputs/apk/en/debug/cheetah-test-app-en-debug.apk" + --test_path "cheetah-test-app/build/outputs/apk/androidTest/en/debug/cheetah-test-app-en-debug-androidTest.apk" diff --git a/.github/workflows/flutter-codestyle.yml b/.github/workflows/flutter-codestyle.yml index 2990d575..b255fd9a 100644 --- a/.github/workflows/flutter-codestyle.yml +++ b/.github/workflows/flutter-codestyle.yml @@ -29,11 +29,6 @@ jobs: run: flutter analyze --no-fatal-infos --no-fatal-warnings working-directory: binding/flutter - - name: Copy Demo Assets - run: ./copy_assets.sh - working-directory: demo/flutter - - name: Run Demo Analyzer run: flutter analyze --no-fatal-infos --no-fatal-warnings working-directory: demo/flutter - \ No newline at end of file diff --git a/.github/workflows/flutter-demos.yml b/.github/workflows/flutter-demos.yml index b0482191..2b39cdfc 100644 --- a/.github/workflows/flutter-demos.yml +++ b/.github/workflows/flutter-demos.yml @@ -28,10 +28,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: - java-version: '11' + java-version: '17' distribution: 'temurin' - name: Install Flutter ${{ matrix.flutter-version }} @@ -39,18 +39,18 @@ jobs: with: flutter-version: ${{ matrix.flutter-version }} - - name: Copy 
assets - run: ./copy_assets.sh - - name: Install dependencies run: flutter pub get + - name: Prepare demo + run: dart scripts/prepare_demo.dart en + - name: Build run: flutter build apk build-ios: name: Build iOS demo - runs-on: macos-12 + runs-on: macos-14 strategy: matrix: flutter-version: ['3.3.0', '3.22.2'] @@ -64,11 +64,11 @@ jobs: flutter-version: ${{ matrix.flutter-version }} architecture: x64 - - name: Copy assets - run: ./copy_assets.sh - - name: Install dependencies run: flutter pub get + - name: Prepare demo + run: dart scripts/prepare_demo.dart en + - name: Build run: flutter build ios --release --no-codesign diff --git a/.github/workflows/flutter.yml b/.github/workflows/flutter.yml index c7ceb23d..8450e09c 100644 --- a/.github/workflows/flutter.yml +++ b/.github/workflows/flutter.yml @@ -39,25 +39,31 @@ jobs: - name: Install dependencies run: flutter pub get + - name: Prepare demo + run: dart scripts/prepare_demo.dart en + - name: Run integration tests run: flutter test integration_test test-ios: - name: Test iOS - runs-on: pv-ios + name: Test iOS + runs-on: pv-ios + + steps: + - uses: actions/checkout@v3 - steps: - - uses: actions/checkout@v3 + - name: Copy test_resources + run: ./copy_test_resources.sh - - name: Copy test_resources - run: ./copy_test_resources.sh + - name: Inject AppID + run: sed -i '.bak' 's:{TESTING_ACCESS_KEY_HERE}:${{secrets.PV_VALID_ACCESS_KEY}}:' + integration_test/app_test.dart - - name: Inject AppID - run: sed -i '.bak' 's:{TESTING_ACCESS_KEY_HERE}:${{secrets.PV_VALID_ACCESS_KEY}}:' - integration_test/app_test.dart + - name: Install dependencies + run: flutter pub get - - name: Install dependencies - run: flutter pub get + - name: Prepare demo + run: dart scripts/prepare_demo.dart en - - name: Run integration tests - run: flutter test integration_test + - name: Run integration tests + run: flutter test integration_test diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 631ba5ba..f8e0c38d 100644 --- 
a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -78,4 +78,4 @@ jobs: run: go build - name: Test - run: go test -modfile="go_test.mod" -v -access_key ${{secrets.PV_VALID_ACCESS_KEY}} + run: go test -modfile="go_test.mod" -timeout 30m -v -access_key ${{secrets.PV_VALID_ACCESS_KEY}} diff --git a/.github/workflows/ios-browserstack.yml b/.github/workflows/ios-browserstack.yml index 4e3d8071..01e5e220 100644 --- a/.github/workflows/ios-browserstack.yml +++ b/.github/workflows/ios-browserstack.yml @@ -36,6 +36,9 @@ jobs: - name: Make build dir run: mkdir ddp + - name: Copy test_resources + run: ./copy_test_resources.sh + - name: Inject AccessKey run: sed -i '.bak' 's:{TESTING_ACCESS_KEY_HERE}:${{secrets.PV_VALID_ACCESS_KEY}}:' CheetahAppTestUITests/CheetahAppTestUITests.swift diff --git a/.github/workflows/ios-perf.yml b/.github/workflows/ios-perf.yml index 59f73325..5a24d143 100644 --- a/.github/workflows/ios-perf.yml +++ b/.github/workflows/ios-perf.yml @@ -46,6 +46,9 @@ jobs: - name: Make build dir run: mkdir ddp + - name: Copy test_resources + run: ./copy_test_resources.sh + - name: Inject AccessKey run: sed -i '.bak' 's:{TESTING_ACCESS_KEY_HERE}:${{secrets.PV_VALID_ACCESS_KEY}}:' PerformanceTest/PerformanceTest.swift diff --git a/.github/workflows/react-demos.yml b/.github/workflows/react-demos.yml index afa4f82a..8b06ea87 100644 --- a/.github/workflows/react-demos.yml +++ b/.github/workflows/react-demos.yml @@ -39,16 +39,11 @@ jobs: - name: Pre-build dependencies run: npm install yarn - - name: Build Local Web SDK - run: yarn && yarn copywasm && yarn build - working-directory: binding/web - - - name: Build Local React SDK - run: yarn && yarn build - working-directory: binding/react - - name: Install dependencies run: yarn install - - name: Build - run: yarn build + - name: Build English + run: yarn build en + + - name: Build Other Language + run: yarn build es diff --git a/.github/workflows/react-native-demos.yml 
b/.github/workflows/react-native-demos.yml index b2ca3fe8..c89b83d5 100644 --- a/.github/workflows/react-native-demos.yml +++ b/.github/workflows/react-native-demos.yml @@ -36,7 +36,16 @@ jobs: node-version: ${{ matrix.node-version }} - name: Pre-build dependencies - run: npm install yarn + run: npm install -g yarn + + # ************ REMOVE AFTER RELEASE ***************** + - name: Build and package binding + working-directory: binding/react-native + run: yarn && yarn pkg + + - name: Add to demo + run: yarn add ../../binding/react-native/pkg/picovoice-cheetah-react-native-2.1.0.tgz + # *************************************************** - name: Install dependencies run: yarn android-install @@ -64,7 +73,16 @@ jobs: node-version: ${{ matrix.node-version }} - name: Pre-build dependencies - run: npm install yarn + run: npm install -g yarn + + # ************ REMOVE AFTER RELEASE ***************** + - name: Build and package binding + working-directory: binding/react-native + run: yarn && yarn pkg + + - name: Add to demo + run: yarn add ../../binding/react-native/pkg/picovoice-cheetah-react-native-2.1.0.tgz + # *************************************************** - name: Install dependencies run: yarn ios-install diff --git a/.github/workflows/react.yml b/.github/workflows/react.yml index cb25b231..d2922627 100644 --- a/.github/workflows/react.yml +++ b/.github/workflows/react.yml @@ -44,10 +44,6 @@ jobs: - name: Pre-build dependencies run: npm install yarn - - name: Build Local Web SDK - run: yarn && yarn copywasm && yarn build - working-directory: binding/web - - name: Install dependencies run: yarn install diff --git a/.github/workflows/swift-codestyle.yml b/.github/workflows/swift-codestyle.yml index 8c51f677..efa06aa1 100644 --- a/.github/workflows/swift-codestyle.yml +++ b/.github/workflows/swift-codestyle.yml @@ -12,7 +12,7 @@ on: - '**/*.swift' jobs: - check-switch-codestyle: + check-swift-codestyle: runs-on: ubuntu-latest steps: @@ -21,4 +21,4 @@ jobs: - name: 
Check swift codestyle uses: norio-nomura/action-swiftlint@3.2.1 with: - args: lint --config resources/.lint/swift/.swiftlint.yml --strict \ No newline at end of file + args: lint --config resources/.lint/swift/.swiftlint.yml --strict diff --git a/README.md b/README.md index fe152a1c..2334e54b 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ AccessKey also verifies that your usage is within the limits of your account. Ev ## Language Support -- Cheetah Streaming Speech-to-Text currently supports English only. +- Cheetah Streaming Speech-to-Text currently supports English, French, German, Italian, Portuguese, and Spanish. - Support for [additional languages is available for commercial customers](https://picovoice.ai/consulting/) on a case-by-case basis. ## Demos @@ -154,7 +154,11 @@ Replace `"${YOUR_ACCESS_KEY_HERE}"` in the file [MainActivity.java](./demo/andro To run the Cheetah demo on Android or iOS with Flutter, you must have the [Flutter SDK](https://flutter.dev/docs/get-started/install) installed on your system. Once installed, you can run `flutter doctor` to determine any other missing requirements for your relevant platform. Once your environment has been set up, launch a simulator or connect an Android/iOS device. -Before launching the app, use the [copy_assets.sh](./demo/flutter/copy_assets.sh) script to copy the cheetah demo model file into the demo project. (**NOTE**: on Windows, Git Bash or another bash shell is required, or you will have to manually copy the context into the project.). +Run the `prepare_demo` script from [demo/flutter](./demo/flutter) with a language code to set up the demo in the language of your choice (e.g. `de` -> German, `es` -> Spanish). To see a list of available languages, run `prepare_demo` without a language code. + +```console +dart scripts/prepare_demo.dart ${LANGUAGE} +``` Replace `"${YOUR_ACCESS_KEY_HERE}"` in the file [main.dart](./demo/flutter/lib/main.dart) with your `AccessKey`. 
@@ -828,6 +832,11 @@ function App(props) { ## Releases +### v2.1.0 - December 10th, 2024 + +- Added language support for French, German, Italian, Portuguese and Spanish +- Various bug fixes and performance improvements + ### v2.0.0 - November 27th, 2023 - Improvements to error reporting diff --git a/binding/android/Cheetah/cheetah/build.gradle b/binding/android/Cheetah/cheetah/build.gradle index 253acca1..da7f13c6 100644 --- a/binding/android/Cheetah/cheetah/build.gradle +++ b/binding/android/Cheetah/cheetah/build.gradle @@ -2,7 +2,7 @@ apply plugin: 'com.android.library' ext { PUBLISH_GROUP_ID = 'ai.picovoice' - PUBLISH_VERSION = '2.0.0' + PUBLISH_VERSION = '2.1.0' PUBLISH_ARTIFACT_ID = 'cheetah-android' } diff --git a/binding/android/Cheetah/gradle/wrapper/gradle-wrapper.properties b/binding/android/Cheetah/gradle/wrapper/gradle-wrapper.properties index 1659a764..db13f6a0 100644 --- a/binding/android/Cheetah/gradle/wrapper/gradle-wrapper.properties +++ b/binding/android/Cheetah/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ #Tue Jun 29 22:27:49 PDT 2021 distributionBase=GRADLE_USER_HOME -distributionUrl=https\://services.gradle.org/distributions/gradle-6.7.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip distributionPath=wrapper/dists zipStorePath=wrapper/dists zipStoreBase=GRADLE_USER_HOME diff --git a/binding/android/CheetahTestApp/.gitignore b/binding/android/CheetahTestApp/.gitignore index afb688ed..d31a4874 100644 --- a/binding/android/CheetahTestApp/.gitignore +++ b/binding/android/CheetahTestApp/.gitignore @@ -8,8 +8,8 @@ .externalNativeBuild release test_resources -cheetah_params.pv +cheetah_params*.pv *.wav *.jks -!.dummy.jks \ No newline at end of file +!.dummy.jks diff --git a/binding/android/CheetahTestApp/build.gradle b/binding/android/CheetahTestApp/build.gradle index ebfdad29..01dbd8bf 100644 --- a/binding/android/CheetahTestApp/build.gradle +++ b/binding/android/CheetahTestApp/build.gradle @@ 
-8,7 +8,7 @@ buildscript { mavenCentral() } dependencies { - classpath 'com.android.tools.build:gradle:7.2.2' + classpath 'com.android.tools.build:gradle:8.2.2' } } diff --git a/binding/android/CheetahTestApp/cheetah-test-app/build.gradle b/binding/android/CheetahTestApp/cheetah-test-app/build.gradle index 2317b40f..d6509098 100644 --- a/binding/android/CheetahTestApp/cheetah-test-app/build.gradle +++ b/binding/android/CheetahTestApp/cheetah-test-app/build.gradle @@ -72,14 +72,54 @@ android { testBuildType("release") } + def testDataFile = file('../../../../resources/.test/test_data.json') + def parsedJson = new groovy.json.JsonSlurper().parseText(testDataFile.text) + def languages = [] + parsedJson.tests.parameters.each { a -> + languages.add(a.language) + } + + flavorDimensions "language" + productFlavors { + en { + getIsDefault().set(true) + } + + languages.each { language -> + "$language" { + applicationIdSuffix ".$language" + + } + } + + all { flavor -> + delete fileTree("$projectDir/src/main/assets") { + exclude '**/.gitkeep' + } + String suffix = (flavor.name != "en") ? 
"_${flavor.name}" : "" + task("${flavor.name}CopyParams", type: Copy) { + from("$projectDir/../../../../lib/common/") + include("cheetah_params${suffix}.pv") + into("$projectDir/src/main/assets/models") + } + task("${flavor.name}CopyAudio", type: Copy) { + description = "Copy ${flavor.name} audio resources" + from("$projectDir/../../../../resources/audio_samples/") + include("test${suffix}.wav") + into("$projectDir/src/main/assets/audio_samples") + } + } + } sourceSets { androidTest { java { if (System.getProperty("testBuildType", "debug") == "perf") { - exclude "**/CheetahTest.java" + exclude "**/StandardTests.java" exclude "**/IntegrationTest.java" + exclude "**/LanguageTests.java" } else if (System.getProperty("testBuildType", "debug") == "integ") { - exclude "**/CheetahTest.java" + exclude "**/StandardTests.java" + exclude "**/LanguageTests.java" exclude "**/PerformanceTest.java" } else { exclude "**/IntegrationTest.java" @@ -89,18 +129,6 @@ android { } } - task("copyParams", type: Copy) { - from("$projectDir/../../../../lib/common/") - include("cheetah_params.pv") - into("$projectDir/src/main/assets/models") - } - task("copyAudio", type: Copy) { - description = "Copy audio resources" - from("$projectDir/../../../../resources/audio_samples/") - include("test.wav") - into("$projectDir/src/main/assets/audio_samples/") - } - compileOptions { sourceCompatibility JavaVersion.VERSION_1_8 targetCompatibility JavaVersion.VERSION_1_8 @@ -109,6 +137,8 @@ android { lint { abortOnError false } + + namespace 'ai.picovoice.cheetah.testapp' } dependencies { @@ -116,7 +146,8 @@ dependencies { implementation 'androidx.appcompat:appcompat:1.4.2' implementation 'com.google.android.material:material:1.6.1' implementation 'androidx.constraintlayout:constraintlayout:2.1.4' - implementation 'ai.picovoice:cheetah-android:2.0.0' + implementation 'com.google.code.gson:gson:2.10' + implementation 'ai.picovoice:cheetah-android:2.1.0' // Espresso UI Testing androidTestImplementation 
'androidx.test.ext:junit:1.1.3' @@ -127,8 +158,15 @@ dependencies { } afterEvaluate { - tasks."mergeDebugAssets".dependsOn "copyParams" - tasks."mergeReleaseAssets".dependsOn "copyParams" - tasks."mergeDebugAssets".dependsOn "copyAudio" - tasks."mergeReleaseAssets".dependsOn "copyAudio" -} \ No newline at end of file + android.productFlavors.all { + flavor -> + tasks."merge${flavor.name.capitalize()}DebugAssets".dependsOn "${flavor.name}CopyParams" + tasks."merge${flavor.name.capitalize()}ReleaseAssets".dependsOn "${flavor.name}CopyParams" + tasks."generate${flavor.name.capitalize()}ReleaseLintVitalReportModel".dependsOn "${flavor.name}CopyParams" + tasks."lintVitalAnalyze${flavor.name.capitalize()}Release".dependsOn "${flavor.name}CopyParams" + tasks."merge${flavor.name.capitalize()}DebugAssets".dependsOn "${flavor.name}CopyAudio" + tasks."merge${flavor.name.capitalize()}ReleaseAssets".dependsOn "${flavor.name}CopyAudio" + tasks."generate${flavor.name.capitalize()}ReleaseLintVitalReportModel".dependsOn "${flavor.name}CopyAudio" + tasks."lintVitalAnalyze${flavor.name.capitalize()}Release".dependsOn "${flavor.name}CopyAudio" + } +} diff --git a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/BaseTest.java b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/BaseTest.java index b8e84b27..e3174cf0 100644 --- a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/BaseTest.java +++ b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/BaseTest.java @@ -1,5 +1,5 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2024 Picovoice Inc. You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. 
@@ -22,6 +22,7 @@ import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -50,11 +51,58 @@ public void Setup() throws IOException { assetManager = testContext.getAssets(); extractAssetsRecursively("test_resources"); testResourcesPath = new File(appContext.getFilesDir(), "test_resources").getAbsolutePath(); - defaultModelPath = new File(testResourcesPath, "cheetah_params.pv").getAbsolutePath(); + defaultModelPath = new File(testResourcesPath, "model_files/cheetah_params.pv").getAbsolutePath(); accessKey = appContext.getString(R.string.pvTestingAccessKey); } + public static String getTestDataString() throws IOException { + Context testContext = InstrumentationRegistry.getInstrumentation().getContext(); + AssetManager assetManager = testContext.getAssets(); + + InputStream is = new BufferedInputStream(assetManager.open("test_resources/test_data.json"), 256); + ByteArrayOutputStream result = new ByteArrayOutputStream(); + + byte[] buffer = new byte[256]; + int bytesRead; + while ((bytesRead = is.read(buffer)) != -1) { + result.write(buffer, 0, bytesRead); + } + + return result.toString("UTF-8"); + } + + protected static float getWordErrorRate( + String transcript, + String expectedTranscript, + boolean useCER) { + String splitter = (useCER) ? 
"" : " "; + return (float) levenshteinDistance( + transcript.split(splitter), + expectedTranscript.split(splitter)) / transcript.length(); + } + + private static int levenshteinDistance(String[] words1, String[] words2) { + int[][] res = new int[words1.length + 1][words2.length + 1]; + for (int i = 0; i <= words1.length; i++) { + res[i][0] = i; + } + for (int j = 0; j <= words2.length; j++) { + res[0][j] = j; + } + for (int i = 1; i <= words1.length; i++) { + for (int j = 1; j <= words2.length; j++) { + res[i][j] = Math.min( + Math.min( + res[i - 1][j] + 1, + res[i][j - 1] + 1), + res[i - 1][j - 1] + (words1[i - 1].equalsIgnoreCase(words2[j - 1]) ? 0 : 1) + ); + } + } + return res[words1.length][words2.length]; + } + private void extractAssetsRecursively(String path) throws IOException { String[] list = assetManager.list(path); if (list.length > 0) { diff --git a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/LanguageTests.java b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/LanguageTests.java new file mode 100644 index 00000000..81b22839 --- /dev/null +++ b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/LanguageTests.java @@ -0,0 +1,142 @@ +/* + Copyright 2024 Picovoice Inc. + + You may not use this file except in compliance with the license. A copy of the license is + located in the "LICENSE" file accompanying this source. + + Unless required by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + express or implied. See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package ai.picovoice.cheetah.testapp; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import androidx.test.ext.junit.runners.AndroidJUnit4; + +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import ai.picovoice.cheetah.Cheetah; +import ai.picovoice.cheetah.CheetahException; + + +@RunWith(Parameterized.class) +public class LanguageTests extends BaseTest { + @Parameterized.Parameter(value = 0) + public String language; + + @Parameterized.Parameter(value = 1) + public String modelFile; + + @Parameterized.Parameter(value = 2) + public String testAudioFile; + + @Parameterized.Parameter(value = 3) + public String expectedTranscript; + + @Parameterized.Parameter(value = 4) + public String[] punctuations; + + @Parameterized.Parameter(value = 5) + public float errorRate; + + @Parameterized.Parameters(name = "{0}") + public static Collection initParameters() throws IOException { + String testDataJsonString = getTestDataString(); + + JsonParser parser = new JsonParser(); + JsonObject testDataJson = parser.parse(testDataJsonString).getAsJsonObject(); + JsonArray languageTests = testDataJson + .getAsJsonObject("tests") + .getAsJsonArray("language_tests"); + + List parameters = new ArrayList<>(); + for (int i = 0; i < languageTests.size(); i++) { + JsonObject testData = languageTests.get(i).getAsJsonObject(); + + String language = testData.get("language").getAsString(); + String audioFile = testData.get("audio_file").getAsString(); + String transcript = testData.get("transcript").getAsString(); + float errorRate = testData.get("error_rate").getAsFloat(); + + final JsonArray punctuationsJson = testData.getAsJsonArray("punctuations"); 
+ final String[] punctuations = new String[punctuationsJson.size()]; + for (int j = 0; j < punctuationsJson.size(); j++) { + punctuations[j] = punctuationsJson.get(j).getAsString(); + } + + String modelFile; + if (language.equals("en")) { + modelFile = "model_files/cheetah_params.pv"; + } else { + modelFile = String.format("model_files/cheetah_params_%s.pv", language); + } + + String testAudioFile = String.format("audio_samples/%s", audioFile); + + parameters.add(new Object[]{ + language, + modelFile, + testAudioFile, + transcript, + punctuations, + errorRate + }); + } + + return parameters; + } + + @Test + public void testTranscribe() throws Exception { + String modelPath = new File(testResourcesPath, modelFile).getAbsolutePath(); + Cheetah cheetah = new Cheetah.Builder() + .setAccessKey(accessKey) + .setModelPath(modelPath) + .build(appContext); + + File audioFile = new File(testResourcesPath, testAudioFile); + String result = processTestAudio(cheetah, audioFile); + cheetah.delete(); + + String transcript = expectedTranscript; + for (String punctuation : punctuations) { + transcript = transcript.replace(punctuation, ""); + } + + boolean useCER = language.equals("ja"); + assertTrue(getWordErrorRate(result, transcript, useCER) < errorRate); + } + + @Test + public void testTranscribeWithPunctuation() throws Exception { + String modelPath = new File(testResourcesPath, modelFile).getAbsolutePath(); + Cheetah cheetah = new Cheetah.Builder() + .setAccessKey(accessKey) + .setModelPath(modelPath) + .setEnableAutomaticPunctuation(true) + .build(appContext); + + File audioFile = new File(testResourcesPath, testAudioFile); + String result = processTestAudio(cheetah, audioFile); + cheetah.delete(); + + boolean useCER = language.equals("ja"); + assertTrue(getWordErrorRate(result, expectedTranscript, useCER) < errorRate); + } +} diff --git a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/PerformanceTest.java 
b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/PerformanceTest.java index d4a76587..87cdb7d3 100644 --- a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/PerformanceTest.java +++ b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/PerformanceTest.java @@ -78,7 +78,7 @@ public void testProcPerformance() throws Exception { .setModelPath(defaultModelPath) .build(appContext); - File testAudio = new File(testResourcesPath, "audio/test.wav"); + File testAudio = new File(testResourcesPath, "audio_samples/test.wav"); long totalNSec = 0; for (int i = 0; i < numTestIterations + 1; i++) { diff --git a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/CheetahTest.java b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/StandardTests.java similarity index 67% rename from binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/CheetahTest.java rename to binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/StandardTests.java index 349ec73f..80fff9ee 100644 --- a/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/CheetahTest.java +++ b/binding/android/CheetahTestApp/cheetah-test-app/src/androidTest/java/ai/picovoice/cheetah/testapp/StandardTests.java @@ -1,5 +1,5 @@ /* - Copyright 2022-2023 Picovoice Inc. + Copyright 2024 Picovoice Inc. You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. 
@@ -27,42 +27,7 @@ @RunWith(AndroidJUnit4.class) -public class CheetahTest extends BaseTest { - - private final String transcript = - "Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel"; - private final String transcriptWithPunctuation = - "Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel."; - - @Test - public void testTranscribe() throws Exception { - Cheetah cheetah = new Cheetah.Builder() - .setAccessKey(accessKey) - .setModelPath(defaultModelPath) - .build(appContext); - - File audioFile = new File(testResourcesPath, "audio/test.wav"); - String result = processTestAudio(cheetah, audioFile); - cheetah.delete(); - - assertEquals(transcript, result); - } - - @Test - public void testTranscribeWithPunctuation() throws Exception { - Cheetah cheetah = new Cheetah.Builder() - .setAccessKey(accessKey) - .setModelPath(defaultModelPath) - .setEnableAutomaticPunctuation(true) - .build(appContext); - - File audioFile = new File(testResourcesPath, "audio/test.wav"); - String result = processTestAudio(cheetah, audioFile); - cheetah.delete(); - - assertEquals(transcriptWithPunctuation, result); - } - +public class StandardTests extends BaseTest { @Test public void getVersion() throws CheetahException { Cheetah cheetah = new Cheetah.Builder() diff --git a/binding/android/CheetahTestApp/copy_test_resources.sh b/binding/android/CheetahTestApp/copy_test_resources.sh index a8cb8148..ad3f16e1 100755 --- a/binding/android/CheetahTestApp/copy_test_resources.sh +++ b/binding/android/CheetahTestApp/copy_test_resources.sh @@ -1,12 +1,20 @@ -if [ ! -d "./cheetah-test-app/src/androidTest/assets/test_resources/audio" ] +if [ ! -d "./cheetah-test-app/src/androidTest/assets/test_resources/audio_samples" ] then echo "Creating test audio samples directory..." 
- mkdir -p ./cheetah-test-app/src/androidTest/assets/test_resources/audio + mkdir -p ./cheetah-test-app/src/androidTest/assets/test_resources/audio_samples fi echo "Copying test audio samples..." -cp ../../../resources/audio_samples/test.wav ./cheetah-test-app/src/androidTest/assets/test_resources/audio/test.wav +cp ../../../resources/audio_samples/* ./cheetah-test-app/src/androidTest/assets/test_resources/audio_samples/ -echo "Copying cheetah model..." -cp ../../../lib/common/cheetah_params.pv ./cheetah-test-app/src/main/assets/cheetah_params.pv -cp ../../../lib/common/cheetah_params.pv ./cheetah-test-app/src/androidTest/assets/test_resources/cheetah_params.pv +if [ ! -d "./cheetah-test-app/src/androidTest/assets/test_resources/model_files" ] +then + echo "Creating test model files directory..." + mkdir -p ./cheetah-test-app/src/androidTest/assets/test_resources/model_files +fi + +echo "Copying cheetah models..." +cp ../../../lib/common/* ./cheetah-test-app/src/androidTest/assets/test_resources/model_files + +echo "Copying test data file..." 
+cp ../../../resources/.test/test_data.json ./cheetah-test-app/src/androidTest/assets/test_resources diff --git a/binding/android/CheetahTestApp/gradle/wrapper/gradle-wrapper.properties b/binding/android/CheetahTestApp/gradle/wrapper/gradle-wrapper.properties index 740ab489..3d63a902 100644 --- a/binding/android/CheetahTestApp/gradle/wrapper/gradle-wrapper.properties +++ b/binding/android/CheetahTestApp/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ #Tue Jun 29 23:02:09 PDT 2021 distributionBase=GRADLE_USER_HOME -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip distributionPath=wrapper/dists zipStorePath=wrapper/dists zipStoreBase=GRADLE_USER_HOME diff --git a/binding/android/README.md b/binding/android/README.md index 12d17039..65ce63a2 100644 --- a/binding/android/README.md +++ b/binding/android/README.md @@ -87,6 +87,13 @@ When done, resources have to be released explicitly: cheetah.delete(); ``` +### Language Model + +Add the Cheetah model file to your Android application by: + +1. Either create a model in [Picovoice Console](https://console.picovoice.ai/) or use one of the default language models found in [lib/common](../../lib/common). +2. Add the model as a bundled resource by placing it under the assets directory of your Android project (`src/main/assets/`). + ## Demo App For example usage refer to our [Android demo application](../../demo/android). 
diff --git a/binding/dotnet/Cheetah/Cheetah.csproj b/binding/dotnet/Cheetah/Cheetah.csproj index 027c5ae3..aff70714 100644 --- a/binding/dotnet/Cheetah/Cheetah.csproj +++ b/binding/dotnet/Cheetah/Cheetah.csproj @@ -1,7 +1,7 @@  - net8.0;net6.0;netcoreapp3.0;netstandard2.0 - 2.0.2 + net8.0;net6.0;netcoreapp3.0;netstandard2.0; + 2.1.0 Picovoice Cheetah Speech-to-Text Engine diff --git a/binding/dotnet/CheetahTest/CheetahTest.csproj b/binding/dotnet/CheetahTest/CheetahTest.csproj index 0fcef2a9..742c8525 100644 --- a/binding/dotnet/CheetahTest/CheetahTest.csproj +++ b/binding/dotnet/CheetahTest/CheetahTest.csproj @@ -6,11 +6,11 @@ - + diff --git a/binding/dotnet/CheetahTest/MainTest.cs b/binding/dotnet/CheetahTest/MainTest.cs index 081218f6..30d041ba 100644 --- a/binding/dotnet/CheetahTest/MainTest.cs +++ b/binding/dotnet/CheetahTest/MainTest.cs @@ -12,12 +12,13 @@ specific language governing permissions and limitations under the License. using System; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Reflection; -using Fastenshtein; - using Microsoft.VisualStudio.TestTools.UnitTesting; +using Newtonsoft.Json.Linq; + using Pv; namespace CheetahTest @@ -25,63 +26,119 @@ namespace CheetahTest [TestClass] public class MainTest { - private static string ACCESS_KEY; + private static string _accessKey; + private static readonly string ROOT_DIR = Path.Combine(AppContext.BaseDirectory, "../../../../../.."); - private static readonly string _relativeDir = AppContext.BaseDirectory; + [ClassInitialize] + public static void ClassInitialize(TestContext _) + { + _accessKey = Environment.GetEnvironmentVariable("ACCESS_KEY"); + } - private List GetPcmFromFile(string audioFilePath, int expectedSampleRate) + [Serializable] + private class LanguageTestJson { - List data = new List(); - using (BinaryReader reader = new BinaryReader(File.Open(audioFilePath, FileMode.Open))) - { - reader.ReadBytes(24); // skip over part of the header - 
Assert.AreEqual(reader.ReadInt32(), expectedSampleRate, "Specified sample rate did not match test file."); - reader.ReadBytes(16); // skip over the rest of the header + public string language { get; set; } + public string audio_file { get; set; } + public string transcript { get; set; } - while (reader.BaseStream.Position != reader.BaseStream.Length) - { - data.Add(reader.ReadInt16()); - } - } + public string[] punctuations { get; set; } + public float error_rate { get; set; } + } - return data; + private static JObject LoadJsonTestData() + { + string content = File.ReadAllText(Path.Combine(ROOT_DIR, "resources/.test/test_data.json")); + return JObject.Parse(content); } - public static IEnumerable TestParameters + private static IEnumerable LanguageTestParameters { get { - List testParameters = new List(); + JObject testDataJson = LoadJsonTestData(); + IList languageTestJson = ((JArray)testDataJson["tests"]["language_tests"]).ToObject>(); + return languageTestJson + .Select(x => new object[] { + x.language, + x.audio_file, + x.transcript, + x.punctuations, + x.error_rate, + }); + } + } + + private static string AppendLanguage(string s, string language) + { + return language == "en" ? s : $"{s}_{language}"; + } + + private static int LevenshteinDistance(string[] transcriptWords, string[] referenceWords) + { + int referenceWordsLen = referenceWords.Length; + int transcriptWordsLen = transcriptWords.Length; - string transcript = "Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel"; - string transcriptWithPunctuation = "Mr. 
Quilter is the apostle of the middle classes and we are glad to welcome his gospel."; + int[,] dp = new int[referenceWordsLen + 1, transcriptWordsLen + 1]; - testParameters.Add(new object[] + for (int i = 0; i <= referenceWordsLen; i++) dp[i, 0] = i; + for (int j = 0; j <= transcriptWordsLen; j++) dp[0, j] = j; + + for (int i = 1; i <= referenceWordsLen; i++) + { + for (int j = 1; j <= transcriptWordsLen; j++) { - "en", - "test.wav", - transcript, - transcriptWithPunctuation, - 0.025f - }); - - return testParameters; + int cost = referenceWords[i - 1].ToUpper() == transcriptWords[j - 1].ToUpper() ? 0 : 1; + + dp[i, j] = Math.Min( + Math.Min(dp[i - 1, j] + 1, dp[i, j - 1] + 1), + dp[i - 1, j - 1] + cost + ); + } } + + return dp[referenceWordsLen, transcriptWordsLen]; } - static float GetErrorRate(string transcript, string referenceTranscript) - => Levenshtein.Distance(transcript, referenceTranscript) / (float)referenceTranscript.Length; + private static double GetErrorRate(string transcript, string referenceTranscript) + { + string[] transcriptWords = transcript.Split(' '); + string[] referenceTranscriptWords = referenceTranscript.Split(' '); - [ClassInitialize] - public static void ClassInitialize(TestContext _) + int editDistance = LevenshteinDistance(transcriptWords, referenceTranscriptWords); + return (double)editDistance / referenceTranscriptWords.Length; + } + + private static string GetModelPath(string language) { - ACCESS_KEY = Environment.GetEnvironmentVariable("ACCESS_KEY"); + return Path.Combine( + ROOT_DIR, + "lib/common", + $"{AppendLanguage("cheetah_params", language)}.pv"); + } + + private List GetPcmFromFile(string audioFilePath, int expectedSampleRate) + { + List data = new List(); + using (BinaryReader reader = new BinaryReader(File.Open(audioFilePath, FileMode.Open))) + { + reader.ReadBytes(24); // skip over part of the header + Assert.AreEqual(reader.ReadInt32(), expectedSampleRate, "Specified sample rate did not match test file."); + 
reader.ReadBytes(16); // skip over the rest of the header + + while (reader.BaseStream.Position != reader.BaseStream.Length) + { + data.Add(reader.ReadInt16()); + } + } + + return data; } [TestMethod] public void TestVersion() { - using (Cheetah cheetah = Cheetah.Create(ACCESS_KEY)) + using (Cheetah cheetah = Cheetah.Create(_accessKey)) { Assert.IsFalse(string.IsNullOrWhiteSpace(cheetah?.Version), "Cheetah did not return a valid version number."); } @@ -90,7 +147,7 @@ public void TestVersion() [TestMethod] public void TestSampleRate() { - using (Cheetah cheetah = Cheetah.Create(ACCESS_KEY)) + using (Cheetah cheetah = Cheetah.Create(_accessKey)) { int num = 0; Assert.IsTrue(int.TryParse(cheetah.SampleRate.ToString(), out num), "Cheetah did not return a valid sample rate."); @@ -100,7 +157,7 @@ public void TestSampleRate() [TestMethod] public void TestFrameLength() { - using (Cheetah cheetah = Cheetah.Create(ACCESS_KEY)) + using (Cheetah cheetah = Cheetah.Create(_accessKey)) { int num = 0; Assert.IsTrue(int.TryParse(cheetah.FrameLength.ToString(), out num), "Cheetah did not return a valid frame length."); @@ -108,20 +165,21 @@ public void TestFrameLength() } [TestMethod] - [DynamicData(nameof(TestParameters))] + [DynamicData(nameof(LanguageTestParameters))] public void TestProcess( string language, string testAudioFile, string referenceTranscript, - string _, + string[] punctuations, float targetErrorRate) { using (Cheetah cheetah = Cheetah.Create( - accessKey: ACCESS_KEY, + accessKey: _accessKey, + modelPath: GetModelPath(language), endpointDurationSec: 0.2f, enableAutomaticPunctuation: false)) { - string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples", testAudioFile); + string testAudioPath = Path.Combine(ROOT_DIR, "resources/audio_samples", testAudioFile); List pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate); int frameLen = cheetah.FrameLength; @@ -140,25 +198,32 @@ public void TestProcess( CheetahTranscript finalTranscriptObj = 
cheetah.Flush(); transcript += finalTranscriptObj.Transcript; - Assert.IsTrue(GetErrorRate(transcript, referenceTranscript) < targetErrorRate); + string normalizedTranscript = referenceTranscript; + foreach (string punctuation in punctuations) + { + normalizedTranscript = normalizedTranscript.Replace(punctuation, ""); + } + + Assert.IsTrue(GetErrorRate(transcript, normalizedTranscript) <= targetErrorRate); } } [TestMethod] - [DynamicData(nameof(TestParameters))] + [DynamicData(nameof(LanguageTestParameters))] public void TestProcessWithPunctuation( string language, string testAudioFile, - string _, string referenceTranscript, + string[] _, float targetErrorRate) { using (Cheetah cheetah = Cheetah.Create( - accessKey: ACCESS_KEY, + accessKey: _accessKey, + modelPath: GetModelPath(language), endpointDurationSec: 0.2f, enableAutomaticPunctuation: true)) { - string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples", testAudioFile); + string testAudioPath = Path.Combine(ROOT_DIR, "resources/audio_samples", testAudioFile); List pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate); int frameLen = cheetah.FrameLength; @@ -177,50 +242,14 @@ public void TestProcessWithPunctuation( CheetahTranscript finalTranscriptObj = cheetah.Flush(); transcript += finalTranscriptObj.Transcript; - Assert.IsTrue(GetErrorRate(transcript, referenceTranscript) < targetErrorRate); - } - } - - [TestMethod] - [DynamicData(nameof(TestParameters))] - public void TestCustomModel( - string language, - string testAudioFile, - string referenceTranscript, - string _, - float targetErrorRate) - { - string testModelPath = Path.Combine(_relativeDir, "lib/common/cheetah_params.pv"); - using (Cheetah cheetah = Cheetah.Create( - accessKey: ACCESS_KEY, - modelPath: testModelPath, - enableAutomaticPunctuation: false)) - { - string testAudioPath = Path.Combine(_relativeDir, "resources/audio_samples", testAudioFile); - List pcm = GetPcmFromFile(testAudioPath, cheetah.SampleRate); - - int 
frameLen = cheetah.FrameLength; - int framecount = (int)Math.Floor((float)(pcm.Count / frameLen)); - - string transcript = ""; - for (int i = 0; i < framecount; i++) - { - int start = i * cheetah.FrameLength; - List frame = pcm.GetRange(start, frameLen); - CheetahTranscript transcriptObj = cheetah.Process(frame.ToArray()); - transcript += transcriptObj.Transcript; - } - CheetahTranscript finalTranscriptObj = cheetah.Flush(); - transcript += finalTranscriptObj.Transcript; - - Assert.IsTrue(GetErrorRate(transcript, referenceTranscript) < targetErrorRate); + Assert.IsTrue(GetErrorRate(transcript, referenceTranscript) <= targetErrorRate); } } [TestMethod] public void TestMessageStack() { - string modelPath = Path.Combine(_relativeDir, "lib/common/cheetah_params.pv"); + string modelPath = GetModelPath("en"); Cheetah c; string[] messageList = new string[] { }; @@ -263,10 +292,10 @@ public void TestMessageStack() [TestMethod] public void TestProcessFlushMessageStack() { - string modelPath = Path.Combine(_relativeDir, "lib/common/cheetah_params.pv"); + string modelPath = GetModelPath("en"); Cheetah c = Cheetah.Create( - accessKey: ACCESS_KEY, + accessKey: _accessKey, modelPath: modelPath, enableAutomaticPunctuation: false); short[] testPcm = new short[c.FrameLength]; diff --git a/binding/dotnet/README.md b/binding/dotnet/README.md index a586fdb5..d8f3a0dc 100644 --- a/binding/dotnet/README.md +++ b/binding/dotnet/README.md @@ -102,7 +102,13 @@ using(Cheetah handle = Cheetah.Create(accessKey)) } ``` -The model file contains the parameters for the Cheetah engine. You may create bespoke language models using [Picovoice Console](https://console.picovoice.ai/) and then pass in the relevant file. +### Language Model + +The Cheetah .NET SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). 
+ +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. ```csharp using Pv; diff --git a/binding/flutter/CHANGELOG.md b/binding/flutter/CHANGELOG.md index 9f687de6..1e9ed2bc 100644 --- a/binding/flutter/CHANGELOG.md +++ b/binding/flutter/CHANGELOG.md @@ -21,4 +21,7 @@ * Additional gradle plugin build support ## [2.0.2] - 2024-11-28 -* Update native packages \ No newline at end of file +* Update native packages + +## [2.1.0] - 2024-12-09 +* Additional language support \ No newline at end of file diff --git a/binding/flutter/android/build.gradle b/binding/flutter/android/build.gradle index 620ab0ee..e966362e 100644 --- a/binding/flutter/android/build.gradle +++ b/binding/flutter/android/build.gradle @@ -1,5 +1,5 @@ group 'ai.picovoice.flutter.cheetah' -version '2.0.2' +version '2.1.0' buildscript { repositories { @@ -44,5 +44,5 @@ android { } dependencies { - implementation 'ai.picovoice:cheetah-android:2.0.0' + implementation 'ai.picovoice:cheetah-android:2.1.0' } diff --git a/binding/flutter/ios/cheetah_flutter.podspec b/binding/flutter/ios/cheetah_flutter.podspec index 1be796b3..c6daa7e8 100644 --- a/binding/flutter/ios/cheetah_flutter.podspec +++ b/binding/flutter/ios/cheetah_flutter.podspec @@ -1,6 +1,6 @@ Pod::Spec.new do |s| s.name = 'cheetah_flutter' - s.version = '2.0.2' + s.version = '2.1.0' s.summary = 'A Flutter package plugin for Picovoice\'s Cheetah Speech-to-Text engine' s.description = <<-DESC A Flutter package plugin for Picovoice\'s Cheetah Speech-to-Text engine @@ -12,7 +12,7 @@ Pod::Spec.new do |s| s.source_files = 'Classes/**/*' s.platform = :ios, '13.0' s.dependency 'Flutter' - s.dependency 'Cheetah-iOS', '~> 2.0.1' + s.dependency 'Cheetah-iOS', '~> 2.1.0' s.swift_version = '5.0' end diff --git a/binding/flutter/pubspec.lock b/binding/flutter/pubspec.lock index a375c7c7..9a67ba03 100644 --- 
a/binding/flutter/pubspec.lock +++ b/binding/flutter/pubspec.lock @@ -53,18 +53,10 @@ packages: dependency: transitive description: name: ffi - sha256: "35d0f481d939de0d640b3db9a7aa36a52cd22054a798a73b4f50bdad5ce12678" + sha256: ed5337a5660c506388a9f012be0288fb38b49020ce2b45fe1f8b8323fe429f99 url: "https://pub.dev" source: hosted - version: "1.1.2" - file: - dependency: transitive - description: - name: file - sha256: "9fd2163d866769f60f4df8ac1dc59f52498d810c356fe78022e383dd3c57c0e1" - url: "https://pub.dev" - source: hosted - version: "6.1.0" + version: "2.0.2" flutter: dependency: "direct main" description: flutter @@ -75,6 +67,30 @@ packages: description: flutter source: sdk version: "0.0.0" + leak_tracker: + dependency: transitive + description: + name: leak_tracker + sha256: "3f87a60e8c63aecc975dda1ceedbc8f24de75f09e4856ea27daf8958f2f0ce05" + url: "https://pub.dev" + source: hosted + version: "10.0.5" + leak_tracker_flutter_testing: + dependency: transitive + description: + name: leak_tracker_flutter_testing + sha256: "932549fb305594d82d7183ecd9fa93463e9914e1b67cacc34bc40906594a1806" + url: "https://pub.dev" + source: hosted + version: "3.0.5" + leak_tracker_testing: + dependency: transitive + description: + name: leak_tracker_testing + sha256: "6ba465d5d76e67ddf503e1161d1f4a6bc42306f9d66ca1e8f079a47290fb06d3" + url: "https://pub.dev" + source: hosted + version: "3.0.1" lints: dependency: "direct dev" description: @@ -87,114 +103,98 @@ packages: dependency: transitive description: name: matcher - sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" + sha256: d2323aa2060500f906aa31a895b4030b6da3ebdcc5619d14ce1aada65cd161cb url: "https://pub.dev" source: hosted - version: "0.12.16" + version: "0.12.16+1" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" + sha256: 
f7142bb1154231d7ea5f96bc7bde4bda2a0945d2806bb11670e30b850d56bdec url: "https://pub.dev" source: hosted - version: "0.5.0" + version: "0.11.1" meta: dependency: transitive description: name: meta - sha256: a6e590c838b18133bb482a2745ad77c5bb7715fb0451209e1a7567d416678b8e + sha256: bdb68674043280c3428e9ec998512fb681678676b3c54e773629ffe74419f8c7 url: "https://pub.dev" source: hosted - version: "1.10.0" + version: "1.15.0" path: dependency: transitive description: name: path - sha256: "8829d8a55c13fc0e37127c29fedf290c102f4e40ae94ada574091fe0ff96c917" + sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af" url: "https://pub.dev" source: hosted - version: "1.8.3" + version: "1.9.0" path_provider: dependency: "direct main" description: name: path_provider - sha256: e92dee4d38a9044605cb3fb253e9b46eb9375dfcad4515d0379b44ac90797568 + sha256: "909b84830485dbcd0308edf6f7368bc8fd76afa26a270420f34cabea2a6467a0" url: "https://pub.dev" source: hosted - version: "2.0.9" + version: "2.1.0" path_provider_android: dependency: transitive description: name: path_provider_android - sha256: "32bbab16092df3bedab89ed9f2c1cfaedf25d96a5036f62f16d5e372890d068c" + sha256: "5d44fc3314d969b84816b569070d7ace0f1dea04bd94a83f74c4829615d22ad8" url: "https://pub.dev" source: hosted - version: "2.0.13" - path_provider_ios: + version: "2.1.0" + path_provider_foundation: dependency: transitive description: - name: path_provider_ios - sha256: "943b76e54056386432cdc2731cb303e2f580346b61a1fc73819721767be72309" + name: path_provider_foundation + sha256: "1b744d3d774e5a879bb76d6cd1ecee2ba2c6960c03b1020cd35212f6aa267ac5" url: "https://pub.dev" source: hosted - version: "2.0.8" + version: "2.3.0" path_provider_linux: dependency: transitive description: name: path_provider_linux - sha256: "1e109f4df28bd95eab71e323008b53d19c4d633bc1ab05b577518773474e9621" + sha256: ba2b77f0c52a33db09fc8caf85b12df691bf28d983e84cf87ff6d693cfa007b3 url: "https://pub.dev" source: hosted - version: "2.1.5" - 
path_provider_macos: - dependency: transitive - description: - name: path_provider_macos - sha256: eb58b896ea3a504f0b0fa7870646bda6935a6f752b2a54df33f97070dacca8d4 - url: "https://pub.dev" - source: hosted - version: "2.0.0" + version: "2.2.0" path_provider_platform_interface: dependency: transitive description: name: path_provider_platform_interface - sha256: c2af5a8a6369992d915f8933dfc23172071001359d17896e83db8be57db8a397 + sha256: bced5679c7df11190e1ddc35f3222c858f328fff85c3942e46e7f5589bf9eb84 url: "https://pub.dev" source: hosted - version: "2.0.1" + version: "2.1.0" path_provider_windows: dependency: transitive description: name: path_provider_windows - sha256: "366ad4e3541ea707f859e7148d4d5aba67d589d7936cee04a05c464a277eeb27" + sha256: ee0e0d164516b90ae1f970bdf29f726f1aa730d7cfc449ecc74c495378b705da url: "https://pub.dev" source: hosted - version: "2.0.5" + version: "2.2.0" platform: dependency: transitive description: name: platform - sha256: ebc79f16b5f6b609aad4a5e63447d4795d16f7adee46e93ed03200848c006735 + sha256: "57c07bf82207aee366dfaa3867b3164e4f03a238a461a11b0e8a3a510d51203d" url: "https://pub.dev" source: hosted - version: "3.0.0" + version: "3.1.1" plugin_platform_interface: dependency: transitive description: name: plugin_platform_interface - sha256: c2c49e16d42fd6983eb55e44b7f197fdf16b4da7aab7f8e1d21da307cad3fb02 - url: "https://pub.dev" - source: hosted - version: "2.0.0" - process: - dependency: transitive - description: - name: process - sha256: dc3c073b5bc0db4e0f3dbc6b69f8e9cf2f336dafb3db996242ebdacf94c295dd + sha256: "43798d895c929056255600343db8f049921cbec94d31ec87f1dc5c16c01935dd" url: "https://pub.dev" source: hosted - version: "4.2.1" + version: "2.1.5" sky_engine: dependency: transitive description: flutter @@ -244,10 +244,10 @@ packages: dependency: transitive description: name: test_api - sha256: "5c2f730018264d276c20e4f1503fd1308dfbbae39ec8ee63c5236311ac06954b" + sha256: 
"5b8a98dafc4d5c4c9c72d8b31ab2b23fc13422348d2997120294d3bac86b4ddb" url: "https://pub.dev" source: hosted - version: "0.6.1" + version: "0.7.2" vector_math: dependency: transitive description: @@ -256,30 +256,30 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.4" - web: + vm_service: dependency: transitive description: - name: web - sha256: afe077240a270dcfd2aafe77602b4113645af95d0ad31128cc02bce5ac5d5152 + name: vm_service + sha256: "5c5f338a667b4c644744b661f309fb8080bb94b18a7e91ef1dbd343bed00ed6d" url: "https://pub.dev" source: hosted - version: "0.3.0" + version: "14.2.5" win32: dependency: transitive description: name: win32 - sha256: c0ee29e0f6e4ee5a63983aae753640adc15017b34e50424f8b45063426e19c5b + sha256: "5a751eddf9db89b3e5f9d50c20ab8612296e4e8db69009788d6c8b060a84191c" url: "https://pub.dev" source: hosted - version: "2.0.5" + version: "4.1.4" xdg_directories: dependency: transitive description: name: xdg_directories - sha256: "0186b3f2d66be9a12b0295bddcf8b6f8c0b0cc2f85c6287344e2a6366bc28457" + sha256: f0c26453a2d47aa4c2570c6a033246a3fc62da2fe23c7ffdd0a7495086dc0247 url: "https://pub.dev" source: hosted - version: "0.2.0" + version: "1.0.2" sdks: - dart: ">=3.2.0-194.0.dev <4.0.0" - flutter: ">=2.8.1" + dart: ">=3.3.0 <4.0.0" + flutter: ">=3.18.0-18.0.pre.54" diff --git a/binding/flutter/pubspec.yaml b/binding/flutter/pubspec.yaml index 1621e0b4..47d38d77 100644 --- a/binding/flutter/pubspec.yaml +++ b/binding/flutter/pubspec.yaml @@ -1,6 +1,6 @@ name: cheetah_flutter description: A Flutter plugin for Picovoice's Cheetah Speech-to-Text engine -version: 2.0.2 +version: 2.1.0 homepage: https://picovoice.ai/ repository: https://github.com/Picovoice/cheetah/ documentation: https://picovoice.ai/docs/cheetah/ diff --git a/binding/go/README.md b/binding/go/README.md index bf6e63fa..3fb5c3f1 100644 --- a/binding/go/README.md +++ b/binding/go/README.md @@ -63,6 +63,14 @@ for { Replace `${ACCESS_KEY}` with yours obtained from [Picovoice 
Console](https://console.picovoice.ai/). When done be sure to explicitly release the resources using `cheetah.Delete()`. +### Language Model + +The Cheetah Go SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + ## Demos Check out the Cheetah Go demos [here](https://github.com/Picovoice/cheetah/tree/master/demo/go). diff --git a/binding/go/cheetah_test.go b/binding/go/cheetah_test.go index 5e1eed27..034b5f5c 100644 --- a/binding/go/cheetah_test.go +++ b/binding/go/cheetah_test.go @@ -1,4 +1,4 @@ -// Copyright 2022-2023 Picovoice Inc. +// Copyright 2022-2024 Picovoice Inc. // // You may not use this file except in compliance with the license. A copy of the license is // located in the "LICENSE" file accompanying this source. @@ -15,6 +15,7 @@ package cheetah import ( "encoding/binary" + "encoding/json" "flag" "io/ioutil" "log" @@ -23,22 +24,20 @@ import ( "reflect" "strings" "testing" - - "github.com/agnivade/levenshtein" ) -type TestParameters struct { +type LanguageTests struct { language string testAudioFile string transcript string + punctuations []string errorRate float32 - enableAutomaticPunctuation bool } var ( testAccessKey string cheetah Cheetah - processTestParameters []TestParameters + languageTests []LanguageTests ) func TestMain(m *testing.M) { @@ -46,38 +45,53 @@ func TestMain(m *testing.M) { flag.StringVar(&testAccessKey, "access_key", "", "AccessKey for testing") flag.Parse() - processTestParameters = loadTestData() + languageTests = loadTestData() os.Exit(m.Run()) } -func loadTestData() []TestParameters { - punctuations := []string{"."} - transcript := "Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel." 
+func appendLanguage(s string, language string) string { + if language == "en" { + return s + } else { + return s + "_" + language + } +} - testCaseWithPunctuation := TestParameters{ - language: "en", - testAudioFile: "test.wav", - transcript: transcript, - enableAutomaticPunctuation: true, - errorRate: 0.025, +func loadTestData() []LanguageTests { + content, err := ioutil.ReadFile("../../resources/.test/test_data.json") + if err != nil { + log.Fatalf("Could not read test data json: %v", err) } - processTestParameters = append(processTestParameters, testCaseWithPunctuation) - transcriptWithoutPunctuation := transcript - for _, p := range punctuations { - transcriptWithoutPunctuation = strings.ReplaceAll(transcriptWithoutPunctuation, p, "") + var testData struct { + Tests struct { + LanguageTests []struct { + Language string `json:"language"` + AudioFile string `json:"audio_file"` + Transcript string `json:"transcript"` + Punctuations []string `json:"punctuations"` + ErrorRate float32 `json:"error_rate"` + } `json:"language_tests"` + } `json:"tests"` } + err = json.Unmarshal(content, &testData) + if err != nil { + log.Fatalf("Could not decode test data json: %v", err) + } + + for _, x := range testData.Tests.LanguageTests { + languageTestParameters := LanguageTests{ + language: x.Language, + testAudioFile: x.AudioFile, + transcript: x.Transcript, + punctuations: x.Punctuations, + errorRate: x.ErrorRate, + } - testCaseWithoutPunctuation := TestParameters{ - language: "en", - testAudioFile: "test.wav", - transcript: transcriptWithoutPunctuation, - enableAutomaticPunctuation: false, - errorRate: 0.025, + languageTests = append(languageTests, languageTestParameters) } - processTestParameters = append(processTestParameters, testCaseWithoutPunctuation) - return processTestParameters + return languageTests } func TestVersion(t *testing.T) { @@ -102,15 +116,62 @@ func TestVersion(t *testing.T) { } } +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func 
levenshteinDistance(transcriptWords, referenceWords []string) int { + m, n := len(transcriptWords), len(referenceWords) + dp := make([][]int, m+1) + for i := range dp { + dp[i] = make([]int, n+1) + } + + for i := 0; i <= m; i++ { + dp[i][0] = i + } + for j := 0; j <= n; j++ { + dp[0][j] = j + } + + for i := 1; i <= m; i++ { + for j := 1; j <= n; j++ { + cost := 0 + if !strings.EqualFold(transcriptWords[i-1], referenceWords[j-1]) { + cost = 1 + } + dp[i][j] = min(dp[i-1][j]+1, + min(dp[i][j-1]+1, + dp[i-1][j-1]+cost)) + } + } + return dp[m][n] +} + +func getWordErrorRate(transcript, reference string) float32 { + transcriptWords := strings.Fields(transcript) + referenceWords := strings.Fields(reference) + + dist := levenshteinDistance(transcriptWords, referenceWords) + return float32(dist) / float32(len(referenceWords)) +} + func runProcessTestCase( t *testing.T, - _ string, + language string, testAudioFile string, referenceTranscript string, + punctuations []string, targetErrorRate float32, enableAutomaticPunctuation bool) { + modelPath, _ := filepath.Abs(filepath.Join("../../lib/common", appendLanguage("cheetah_params", language)+".pv")) + cheetah = NewCheetah(testAccessKey) + cheetah.ModelPath = modelPath cheetah.EnableAutomaticPunctuation = enableAutomaticPunctuation err := cheetah.Init() if err != nil { @@ -157,15 +218,28 @@ func runProcessTestCase( } transcript += final - errorRate := float32(levenshtein.ComputeDistance(transcript, referenceTranscript)) / float32(len(referenceTranscript)) + var normalizedTranscript = referenceTranscript + if !enableAutomaticPunctuation { + for _, punctuation := range punctuations { + normalizedTranscript = strings.ReplaceAll(normalizedTranscript, punctuation, "") + } + } + + errorRate := getWordErrorRate(transcript, normalizedTranscript) if errorRate >= targetErrorRate { t.Fatalf("Expected '%f' got '%f'", targetErrorRate, errorRate) } } func TestProcess(t *testing.T) { - for _, test := range processTestParameters { - 
runProcessTestCase(t, test.language, test.testAudioFile, test.transcript, test.errorRate, test.enableAutomaticPunctuation) + for _, test := range languageTests { + runProcessTestCase(t, test.language, test.testAudioFile, test.transcript, test.punctuations, test.errorRate, false) + } +} + +func TestProcessWithPunctuation(t *testing.T) { + for _, test := range languageTests { + runProcessTestCase(t, test.language, test.testAudioFile, test.transcript, test.punctuations, test.errorRate, true) } } diff --git a/binding/go/embedded/lib/common/cheetah_params.pv b/binding/go/embedded/lib/common/cheetah_params.pv index 15e2bd90..b18a3d5e 100644 Binary files a/binding/go/embedded/lib/common/cheetah_params.pv and b/binding/go/embedded/lib/common/cheetah_params.pv differ diff --git a/binding/go/embedded/lib/linux/x86_64/libpv_cheetah.so b/binding/go/embedded/lib/linux/x86_64/libpv_cheetah.so index 1982e371..ad55e67d 100755 Binary files a/binding/go/embedded/lib/linux/x86_64/libpv_cheetah.so and b/binding/go/embedded/lib/linux/x86_64/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/mac/arm64/libpv_cheetah.dylib b/binding/go/embedded/lib/mac/arm64/libpv_cheetah.dylib index d170298b..4d247b51 100755 Binary files a/binding/go/embedded/lib/mac/arm64/libpv_cheetah.dylib and b/binding/go/embedded/lib/mac/arm64/libpv_cheetah.dylib differ diff --git a/binding/go/embedded/lib/mac/x86_64/libpv_cheetah.dylib b/binding/go/embedded/lib/mac/x86_64/libpv_cheetah.dylib index b17cbc17..0b000712 100755 Binary files a/binding/go/embedded/lib/mac/x86_64/libpv_cheetah.dylib and b/binding/go/embedded/lib/mac/x86_64/libpv_cheetah.dylib differ diff --git a/binding/go/embedded/lib/raspberry-pi/cortex-a53-aarch64/libpv_cheetah.so b/binding/go/embedded/lib/raspberry-pi/cortex-a53-aarch64/libpv_cheetah.so index 284378ac..3adfcc95 100755 Binary files a/binding/go/embedded/lib/raspberry-pi/cortex-a53-aarch64/libpv_cheetah.so and 
b/binding/go/embedded/lib/raspberry-pi/cortex-a53-aarch64/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/raspberry-pi/cortex-a53/libpv_cheetah.so b/binding/go/embedded/lib/raspberry-pi/cortex-a53/libpv_cheetah.so index 3ca6b4f6..1d7483a7 100755 Binary files a/binding/go/embedded/lib/raspberry-pi/cortex-a53/libpv_cheetah.so and b/binding/go/embedded/lib/raspberry-pi/cortex-a53/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/raspberry-pi/cortex-a72-aarch64/libpv_cheetah.so b/binding/go/embedded/lib/raspberry-pi/cortex-a72-aarch64/libpv_cheetah.so index c60a61be..91d6ed1b 100755 Binary files a/binding/go/embedded/lib/raspberry-pi/cortex-a72-aarch64/libpv_cheetah.so and b/binding/go/embedded/lib/raspberry-pi/cortex-a72-aarch64/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/raspberry-pi/cortex-a72/libpv_cheetah.so b/binding/go/embedded/lib/raspberry-pi/cortex-a72/libpv_cheetah.so index 3cb1d075..6c6feec2 100755 Binary files a/binding/go/embedded/lib/raspberry-pi/cortex-a72/libpv_cheetah.so and b/binding/go/embedded/lib/raspberry-pi/cortex-a72/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/raspberry-pi/cortex-a76-aarch64/libpv_cheetah.so b/binding/go/embedded/lib/raspberry-pi/cortex-a76-aarch64/libpv_cheetah.so index 6256aca0..9b1b26d4 100755 Binary files a/binding/go/embedded/lib/raspberry-pi/cortex-a76-aarch64/libpv_cheetah.so and b/binding/go/embedded/lib/raspberry-pi/cortex-a76-aarch64/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/raspberry-pi/cortex-a76/libpv_cheetah.so b/binding/go/embedded/lib/raspberry-pi/cortex-a76/libpv_cheetah.so index fad94bd9..7e880624 100755 Binary files a/binding/go/embedded/lib/raspberry-pi/cortex-a76/libpv_cheetah.so and b/binding/go/embedded/lib/raspberry-pi/cortex-a76/libpv_cheetah.so differ diff --git a/binding/go/embedded/lib/windows/amd64/libpv_cheetah.dll b/binding/go/embedded/lib/windows/amd64/libpv_cheetah.dll index 282f8edd..9f73a6b0 100644 Binary files 
a/binding/go/embedded/lib/windows/amd64/libpv_cheetah.dll and b/binding/go/embedded/lib/windows/amd64/libpv_cheetah.dll differ diff --git a/binding/go/go.mod b/binding/go/go.mod index 78edfb1e..e45ae646 100644 --- a/binding/go/go.mod +++ b/binding/go/go.mod @@ -1,5 +1,3 @@ module github.com/Picovoice/cheetah/binding/go/v2 go 1.16 - -require github.com/agnivade/levenshtein v1.1.1 diff --git a/binding/go/go.sum b/binding/go/go.sum index 544fa859..e69de29b 100644 --- a/binding/go/go.sum +++ b/binding/go/go.sum @@ -1,6 +0,0 @@ -github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8= -github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo= -github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= -github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= -github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g= -github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= diff --git a/binding/ios/Cheetah-iOS.podspec b/binding/ios/Cheetah-iOS.podspec index dc4b1bb6..19843135 100644 --- a/binding/ios/Cheetah-iOS.podspec +++ b/binding/ios/Cheetah-iOS.podspec @@ -1,7 +1,7 @@ Pod::Spec.new do |s| s.name = 'Cheetah-iOS' s.module_name = 'Cheetah' - s.version = '2.0.1' + s.version = '2.1.0' s.license = {:type => 'Apache 2.0'} s.summary = 'iOS SDK for Picovoice\'s Cheetah speech-to-text engine.' 
s.description = diff --git a/binding/ios/CheetahAppTest/.gitignore b/binding/ios/CheetahAppTest/.gitignore index 5b281327..38ef34e4 100644 --- a/binding/ios/CheetahAppTest/.gitignore +++ b/binding/ios/CheetahAppTest/.gitignore @@ -18,3 +18,6 @@ build/* xcuserdata Pods ddp + +CheetahAppTestUITests/test_resources/* +!CheetahAppTestUITests/test_resources/.gitkeep diff --git a/binding/ios/CheetahAppTest/CheetahAppTest.xcodeproj/project.pbxproj b/binding/ios/CheetahAppTest/CheetahAppTest.xcodeproj/project.pbxproj index 7dcebef6..f5451e80 100644 --- a/binding/ios/CheetahAppTest/CheetahAppTest.xcodeproj/project.pbxproj +++ b/binding/ios/CheetahAppTest/CheetahAppTest.xcodeproj/project.pbxproj @@ -3,18 +3,17 @@ archiveVersion = 1; classes = { }; - objectVersion = 55; + objectVersion = 60; objects = { /* Begin PBXBuildFile section */ + 072F6D492D02533000E29DF3 /* test_resources in Resources */ = {isa = PBXBuildFile; fileRef = 072F6D482D02533000E29DF3 /* test_resources */; }; 1E00655827CFF7EB006FF6E9 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1E00655727CFF7EB006FF6E9 /* AppDelegate.swift */; }; 1E00655C27CFF7EB006FF6E9 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1E00655B27CFF7EB006FF6E9 /* ViewController.swift */; }; 1E00655F27CFF7EB006FF6E9 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1E00655D27CFF7EB006FF6E9 /* Main.storyboard */; }; 1E00656127CFF7EC006FF6E9 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 1E00656027CFF7EC006FF6E9 /* Assets.xcassets */; }; 1E00656427CFF7EC006FF6E9 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1E00656227CFF7EC006FF6E9 /* LaunchScreen.storyboard */; }; 1E00657927CFF7EC006FF6E9 /* CheetahAppTestUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1E00657827CFF7EC006FF6E9 /* CheetahAppTestUITests.swift */; }; - 1E00658A27CFFAB3006FF6E9 /* test.wav in Resources */ = {isa = PBXBuildFile; fileRef = 
1E00658927CFFAB3006FF6E9 /* test.wav */; }; - 1E00658C27CFFABD006FF6E9 /* cheetah_params.pv in Resources */ = {isa = PBXBuildFile; fileRef = 1E00658B27CFFABD006FF6E9 /* cheetah_params.pv */; }; 1E5B7B332800F75900F8BDDB /* PerformanceTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1E5B7B322800F75900F8BDDB /* PerformanceTest.swift */; }; 1E5B7B362800F77300F8BDDB /* test.wav in Resources */ = {isa = PBXBuildFile; fileRef = 1E5B7B352800F77300F8BDDB /* test.wav */; }; 1E5B7B382800F7AA00F8BDDB /* cheetah_params.pv in Resources */ = {isa = PBXBuildFile; fileRef = 1E5B7B372800F7AA00F8BDDB /* cheetah_params.pv */; }; @@ -42,6 +41,21 @@ /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ + 072F6D482D02533000E29DF3 /* test_resources */ = {isa = PBXFileReference; lastKnownFileType = folder; name = test_resources; path = CheetahAppTestUITests/test_resources; sourceTree = ""; }; + 07E777C32D02632A005A41E7 /* test.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = test.wav; sourceTree = ""; }; + 07E777C42D02632A005A41E7 /* test_de.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = test_de.wav; sourceTree = ""; }; + 07E777C52D02632A005A41E7 /* test_es.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = test_es.wav; sourceTree = ""; }; + 07E777C62D02632A005A41E7 /* test_fr.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = test_fr.wav; sourceTree = ""; }; + 07E777C72D02632A005A41E7 /* test_it.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = test_it.wav; sourceTree = ""; }; + 07E777C82D02632A005A41E7 /* test_pt.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = test_pt.wav; sourceTree = ""; }; + 07E777CA2D02632A005A41E7 /* cheetah_params.pv */ = {isa = PBXFileReference; lastKnownFileType = file; path = cheetah_params.pv; sourceTree = ""; }; + 07E777CB2D02632A005A41E7 /* cheetah_params_de.pv */ = {isa = PBXFileReference; 
lastKnownFileType = file; path = cheetah_params_de.pv; sourceTree = ""; }; + 07E777CC2D02632A005A41E7 /* cheetah_params_es.pv */ = {isa = PBXFileReference; lastKnownFileType = file; path = cheetah_params_es.pv; sourceTree = ""; }; + 07E777CD2D02632A005A41E7 /* cheetah_params_fr.pv */ = {isa = PBXFileReference; lastKnownFileType = file; path = cheetah_params_fr.pv; sourceTree = ""; }; + 07E777CE2D02632A005A41E7 /* cheetah_params_it.pv */ = {isa = PBXFileReference; lastKnownFileType = file; path = cheetah_params_it.pv; sourceTree = ""; }; + 07E777CF2D02632A005A41E7 /* cheetah_params_pt.pv */ = {isa = PBXFileReference; lastKnownFileType = file; path = cheetah_params_pt.pv; sourceTree = ""; }; + 07E777D12D02632A005A41E7 /* .gitkeep */ = {isa = PBXFileReference; lastKnownFileType = text; path = .gitkeep; sourceTree = ""; }; + 07E777D22D02632A005A41E7 /* test_data.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = test_data.json; sourceTree = ""; }; 1E00655427CFF7EB006FF6E9 /* CheetahAppTest.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = CheetahAppTest.app; sourceTree = BUILT_PRODUCTS_DIR; }; 1E00655727CFF7EB006FF6E9 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 1E00655B27CFF7EB006FF6E9 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; @@ -52,8 +66,6 @@ 1E00657427CFF7EC006FF6E9 /* CheetahAppTestUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = CheetahAppTestUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 1E00657827CFF7EC006FF6E9 /* CheetahAppTestUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CheetahAppTestUITests.swift; sourceTree = ""; }; 1E00658827CFFA3C006FF6E9 /* Info.plist */ = {isa = PBXFileReference; 
lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; - 1E00658927CFFAB3006FF6E9 /* test.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; name = test.wav; path = ../../../../resources/audio_samples/test.wav; sourceTree = ""; }; - 1E00658B27CFFABD006FF6E9 /* cheetah_params.pv */ = {isa = PBXFileReference; lastKnownFileType = file; name = cheetah_params.pv; path = ../../../../lib/common/cheetah_params.pv; sourceTree = ""; }; 1E5B7B302800F6FE00F8BDDB /* PerformanceTest.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = PerformanceTest.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 1E5B7B322800F75900F8BDDB /* PerformanceTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PerformanceTest.swift; sourceTree = ""; }; 1E5B7B342800F76700F8BDDB /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; @@ -90,9 +102,47 @@ /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ + 07E777C92D02632A005A41E7 /* audio_samples */ = { + isa = PBXGroup; + children = ( + 07E777C32D02632A005A41E7 /* test.wav */, + 07E777C42D02632A005A41E7 /* test_de.wav */, + 07E777C52D02632A005A41E7 /* test_es.wav */, + 07E777C62D02632A005A41E7 /* test_fr.wav */, + 07E777C72D02632A005A41E7 /* test_it.wav */, + 07E777C82D02632A005A41E7 /* test_pt.wav */, + ); + path = audio_samples; + sourceTree = ""; + }; + 07E777D02D02632A005A41E7 /* model_files */ = { + isa = PBXGroup; + children = ( + 07E777CA2D02632A005A41E7 /* cheetah_params.pv */, + 07E777CB2D02632A005A41E7 /* cheetah_params_de.pv */, + 07E777CC2D02632A005A41E7 /* cheetah_params_es.pv */, + 07E777CD2D02632A005A41E7 /* cheetah_params_fr.pv */, + 07E777CE2D02632A005A41E7 /* cheetah_params_it.pv */, + 07E777CF2D02632A005A41E7 /* cheetah_params_pt.pv */, + ); + path = model_files; + sourceTree = ""; + }; + 07E777D32D02632A005A41E7 /* test_resources */ = { + isa 
= PBXGroup; + children = ( + 07E777C92D02632A005A41E7 /* audio_samples */, + 07E777D02D02632A005A41E7 /* model_files */, + 07E777D12D02632A005A41E7 /* .gitkeep */, + 07E777D22D02632A005A41E7 /* test_data.json */, + ); + path = test_resources; + sourceTree = ""; + }; 1E00654B27CFF7EB006FF6E9 = { isa = PBXGroup; children = ( + 072F6D482D02533000E29DF3 /* test_resources */, 1E5B7B312800F71E00F8BDDB /* PerformanceTest */, 1E00655627CFF7EB006FF6E9 /* CheetahAppTest */, 1E00657727CFF7EC006FF6E9 /* CheetahAppTestUITests */, @@ -127,8 +177,7 @@ 1E00657727CFF7EC006FF6E9 /* CheetahAppTestUITests */ = { isa = PBXGroup; children = ( - 1E00658927CFFAB3006FF6E9 /* test.wav */, - 1E00658B27CFFABD006FF6E9 /* cheetah_params.pv */, + 07E777D32D02632A005A41E7 /* test_resources */, 1E00657827CFF7EC006FF6E9 /* CheetahAppTestUITests.swift */, 1E00658827CFFA3C006FF6E9 /* Info.plist */, ); @@ -242,7 +291,7 @@ ); mainGroup = 1E00654B27CFF7EB006FF6E9; packageReferences = ( - E15A377C2CF7B6D600A96F85 /* XCRemoteSwiftPackageReference "cheetah" */, + 07BA482F2D01233800A57078 /* XCLocalSwiftPackageReference "../../../../cheetah" */, ); productRefGroup = 1E00655527CFF7EB006FF6E9 /* Products */; projectDirPath = ""; @@ -270,8 +319,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - 1E00658C27CFFABD006FF6E9 /* cheetah_params.pv in Resources */, - 1E00658A27CFFAB3006FF6E9 /* test.wav in Resources */, + 072F6D492D02533000E29DF3 /* test_resources in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -524,7 +572,7 @@ buildSettings = { CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = 65723695GD; + DEVELOPMENT_TEAM = 8TK4L7UF2X; GENERATE_INFOPLIST_FILE = YES; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", @@ -546,7 +594,7 @@ buildSettings = { CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = 65723695GD; + DEVELOPMENT_TEAM = 8TK4L7UF2X; GENERATE_INFOPLIST_FILE = YES; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", @@ 
-652,23 +700,16 @@ /* End XCConfigurationList section */ /* Begin XCLocalSwiftPackageReference section */ + 07BA482F2D01233800A57078 /* XCLocalSwiftPackageReference "../../../../cheetah" */ = { + isa = XCLocalSwiftPackageReference; + relativePath = ../../../../cheetah; + }; E1E0C7FE2CF11FD9001D6D84 /* XCLocalSwiftPackageReference "../../../../cheetah" */ = { isa = XCLocalSwiftPackageReference; relativePath = ../../../../cheetah; }; /* End XCLocalSwiftPackageReference section */ -/* Begin XCRemoteSwiftPackageReference section */ - E15A377C2CF7B6D600A96F85 /* XCRemoteSwiftPackageReference "cheetah" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://github.com/Picovoice/cheetah"; - requirement = { - kind = exactVersion; - version = 2.0.1; - }; - }; -/* End XCRemoteSwiftPackageReference section */ - /* Begin XCSwiftPackageProductDependency section */ E1E0C8002CF120B0001D6D84 /* Cheetah */ = { isa = XCSwiftPackageProductDependency; diff --git a/binding/ios/CheetahAppTest/CheetahAppTestUITests/CheetahAppTestUITests.swift b/binding/ios/CheetahAppTest/CheetahAppTestUITests/CheetahAppTestUITests.swift index 867fa896..4a1a4d35 100644 --- a/binding/ios/CheetahAppTest/CheetahAppTestUITests/CheetahAppTestUITests.swift +++ b/binding/ios/CheetahAppTest/CheetahAppTestUITests/CheetahAppTestUITests.swift @@ -11,18 +11,80 @@ import AVFoundation import XCTest import Cheetah +extension String { + subscript(index: Int) -> Character { + return self[self.index(self.startIndex, offsetBy: index)] + } +} + +extension String { + public func levenshtein(_ other: String) -> Int { + let sCount = self.count + let oCount = other.count + + guard sCount != 0 else { + return oCount + } + + guard oCount != 0 else { + return sCount + } + + let line: [Int] = Array(repeating: 0, count: oCount + 1) + var mat: [[Int]] = Array(repeating: line, count: sCount + 1) + + for i in 0...sCount { + mat[i][0] = i + } + + for j in 0...oCount { + mat[0][j] = j + } + + for j in 1...oCount { + for i 
in 1...sCount { + if self[i - 1] == other[j - 1] { + mat[i][j] = mat[i - 1][j - 1] // no operation + } else { + let del = mat[i - 1][j] + 1 // deletion + let ins = mat[i][j - 1] + 1 // insertion + let sub = mat[i - 1][j - 1] + 1 // substitution + mat[i][j] = min(min(del, ins), sub) + } + } + } + + return mat[sCount][oCount] + } +} + +struct TestData: Decodable { + var tests: Tests +} + +struct Tests: Decodable { + var language_tests: [LanguageTest] +} + +struct LanguageTest: Decodable { + var language: String + var audio_file: String + var transcript: String + var punctuations: [String] + var error_rate: Float +} + class CheetahDemoUITests: XCTestCase { let accessKey: String = "{TESTING_ACCESS_KEY_HERE}" - let transcript: String = "Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel" - let transcriptWithPunctuation: String = - "Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel." - - let modelURL = Bundle(for: CheetahDemoUITests.self).url(forResource: "cheetah_params", withExtension: "pv")! override func setUpWithError() throws { continueAfterFailure = true } + func characterErrorRate(transcript: String, expectedTranscript: String) -> Float { + return Float(transcript.levenshtein(expectedTranscript)) / Float(expectedTranscript.count) + } + func processFile(cheetah: Cheetah, fileURL: URL) throws -> String { let data = try Data(contentsOf: fileURL) let frameLengthBytes = Int(Cheetah.frameLength) * 2 @@ -44,43 +106,88 @@ class CheetahDemoUITests: XCTestCase { return res } - func testTranscribe() throws { - let cheetah = try Cheetah(accessKey: accessKey, modelURL: modelURL) - + func runTestTranscribe( + modelPath: String, + testAudio: String, + expectedTranscript: String, + errorRate: Float, + enableAutomaticPunctuation: Bool = false + ) throws { let bundle = Bundle(for: type(of: self)) - let fileURL: URL = bundle.url(forResource: "test", withExtension: "wav")! 
- let res: String = try processFile(cheetah: cheetah, fileURL: fileURL) - cheetah.delete() + let audioFileURL: URL = bundle.url( + forResource: testAudio, + withExtension: "", + subdirectory: "test_resources/audio_samples")! - XCTAssertEqual(transcript, res) - } - - func testTranscribeCustomEndpointDuration() throws { let cheetah = try Cheetah( accessKey: accessKey, - modelURL: modelURL, - endpointDuration: 1.5) + modelPath: modelPath, + enableAutomaticPunctuation: enableAutomaticPunctuation) - let bundle = Bundle(for: type(of: self)) - let fileURL: URL = bundle.url(forResource: "test", withExtension: "wav")! - let res: String = try processFile(cheetah: cheetah, fileURL: fileURL) + let res: String = try processFile(cheetah: cheetah, fileURL: audioFileURL) cheetah.delete() - XCTAssertEqual(transcript, res) + XCTAssert(characterErrorRate( + transcript: res, + expectedTranscript: expectedTranscript) < errorRate) } - func testTranscribeWithPunctuation() throws { - let cheetah = try Cheetah( - accessKey: accessKey, - modelURL: modelURL, - enableAutomaticPunctuation: true) - + func testTranscribe() throws { let bundle = Bundle(for: type(of: self)) - let fileURL: URL = bundle.url(forResource: "test", withExtension: "wav")! - let res: String = try processFile(cheetah: cheetah, fileURL: fileURL) - cheetah.delete() + let testDataJsonUrl = bundle.url( + forResource: "test_data", + withExtension: "json", + subdirectory: "test_resources")! + let testDataJsonData = try Data(contentsOf: testDataJsonUrl) + let testData = try JSONDecoder().decode(TestData.self, from: testDataJsonData) + + for testCase in testData.tests.language_tests { + let suffix = testCase.language == "en" ? "" : "_\(testCase.language)" + let modelPath: String = bundle.path( + forResource: "cheetah_params\(suffix)", + ofType: "pv", + inDirectory: "test_resources/model_files")! 
+ + var expectedTranscript = testCase.transcript + for p in testCase.punctuations { + expectedTranscript = expectedTranscript.replacingOccurrences(of: p, with: "") + } + + try XCTContext.runActivity(named: "(\(testCase.language))") { _ in + try runTestTranscribe( + modelPath: modelPath, + testAudio: testCase.audio_file, + expectedTranscript: expectedTranscript, + errorRate: testCase.error_rate) + } + } + } - XCTAssertEqual(transcriptWithPunctuation, res) + func testTranscribeWithPunctuation() throws { + let bundle = Bundle(for: type(of: self)) + let testDataJsonUrl = bundle.url( + forResource: "test_data", + withExtension: "json", + subdirectory: "test_resources")! + let testDataJsonData = try Data(contentsOf: testDataJsonUrl) + let testData = try JSONDecoder().decode(TestData.self, from: testDataJsonData) + + for testCase in testData.tests.language_tests { + let suffix = testCase.language == "en" ? "" : "_\(testCase.language)" + let modelPath: String = bundle.path( + forResource: "cheetah_params\(suffix)", + ofType: "pv", + inDirectory: "test_resources/model_files")! + + try XCTContext.runActivity(named: "(\(testCase.language))") { _ in + try runTestTranscribe( + modelPath: modelPath, + testAudio: testCase.audio_file, + expectedTranscript: testCase.transcript, + errorRate: testCase.error_rate, + enableAutomaticPunctuation: true) + } + } } func testFrameLength() throws { @@ -96,6 +203,12 @@ class CheetahDemoUITests: XCTestCase { } func testMessageStack() throws { + let bundle = Bundle(for: type(of: self)) + let modelURL: URL = bundle.url( + forResource: "cheetah_params", + withExtension: "pv", + subdirectory: "test_resources/model_files")! 
+ var first_error: String = "" do { let cheetah = try Cheetah.init(accessKey: "invalid", modelURL: modelURL) @@ -114,6 +227,12 @@ class CheetahDemoUITests: XCTestCase { } func testProcessMessageStack() throws { + let bundle = Bundle(for: type(of: self)) + let modelURL: URL = bundle.url( + forResource: "cheetah_params", + withExtension: "pv", + subdirectory: "test_resources/model_files")! + let cheetah = try Cheetah(accessKey: accessKey, modelURL: modelURL) cheetah.delete() diff --git a/binding/ios/CheetahAppTest/CheetahAppTestUITests/test_resources/.gitkeep b/binding/ios/CheetahAppTest/CheetahAppTestUITests/test_resources/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/binding/ios/CheetahAppTest/copy_test_resources.sh b/binding/ios/CheetahAppTest/copy_test_resources.sh new file mode 100755 index 00000000..73ac4fdf --- /dev/null +++ b/binding/ios/CheetahAppTest/copy_test_resources.sh @@ -0,0 +1,17 @@ +LIB_DIR="../../../lib" +RESOURCE_DIR="../../../resources" +ASSETS_DIR="./CheetahAppTestUITests/test_resources" + +echo "Creating test resources asset directory" +mkdir -p ${ASSETS_DIR} + +echo "Copying test audio samples..." +mkdir -p ${ASSETS_DIR}/audio_samples +cp ${RESOURCE_DIR}/audio_samples/*.wav ${ASSETS_DIR}/audio_samples + +echo "Copying test model files..." +mkdir -p ${ASSETS_DIR}/model_files +cp ${LIB_DIR}/common/*.pv ${ASSETS_DIR}/model_files + +echo "Copying test data file..." +cp ${RESOURCE_DIR}/.test/test_data.json ${ASSETS_DIR} diff --git a/binding/ios/README.md b/binding/ios/README.md index 894d9b1a..ad813af6 100644 --- a/binding/ios/README.md +++ b/binding/ios/README.md @@ -86,6 +86,20 @@ while true { Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/) and `${MODEL_FILE}` with the name of the Cheetah model file name. Finally, when done be sure to explicitly release the resources using `cheetah.delete()`. 
+### Language Model + +Default models for supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + +Pass in the `.pv` file via the `modelURL` or `modelPath` constructor argument: +```swift +let cheetah = try Cheetah(accessKey: accessKey, modelPath: "${MODEL_FILE_PATH}") +// or +let cheetah = try Cheetah(accessKey: accessKey, modelURL: "${MODEL_FILE_URL}") +``` + ## Running Unit Tests Copy your `AccessKey` into the `accessKey` variable in [`CheetahAppTestUITests.swift`](CheetahAppTest/CheetahAppTestUITests/CheetahAppTestUITests.swift). Open [`CheetahAppTest.xcodeproj`](CheetahAppTest/CheetahAppTest.xcodeproj) with XCode and run the tests with `Product > Test`. diff --git a/binding/java/README.md b/binding/java/README.md index cc821c07..43ece9c3 100644 --- a/binding/java/README.md +++ b/binding/java/README.md @@ -85,6 +85,14 @@ When done resources have to be released explicitly: cheetah.delete(); ``` +### Language Model + +The Cheetah Java SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + ## Demo App For example usage refer to our [Java demos](../../demo/java). 
diff --git a/binding/java/build.gradle b/binding/java/build.gradle index 8476faae..75b2a4c9 100644 --- a/binding/java/build.gradle +++ b/binding/java/build.gradle @@ -7,7 +7,7 @@ plugins { ext { PUBLISH_GROUP_ID = 'ai.picovoice' - PUBLISH_VERSION = '2.0.2' + PUBLISH_VERSION = '2.1.0' PUBLISH_ARTIFACT_ID = 'cheetah-java' } @@ -84,6 +84,7 @@ if (file("${rootDir}/publish-mavencentral.gradle").exists()) { } dependencies { + testImplementation 'com.google.code.gson:gson:2.10.1' testImplementation 'org.junit.jupiter:junit-jupiter:5.4.2' testImplementation 'org.junit.jupiter:junit-jupiter-params:5.8.2' } diff --git a/binding/java/test/ai/picovoice/cheetah/CheetahTest.java b/binding/java/test/ai/picovoice/cheetah/CheetahTest.java index 513b3cdf..633bb51b 100644 --- a/binding/java/test/ai/picovoice/cheetah/CheetahTest.java +++ b/binding/java/test/ai/picovoice/cheetah/CheetahTest.java @@ -1,5 +1,5 @@ /* - Copyright 2022-2023 Picovoice Inc. + Copyright 2022-2024 Picovoice Inc. You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. 
@@ -12,14 +12,22 @@ package ai.picovoice.cheetah; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; import org.junit.jupiter.api.Test; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.stream.Stream; @@ -32,6 +40,103 @@ public class CheetahTest { private final String accessKey = System.getProperty("pvTestingAccessKey"); + private static String appendLanguage(String s, String language) { + if (language.equals("en")) { + return s; + } + return s + "_" + language; + } + + private static int levenshteinDistance(String[] transcript, String[] reference) { + int m = transcript.length; + int n = reference.length; + int[][] dp = new int[m + 1][n + 1]; + + for (int i = 0; i <= m; i++) { + dp[i][0] = i; + } + + for (int j = 0; j <= n; j++) { + dp[0][j] = j; + } + + for (int i = 1; i <= m; i++) { + for (int j = 1; j <= n; j++) { + if (transcript[i - 1].equalsIgnoreCase(reference[j - 1])) { + dp[i][j] = dp[i - 1][j - 1]; + } else { + dp[i][j] = 1 + Math.min(dp[i - 1][j - 1], Math.min(dp[i - 1][j], dp[i][j - 1])); + } + } + } + return dp[m][n]; + } + + private static float getErrorRate(String transcript, String reference) { + String[] transcriptWords = transcript.split("\\s+"); + String[] referenceWords = reference.split("\\s+"); + int distance = levenshteinDistance(transcriptWords, referenceWords); + + return (float) distance / (float) referenceWords.length; + } + + private static ProcessTestData[] loadProcessTestData() throws IOException { + final Path testDataPath = Paths.get(System.getProperty("user.dir")) + .resolve("../../resources/.test") + .resolve("test_data.json"); + final String testDataContent = new 
String(Files.readAllBytes(testDataPath), StandardCharsets.UTF_8); + final JsonObject testDataJson = JsonParser.parseString(testDataContent).getAsJsonObject(); + + final JsonArray testParameters = testDataJson + .getAsJsonObject("tests") + .getAsJsonArray("language_tests"); + + final ProcessTestData[] processTestData = new ProcessTestData[testParameters.size()]; + for (int i = 0; i < testParameters.size(); i++) { + final JsonObject testData = testParameters.get(i).getAsJsonObject(); + final String language = testData.get("language").getAsString(); + final String testAudioFile = testData.get("audio_file").getAsString(); + final String transcript = testData.get("transcript").getAsString(); + final float errorRate = testData.get("error_rate").getAsFloat(); + + final JsonArray punctuationsJson = testData.getAsJsonArray("punctuations"); + final String[] punctuations = new String[punctuationsJson.size()]; + for (int j = 0; j < punctuationsJson.size(); j++) { + punctuations[j] = punctuationsJson.get(j).getAsString(); + } + processTestData[i] = new ProcessTestData( + language, + testAudioFile, + transcript, + punctuations, + errorRate); + } + return processTestData; + } + + private static Stream processTestProvider() throws IOException { + final ProcessTestData[] processTestData = loadProcessTestData(); + final ArrayList testArgs = new ArrayList<>(); + for (ProcessTestData processTestDataItem : processTestData) { + testArgs.add(Arguments.of( + processTestDataItem.language, + processTestDataItem.audioFile, + processTestDataItem.transcript, + processTestDataItem.punctuations, + false, + processTestDataItem.errorRate)); + testArgs.add(Arguments.of( + processTestDataItem.language, + processTestDataItem.audioFile, + processTestDataItem.transcript, + processTestDataItem.punctuations, + true, + processTestDataItem.errorRate)); + } + + return testArgs.stream(); + } + @Test void getVersion() throws CheetahException { Cheetah cheetah = new Cheetah.Builder() @@ -84,22 +189,33 @@ void 
getErrorStack() { } } - @ParameterizedTest(name = "test transcribe with automatic punctuation set to ''{0}''") - @MethodSource("transcribeProvider") - void transcribe(boolean enableAutomaticPunctuation, String referenceTranscript) throws Exception { + @ParameterizedTest(name = "test process data for ''{0}'' with punctuation ''{4}''") + @MethodSource("processTestProvider") + void process( + String language, + String testAudioFile, + String referenceTranscript, + String[] punctuations, + boolean enableAutomaticPunctuation, + float targetErrorRate) throws Exception { + String modelPath = Paths.get(System.getProperty("user.dir")) + .resolve(String.format("../../lib/common/%s.pv", appendLanguage("cheetah_params", language))) + .toString(); + Cheetah cheetah = new Cheetah.Builder() .setAccessKey(accessKey) + .setModelPath(modelPath) .setEnableAutomaticPunctuation(enableAutomaticPunctuation) .build(); int frameLen = cheetah.getFrameLength(); String audioFilePath = Paths.get(System.getProperty("user.dir")) - .resolve("../../resources/audio_samples/test.wav") + .resolve(String.format("../../resources/audio_samples/%s", testAudioFile)) .toString(); File testAudioPath = new File(audioFilePath); AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(testAudioPath); - assertEquals(audioInputStream.getFormat().getFrameRate(), 16000); + assertEquals(16000, audioInputStream.getFormat().getFrameRate()); int byteDepth = audioInputStream.getFormat().getFrameSize(); byte[] pcm = new byte[frameLen * byteDepth]; @@ -116,17 +232,37 @@ void transcribe(boolean enableAutomaticPunctuation, String referenceTranscript) } CheetahTranscript finalTranscriptObj = cheetah.flush(); transcript.append(finalTranscriptObj.getTranscript()); - assertEquals(referenceTranscript, transcript.toString()); cheetah.delete(); + + String normalizedTranscript = referenceTranscript; + if (!enableAutomaticPunctuation) { + for (String punctuation : punctuations) { + normalizedTranscript = 
normalizedTranscript.replace(punctuation, ""); + } + } + + assertTrue(getErrorRate(transcript.toString(), normalizedTranscript) < targetErrorRate); } - private static Stream transcribeProvider() { - return Stream.of( - Arguments.of(true, - "Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel."), - Arguments.of(false, - "Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel") - ); + private static class ProcessTestData { + public final String language; + public final String audioFile; + public final String transcript; + public final String[] punctuations; + public final float errorRate; + + public ProcessTestData( + String language, + String audioFile, + String transcript, + String[] punctuations, + float errorRate) { + this.language = language; + this.audioFile = audioFile; + this.transcript = transcript; + this.punctuations = punctuations; + this.errorRate = errorRate; + } } } diff --git a/binding/nodejs/README.md b/binding/nodejs/README.md index 11332c60..6de1e8de 100644 --- a/binding/nodejs/README.md +++ b/binding/nodejs/README.md @@ -60,6 +60,14 @@ while (true) { Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/). Finally, when done be sure to explicitly release the resources using `handle.release()`. +### Language Model + +The Cheetah Node.js SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + ## Demos [Cheetah Node.js demo package](https://www.npmjs.com/package/@picovoice/cheetah-node-demo) provides command-line utilities for processing audio using cheetah. 
diff --git a/binding/nodejs/package.json b/binding/nodejs/package.json index 32c4949c..850e82f3 100644 --- a/binding/nodejs/package.json +++ b/binding/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/cheetah-node", - "version": "2.0.3", + "version": "2.1.0", "description": "Picovoice Cheetah Node.js binding", "main": "dist/index.js", "types": "dist/types/index.d.ts", diff --git a/binding/nodejs/test/index.test.ts b/binding/nodejs/test/index.test.ts index aaa15be7..2d72ae83 100644 --- a/binding/nodejs/test/index.test.ts +++ b/binding/nodejs/test/index.test.ts @@ -1,5 +1,5 @@ // -// Copyright 2022-2023 Picovoice Inc. +// Copyright 2022-2024 Picovoice Inc. // // You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" // file accompanying this source. @@ -20,17 +20,12 @@ import { WaveFile } from 'wavefile'; import { getSystemLibraryPath } from '../src/platforms'; import { - TRANSCRIPT, + getModelPathByLanguage, getAudioFile, - getModelPath, - getTestParameters, + getLanguageTestParameters, } from './test_utils'; -const MODEL_PATH = getModelPath(); -const TEST_PARAMETERS = getTestParameters(); -const WAV_PATH = "test.wav"; - -const libraryPath = getSystemLibraryPath(); +const LANGUAGE_TEST_PARAMETERS = getLanguageTestParameters(); const ACCESS_KEY = process.argv .filter(x => x.startsWith('--access_key='))[0] @@ -76,8 +71,7 @@ const loadPcm = (audioFile: string): Int16Array => { const waveBuffer = fs.readFileSync(waveFilePath); const waveAudioFile = new WaveFile(waveBuffer); - const pcm: any = waveAudioFile.getSamples(false, Int16Array); - return pcm; + return waveAudioFile.getSamples(false, Int16Array) as any; }; const cheetahProcessWaveFile = ( @@ -101,60 +95,73 @@ const cheetahProcessWaveFile = ( const testCheetahProcess = ( - _: string, - transcript: string, - testPunctuation: boolean, + language: string, + audioFile: string, + referenceTranscript: string, + punctuations: string[], + 
enableAutomaticPunctuation: boolean, errorRate: number, - audioFile: string ) => { + const modelPath = getModelPathByLanguage(language); + let cheetahEngine = new Cheetah(ACCESS_KEY, { - enableAutomaticPunctuation: testPunctuation, + modelPath, + enableAutomaticPunctuation, }); - let [res, __] = cheetahProcessWaveFile(cheetahEngine, audioFile); + let [transcript] = cheetahProcessWaveFile(cheetahEngine, audioFile); + + let normalizedTranscript = referenceTranscript; + if (!enableAutomaticPunctuation) { + for (const punctuation of punctuations) { + normalizedTranscript = normalizedTranscript.replace(punctuation, ""); + } + } expect( - characterErrorRate(res, transcript) < errorRate + characterErrorRate(transcript, normalizedTranscript) < errorRate ).toBeTruthy(); cheetahEngine.release(); }; describe('successful processes', () => { - it.each(TEST_PARAMETERS)( + it.each(LANGUAGE_TEST_PARAMETERS)( 'testing process `%p`', ( language: string, + audioFile: string, transcript: string, - _: string, + punctuations: string[], errorRate: number, - audioFile: string ) => { testCheetahProcess( language, + audioFile, transcript, + punctuations, false, errorRate, - audioFile ); } ); - it.each(TEST_PARAMETERS)( - 'testing process `%p` with punctuation', + it.each(LANGUAGE_TEST_PARAMETERS)( + 'testing process `%p` with punctuation', ( language: string, - _: string, - transcriptWithPunctuation: string, + audioFile: string, + transcript: string, + punctuations: string[], errorRate: number, - audioFile: string ) => { testCheetahProcess( language, - transcriptWithPunctuation, + audioFile, + transcript, + punctuations, true, errorRate, - audioFile ); } ); @@ -170,30 +177,32 @@ describe('Defaults', () => { describe('manual paths', () => { test('manual model path', () => { - let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: MODEL_PATH }); + let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: getModelPathByLanguage("en") }); - let [transcript, _] = cheetahProcessWaveFile( + let 
[transcript] = cheetahProcessWaveFile( cheetahEngine, - WAV_PATH + "test.wav" ); - expect(transcript).toBe(TRANSCRIPT); + expect(transcript.length).toBeGreaterThan(0); cheetahEngine.release(); }); test('manual model and library path', () => { + const libraryPath = getSystemLibraryPath(); + let cheetahEngine = new Cheetah(ACCESS_KEY, { - modelPath: MODEL_PATH, + modelPath: getModelPathByLanguage("en"), libraryPath: libraryPath, endpointDurationSec: 0.2, }); - let [transcript, _] = cheetahProcessWaveFile( + let [transcript] = cheetahProcessWaveFile( cheetahEngine, - WAV_PATH + "test.wav" ); - expect(transcript).toBe(TRANSCRIPT); + expect(transcript.length).toBeGreaterThan(0); cheetahEngine.release(); }); }); @@ -202,7 +211,7 @@ describe("error message stack", () => { test("message stack cleared after read", () => { let error: string[] = []; try { - new Cheetah('invalid', { modelPath: MODEL_PATH }); + new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") }); } catch (e: any) { error = e.messageStack; } @@ -211,7 +220,7 @@ describe("error message stack", () => { expect(error.length).toBeLessThanOrEqual(8); try { - new Cheetah('invalid', { modelPath: MODEL_PATH }); + new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") }); } catch (e: any) { for (let i = 0; i < error.length; i++) { expect(error[i]).toEqual(e.messageStack[i]); diff --git a/binding/nodejs/test/test_utils.ts b/binding/nodejs/test/test_utils.ts index 93c00575..c4806bd1 100644 --- a/binding/nodejs/test/test_utils.ts +++ b/binding/nodejs/test/test_utils.ts @@ -1,5 +1,5 @@ // -// Copyright 2023 Picovoice Inc. +// Copyright 2024 Picovoice Inc. // // You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" // file accompanying this source. @@ -8,19 +8,27 @@ // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the // specific language governing permissions and limitations under the License. // +import * as fs from 'fs'; import * as path from 'path'; const ROOT_DIR = path.join(__dirname, '../../..'); +const TEST_DATA_JSON = require(path.join( + ROOT_DIR, + 'resources/.test/test_data.json' +)); +const MB_40 = 1024 * 1024 * 40; -export const TRANSCRIPT = - 'Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel'; -export const TRANSCRIPT_WITH_PUNCTUATION = - 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.'; +function appendLanguage(s: string, language: string): string { + if (language === 'en') { + return s; + } + return s + '_' + language; +} -export function getModelPath(): string { +export function getModelPathByLanguage(language: string): string { return path.join( ROOT_DIR, - `lib/common/cheetah_params.pv` + `${appendLanguage('lib/common/cheetah_params', language)}.pv` ); } @@ -28,14 +36,43 @@ export function getAudioFile(audioFile: string): string { return path.join(ROOT_DIR, 'resources/audio_samples', audioFile); } -export function getTestParameters(): [ +function getCpuPart(): string { + if (!fs.existsSync('/proc/cpuinfo')) { + return ""; + } + const cpuInfo = fs.readFileSync('/proc/cpuinfo', 'ascii'); + for (const infoLine of cpuInfo.split('\n')) { + if (infoLine.includes('CPU part')) { + const infoLineSplit = infoLine.split(' '); + return infoLineSplit[infoLineSplit.length - 1].toLowerCase(); + } + } + return ""; +} + +function getModelSize(language: string): number { + const modelPath = getModelPathByLanguage(language); + const stats = fs.statSync(modelPath); + return stats.size; +} + +export function getLanguageTestParameters(): [ string, string, string, + string[], number, - string ][] { - return [ - ["en", TRANSCRIPT, TRANSCRIPT_WITH_PUNCTUATION, 0.025, "test.wav"] - ]; + const cpuPart = getCpuPart(); + let parametersJson = TEST_DATA_JSON.tests.language_tests; + if 
(cpuPart === "0xd03") { + parametersJson = parametersJson.filter((x: any) => (getModelSize(x.language) < MB_40)); + } + return parametersJson.map((x: any) => [ + x.language, + x.audio_file, + x.transcript, + x.punctuations, + x.error_rate, + ]); } diff --git a/binding/python/README.md b/binding/python/README.md index aa1e43bd..d1dd5dd2 100644 --- a/binding/python/README.md +++ b/binding/python/README.md @@ -32,7 +32,7 @@ Cheetah requires a valid Picovoice `AccessKey` at initialization. `AccessKey` ac You can get your `AccessKey` for free. Make sure to keep your `AccessKey` secret. Signup or Login to [Picovoice Console](https://console.picovoice.ai/) to get your `AccessKey`. -### Usage +## Usage Create an instance of the engine and transcribe audio: @@ -53,6 +53,21 @@ while True: Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/). When done be sure to explicitly release the resources using `handle.delete()`. +### Language Model + +The Cheetah Python SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + +Pass in the `.pv` file via the `model_path` argument: +```python +cheetah = pvcheetah.create( + access_key='${ACCESS_KEY}', + model_path='${MODEL_FILE_PATH}') +``` + ## Demos [pvcheetahdemo](https://pypi.org/project/pvcheetahdemo/) provides command-line utilities for processing audio using diff --git a/binding/python/_cheetah.py b/binding/python/_cheetah.py index c37911c4..0e963f96 100644 --- a/binding/python/_cheetah.py +++ b/binding/python/_cheetah.py @@ -1,12 +1,12 @@ # -# Copyright 2018-2023 Picovoice Inc. +# Copyright 2018-2024 Picovoice Inc. 
# -# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" -# file accompanying this source. +# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" +# file accompanying this source. # -# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. # import os @@ -16,7 +16,7 @@ class CheetahError(Exception): - def __init__(self, message: str = '', message_stack: Sequence[str] = None): + def __init__(self, message: str = '', message_stack: Optional[Sequence[str]] = None): super().__init__(message) self._message = message diff --git a/binding/python/setup.py b/binding/python/setup.py index 5d27e11c..09bc33bc 100644 --- a/binding/python/setup.py +++ b/binding/python/setup.py @@ -1,5 +1,5 @@ # -# Copyright 2022-2023 Picovoice Inc. +# Copyright 2022-2024 Picovoice Inc. # # You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" # file accompanying this source. 
@@ -48,7 +48,7 @@ setuptools.setup( name="pvcheetah", - version="2.0.2", + version="2.1.0", author="Picovoice", author_email="hello@picovoice.ai", description="Cheetah Speech-to-Text Engine.", diff --git a/binding/python/test_cheetah.py b/binding/python/test_cheetah.py index 40b42fef..aeedcbf8 100644 --- a/binding/python/test_cheetah.py +++ b/binding/python/test_cheetah.py @@ -1,19 +1,17 @@ # -# Copyright 2018-2023 Picovoice Inc. +# Copyright 2018-2024 Picovoice Inc. # -# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" -# file accompanying this source. +# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" +# file accompanying this source. # -# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
# import os -import struct import sys import unittest -import wave from parameterized import parameterized @@ -22,7 +20,7 @@ from test_util import * -parameters = load_test_data() +language_tests = load_test_data() class CheetahTestCase(unittest.TestCase): @@ -32,17 +30,17 @@ def setUpClass(cls): cls._audio_directory = os.path.join('..', '..', 'resources', 'audio_samples') @classmethod - def _create_cheetah(cls, enable_automatic_punctuation: bool) -> Cheetah: + def _create_cheetah(cls, language: str, enable_automatic_punctuation: bool) -> Cheetah: return Cheetah( access_key=cls._access_key, - model_path=default_model_path('../..'), + model_path=get_model_path_by_language(language=language), library_path=default_library_path('../..'), enable_automatic_punctuation=enable_automatic_punctuation) - @parameterized.expand(parameters) + @parameterized.expand(language_tests) def test_process( self, - _: str, + language: str, audio_file: str, expected_transcript: str, punctuations: List[str], @@ -50,7 +48,7 @@ def test_process( o = None try: - o = self._create_cheetah(False) + o = self._create_cheetah(language=language, enable_automatic_punctuation=False) pcm = read_wav_file( file_name=os.path.join(self._audio_directory, audio_file), @@ -77,10 +75,10 @@ def test_process( if o is not None: o.delete() - @parameterized.expand(parameters) + @parameterized.expand(language_tests) def test_process_with_punctuation( self, - _: str, + language: str, audio_file: str, expected_transcript: str, punctuations: List[str], @@ -88,7 +86,7 @@ def test_process_with_punctuation( o = None try: - o = self._create_cheetah(True) + o = self._create_cheetah(language=language, enable_automatic_punctuation=True) pcm = read_wav_file( file_name=os.path.join(self._audio_directory, audio_file), @@ -112,7 +110,11 @@ def test_process_with_punctuation( o.delete() def test_version(self): - o = self._create_cheetah(False) + o = Cheetah( + access_key=self._access_key, + model_path=default_model_path('../..'), + 
library_path=default_library_path('../..'), + enable_automatic_punctuation=True) self.assertIsInstance(o.version, str) self.assertGreater(len(o.version), 0) diff --git a/binding/python/test_cheetah_perf.py b/binding/python/test_cheetah_perf.py index dfc5b795..3c173fd0 100644 --- a/binding/python/test_cheetah_perf.py +++ b/binding/python/test_cheetah_perf.py @@ -1,12 +1,12 @@ # -# Copyright 2022 Picovoice Inc. +# Copyright 2022-2024 Picovoice Inc. # -# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" -# file accompanying this source. +# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" +# file accompanying this source. # -# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on -# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. # import os diff --git a/binding/python/test_util.py b/binding/python/test_util.py index 4ba1815e..8ebf818a 100644 --- a/binding/python/test_util.py +++ b/binding/python/test_util.py @@ -1,5 +1,5 @@ # -# Copyright 2023 Picovoice Inc. +# Copyright 2023-2024 Picovoice Inc. # # You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" # file accompanying this source. @@ -9,23 +9,32 @@ # specific language governing permissions and limitations under the License. 
# +import json +import os import struct import wave + from typing import * def load_test_data() -> List[Tuple[str, str, str, List[str], float]]: - parameters = [ + data_file_path = os.path.join(os.path.dirname(__file__), "../../resources/.test/test_data.json") + with open(data_file_path, encoding="utf8") as data_file: + json_test_data = data_file.read() + test_data = json.loads(json_test_data)['tests'] + + language_tests = [ ( - "en", - "test.wav", - "Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.", - ["."], - 0.025 + t['language'], + t['audio_file'], + t['transcript'], + t['punctuations'], + t['error_rate'], ) + for t in test_data['language_tests'] ] - return parameters + return language_tests def read_wav_file(file_name: str, sample_rate: int) -> Tuple: @@ -48,6 +57,17 @@ def read_wav_file(file_name: str, sample_rate: int) -> Tuple: return frames[::channels] +def get_model_path_by_language(language: str) -> str: + model_path_subdir = _append_language('../../lib/common/cheetah_params', language) + return os.path.join(os.path.dirname(__file__), f'{model_path_subdir}.pv') + + +def _append_language(s: str, language: str) -> str: + if language == 'en': + return s + return "%s_%s" % (s, language) + + def get_word_error_rate(transcript: str, expected_transcript: str, use_cer: bool = False) -> float: transcript_split = list(transcript) if use_cer else transcript.split() expected_split = list(expected_transcript) if use_cer else expected_transcript.split() @@ -55,7 +75,7 @@ def get_word_error_rate(transcript: str, expected_transcript: str, use_cer: bool def _levenshtein_distance(words1: Sequence[str], words2: Sequence[str]) -> int: - res = [[0] * (len(words1) + 2) for _ in range(len(words2) + 1)] + res = [[0] * (len(words2) + 1) for _ in range(len(words1) + 1)] for i in range(len(words1) + 1): res[i][0] = i for j in range(len(words2) + 1): diff --git a/binding/react-native/android/build.gradle 
b/binding/react-native/android/build.gradle index c9a05cd1..e5a89ced 100644 --- a/binding/react-native/android/build.gradle +++ b/binding/react-native/android/build.gradle @@ -129,5 +129,5 @@ repositories { dependencies { // noinspection GradleDynamicVersion api 'com.facebook.react:react-native:+' - implementation 'ai.picovoice:cheetah-android:2.0.0' + implementation 'ai.picovoice:cheetah-android:2.1.0' } diff --git a/binding/react-native/cheetah-react-native.podspec b/binding/react-native/cheetah-react-native.podspec index e7cbf3d3..d43fe08b 100644 --- a/binding/react-native/cheetah-react-native.podspec +++ b/binding/react-native/cheetah-react-native.podspec @@ -16,5 +16,5 @@ Pod::Spec.new do |s| s.source_files = "ios/*.{h,m,mm,swift}" s.dependency "React" - s.dependency "Cheetah-iOS", '~> 2.0.1' + s.dependency "Cheetah-iOS", '~> 2.1.0' end diff --git a/binding/react-native/package.json b/binding/react-native/package.json index b547a989..d040116a 100644 --- a/binding/react-native/package.json +++ b/binding/react-native/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/cheetah-react-native", - "version": "2.0.2", + "version": "2.1.0", "description": "Picovoice Cheetah React Native binding", "main": "lib/commonjs/index", "module": "lib/module/index", diff --git a/binding/react-native/test-app/CheetahTestApp/Tests.ts b/binding/react-native/test-app/CheetahTestApp/Tests.ts index 9f5881a9..cc868e33 100644 --- a/binding/react-native/test-app/CheetahTestApp/Tests.ts +++ b/binding/react-native/test-app/CheetahTestApp/Tests.ts @@ -4,17 +4,11 @@ import {decode as atob} from 'base-64'; import {Cheetah} from '@picovoice/cheetah-react-native'; +const testData = require('./test_data.json'); const platform = Platform.OS; const TEST_ACCESS_KEY: string = '{TESTING_ACCESS_KEY_HERE}'; -const LANGUAGE = 'en'; -const AUDIO_FILE = 'test.wav'; -const EXPECTED_TRANSCRIPT = - 'Mr. 
Quilter is the apostle of the middle classes and we are glad to welcome his gospel.'; -const PUNCTUATIONS = ['.']; -const ERROR_RATE = 0.025; - export type Result = { testName: string; success: boolean; @@ -242,28 +236,34 @@ async function initTests(): Promise { async function processTests(): Promise { const results: Result[] = []; - let result = await runProcTestCase( - LANGUAGE, - AUDIO_FILE, - EXPECTED_TRANSCRIPT, - PUNCTUATIONS, - ERROR_RATE, - ); - logResult(result); - results.push(result); + for (const testParam of testData.tests.language_tests) { + const result = await runProcTestCase( + testParam.language, + testParam.audio_file, + testParam.transcript, + testParam.punctuations, + testParam.error_rate, + ); + result.testName = `Process test for '${testParam.language}'`; + logResult(result); + results.push(result); + } - result = await runProcTestCase( - LANGUAGE, - AUDIO_FILE, - EXPECTED_TRANSCRIPT, - PUNCTUATIONS, - ERROR_RATE, - { - enablePunctuation: true, - }, - ); - logResult(result); - results.push(result); + for (const testParam of testData.tests.language_tests) { + const result = await runProcTestCase( + testParam.language, + testParam.audio_file, + testParam.transcript, + testParam.punctuations, + testParam.error_rate, + { + enablePunctuation: true, + }, + ); + result.testName = `Process test with punctuation for '${testParam.language}'`; + logResult(result); + results.push(result); + } return results; } diff --git a/binding/react-native/test-app/CheetahTestApp/copy_test_resources.sh b/binding/react-native/test-app/CheetahTestApp/copy_test_resources.sh index 83ee2f69..b0aefd06 100755 --- a/binding/react-native/test-app/CheetahTestApp/copy_test_resources.sh +++ b/binding/react-native/test-app/CheetahTestApp/copy_test_resources.sh @@ -18,3 +18,6 @@ mkdir -p ${ANDROID_ASSETS_DIR}/model_files mkdir -p ${IOS_ASSETS_DIR}/model_files cp ${LIB_DIR}/common/*.pv ${ANDROID_ASSETS_DIR}/model_files cp ${LIB_DIR}/common/*.pv ${IOS_ASSETS_DIR}/model_files + +echo 
"Copying test data file..." +cp ${RESOURCE_DIR}/.test/test_data.json . diff --git a/binding/react-native/test-app/CheetahTestApp/ios/Podfile b/binding/react-native/test-app/CheetahTestApp/ios/Podfile index aa2b23cf..af23d92d 100644 --- a/binding/react-native/test-app/CheetahTestApp/ios/Podfile +++ b/binding/react-native/test-app/CheetahTestApp/ios/Podfile @@ -14,7 +14,7 @@ end target 'CheetahTestApp' do config = use_native_modules! - pod 'Cheetah-iOS', '~> 2.0.0' + pod 'Cheetah-iOS', '~> 2.1.0' # Flags change depending on the env values. flags = get_default_flags() diff --git a/binding/react-native/test-app/CheetahTestApp/package.json b/binding/react-native/test-app/CheetahTestApp/package.json index 910be273..d21a5d51 100644 --- a/binding/react-native/test-app/CheetahTestApp/package.json +++ b/binding/react-native/test-app/CheetahTestApp/package.json @@ -10,7 +10,7 @@ "test": "jest" }, "dependencies": { - "@picovoice/cheetah-react-native": "file:../..", + "@picovoice/cheetah-react-native": "=2.1.0", "@picovoice/react-native-voice-processor": "1.2.3", "base-64": "^1.0.0", "react": "18.2.0", diff --git a/binding/react-native/test-app/CheetahTestApp/yarn.lock b/binding/react-native/test-app/CheetahTestApp/yarn.lock index b437f99a..3ee3c9db 100644 --- a/binding/react-native/test-app/CheetahTestApp/yarn.lock +++ b/binding/react-native/test-app/CheetahTestApp/yarn.lock @@ -1441,8 +1441,10 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/cheetah-react-native@file:../..": - version "2.0.2" +"@picovoice/cheetah-react-native@=2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/cheetah-react-native/-/cheetah-react-native-2.1.0.tgz#ec0e49b3fb7ecc40ce80a8da68b59757e1616a07" + integrity sha512-yF62GbapMDP7T7MJqlYWDtdwWTakm1eRLUGdqYtVOzrUgRWiTdgJB4z7zkpAQoqQeVsXU6sj0JfPS44weQCndQ== "@picovoice/react-native-voice-processor@1.2.3": version "1.2.3" diff --git a/binding/react/README.md b/binding/react/README.md index 7541140e..9d5e99fd 100644 --- 
a/binding/react/README.md +++ b/binding/react/README.md @@ -183,10 +183,12 @@ If any arguments require changes, call `release`, then `init` again to initializ You do not need to call `release` when your component is unmounted - the hook will clean up automatically on unmount. -## Non-English Languages +### Language Model -In order to detect non-English wake words you need to use the corresponding model file (`.pv`). The model files for all -supported languages are available [here](https://github.com/Picovoice/cheetah/tree/master/lib/common). +Default models for supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. ## Demo diff --git a/binding/react/package.json b/binding/react/package.json index 953c4bb9..9c7500e9 100644 --- a/binding/react/package.json +++ b/binding/react/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/cheetah-react", - "version": "2.0.0", + "version": "2.1.0", "description": "React hook for Cheetah Web SDK", "entry": "src/index.ts", "module": "dist/esm/index.js", @@ -36,7 +36,7 @@ "test": "cypress run --component" }, "dependencies": { - "@picovoice/cheetah-web": "=2.0.0" + "@picovoice/cheetah-web": "~2.1.0" }, "devDependencies": { "@babel/core": "^7.21.3", diff --git a/binding/react/scripts/setup_test.js b/binding/react/scripts/setup_test.js index 071a93e2..e9357843 100644 --- a/binding/react/scripts/setup_test.js +++ b/binding/react/scripts/setup_test.js @@ -15,6 +15,16 @@ const paramsSourceDirectory = join( 'common' ); +const testDataSource = join( + __dirname, + '..', + '..', + '..', + 'resources', + '.test', + 'test_data.json' +); + const sourceDirectory = join(__dirname, '..', '..', '..', 'resources'); try { @@ -27,6 +37,8 @@ try { ); }); + fs.copyFileSync(testDataSource, join(testDirectory, 'test_data.json')); + 
fs.mkdirSync(join(fixturesDirectory, 'audio_samples'), { recursive: true }); fs.readdirSync(join(sourceDirectory, 'audio_samples')).forEach(file => { fs.copyFileSync( diff --git a/binding/react/test/use_cheetah.test.ts b/binding/react/test/use_cheetah.test.ts index a913fe0b..67347d5d 100644 --- a/binding/react/test/use_cheetah.test.ts +++ b/binding/react/test/use_cheetah.test.ts @@ -5,16 +5,10 @@ import { useCheetah } from '../src'; // @ts-ignore import cheetahParams from '@/cheetah_params.js'; -const ACCESS_KEY = Cypress.env('ACCESS_KEY'); +// @ts-ignore +import testData from './test_data.json'; -const testParam = { - language: 'en', - audio_file: 'test.wav', - transcript: - 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.', - punctuations: ['.'], - error_rate: 0.025, -}; +const ACCESS_KEY = Cypress.env('ACCESS_KEY'); const levenshteinDistance = (words1: string[], words2: string[]) => { const res = Array.from( @@ -46,14 +40,14 @@ const wordErrorRate = ( useCER = false ): number => { const splitter = useCER ? 
'' : ' '; - const ed = levenshteinDistance( - reference.split(splitter), - hypothesis.split(splitter) - ); - return ed / reference.length; + const refWords = reference.split(splitter); + const hypWords = hypothesis.split(splitter); + const ed = levenshteinDistance(refWords, hypWords); + return ed / refWords.length; }; const runProcTest = async ( + audioFile: string, punctuations: string[], expectedTranscript: string, expectedErrorRate: number, @@ -67,7 +61,7 @@ const runProcTest = async ( const { accessKey = ACCESS_KEY, model = { publicPath: '/test/cheetah_params.pv', forceWrite: true }, - enablePunctuation = true, + enablePunctuation = false, useCER = false, } = params; const { result } = renderHook(() => useCheetah()); @@ -87,11 +81,11 @@ const runProcTest = async ( expect(result.current.isListening).to.be.true; }); - cy.mockRecording('audio_samples/test.wav'); + cy.mockRecording(audioFile); cy.wrapHook(result.current.stop).then(() => { let normalizedTranscript = expectedTranscript; - if (enablePunctuation) { + if (!enablePunctuation) { for (const punctuation of punctuations) { normalizedTranscript = normalizedTranscript.replaceAll(punctuation, ''); } @@ -192,22 +186,42 @@ describe('Cheetah binding', () => { }); }); - it(`should be able to process (${testParam.language})`, () => { - runProcTest( - testParam.punctuations, - testParam.transcript, - testParam.error_rate, - { - enablePunctuation: true, - } - ); - }); + for (const testParam of testData.tests.language_tests) { + const suffix = testParam.language === 'en' ? 
'' : `_${testParam.language}`; + + it(`should be able to process (${testParam.language})`, () => { + cy.wrap(null).then(async () => { + await runProcTest( + `audio_samples/${testParam.audio_file}`, + testParam.punctuations, + testParam.transcript, + testParam.error_rate, + { + model: { + publicPath: `/test/cheetah_params${suffix}.pv`, + forceWrite: true, + }, + } + ); + }); + }); - it(`should be able to process with punctuation (${testParam.language})`, () => { - runProcTest( - testParam.punctuations, - testParam.transcript, - testParam.error_rate - ); - }); + it(`should be able to process with punctuation (${testParam.language})`, () => { + cy.wrap(null).then(async () => { + await runProcTest( + `audio_samples/${testParam.audio_file}`, + testParam.punctuations, + testParam.transcript, + testParam.error_rate, + { + model: { + publicPath: `/test/cheetah_params${suffix}.pv`, + forceWrite: true, + }, + enablePunctuation: true, + } + ); + }); + }); + } }); diff --git a/binding/react/yarn.lock b/binding/react/yarn.lock index 9849b22f..e6027bb7 100644 --- a/binding/react/yarn.lock +++ b/binding/react/yarn.lock @@ -1110,12 +1110,12 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/cheetah-web@=2.0.0": - version "2.0.0" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-web/-/cheetah-web-2.0.0.tgz#d4415c25e324726356f979bed3761a94e884198e" - integrity sha512-WqxHUznNS7Rf8XfJCp0m0l+xeYFDSFhzOTg+b2DJn06x1slhpJA4CiK4egiH7FzhHiJtvqjLF0dO14LA8e1Gpg== +"@picovoice/cheetah-web@~2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/cheetah-web/-/cheetah-web-2.1.0.tgz#d75e579460ca7d3b308ff4d56c82b0171f19ff30" + integrity sha512-LMJC8wRu6vakDdzuL9flWtgmrYk/6fodBwX18cNSqmXgRaVYNn7/jjW+N7XgA5UQrbGLcNBipIdsEBPivuzCBA== dependencies: - "@picovoice/web-utils" "=1.3.1" + "@picovoice/web-utils" "=1.4.3" "@picovoice/web-utils@=1.3.1": version "1.3.1" @@ -1124,6 +1124,13 @@ dependencies: commander "^9.2.0" +"@picovoice/web-utils@=1.4.3": + version 
"1.4.3" + resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.3.tgz#1de0b20d6080c18d295c6df37c09d88bf7c4f555" + integrity sha512-7JN3YYsSD9Gtce6YKG3XqpX49dkeu7jTdbox7rHQA/X/Q3zxopXA9zlCKSq6EIjFbiX2iuzDKUx1XrFa3d8c0w== + dependencies: + commander "^10.0.1" + "@picovoice/web-voice-processor@~4.0.8": version "4.0.8" resolved "https://registry.yarnpkg.com/@picovoice/web-voice-processor/-/web-voice-processor-4.0.8.tgz#95247a5393cac4d16490a53feb0f413c902ee5fa" @@ -1723,6 +1730,11 @@ combined-stream@^1.0.6, combined-stream@~1.0.6: dependencies: delayed-stream "~1.0.0" +commander@^10.0.1: + version "10.0.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06" + integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug== + commander@^2.20.0: version "2.20.3" resolved "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz" diff --git a/binding/rust/Cargo.toml b/binding/rust/Cargo.toml index 935cf002..16057dfa 100644 --- a/binding/rust/Cargo.toml +++ b/binding/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pv_cheetah" -version = "2.0.3" +version = "2.1.0" edition = "2018" description = "The Rust bindings for Picovoice's Cheetah library" license = "Apache-2.0" @@ -27,10 +27,11 @@ crate_type = ["lib"] [dependencies] libc = "0.2" -libloading = "0.7" +libloading = "0.8" [dev-dependencies] distance = "0.4.0" -itertools = "0.10" -rodio = "0.15" +itertools = "0.11" +rodio = "0.17" serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } diff --git a/binding/rust/README.md b/binding/rust/README.md index 87b8d2f8..91e19bd2 100644 --- a/binding/rust/README.md +++ b/binding/rust/README.md @@ -71,6 +71,23 @@ Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://con The model file contains the parameters for the Cheetah engine. 
You may create bespoke language models using [Picovoice Console](https://console.picovoice.ai/) and then pass in the relevant file. +### Language Model + +The Cheetah Rust SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + +Pass in the `.pv` file via the `.model_path()` Builder argument: +```rust +let cheetah: Cheetah = CheetahBuilder::new() + .access_key("${ACCESS_KEY}") + .model_path("${MODEL_FILE_PATH}") + .init() + .expect("Unable to create Cheetah"); +``` + ## Demos The [Cheetah Rust demo project](https://github.com/Picovoice/cheetah/tree/master/demo/rust) is a Rust console app that allows for processing real-time audio (i.e. microphone) and files using Cheetah. diff --git a/binding/rust/copy.sh b/binding/rust/copy.sh index ecb68c8d..1912cba8 100755 --- a/binding/rust/copy.sh +++ b/binding/rust/copy.sh @@ -8,7 +8,8 @@ echo "Preparing dir ..." mkdir -p ./data/lib/ echo "Copying Model File ..." -cp -r ../../lib/common ./data/lib/ +mkdir -p ./data/lib/common +cp -r ../../lib/common/cheetah_params.pv ./data/lib/common for platform in linux mac raspberry-pi windows do diff --git a/binding/rust/tests/cheetah_tests.rs b/binding/rust/tests/cheetah_tests.rs index c1727d34..30545a95 100644 --- a/binding/rust/tests/cheetah_tests.rs +++ b/binding/rust/tests/cheetah_tests.rs @@ -1,5 +1,5 @@ /* - Copyright 2022-2023 Picovoice Inc. + Copyright 2022-2024 Picovoice Inc. You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source.
@@ -14,22 +14,59 @@ mod tests { use distance::*; use itertools::Itertools; use rodio::{source::Source, Decoder}; - use serde_json::{json, Value}; + use serde::Deserialize; use std::env; - use std::fs::File; + use std::fs::{read_to_string, File}; use std::io::BufReader; use cheetah::CheetahBuilder; - fn load_test_data() -> Value { - let test_json: Value = json!([{ - "language": "en", - "transcript": "Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel", - "transcript_with_punctuation": "Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.", - "error_rate": 0.025, - "audio_file": "test.wav" - }]); - test_json + #[derive(Debug, Deserialize)] + struct LanguageTestJson { + language: String, + audio_file: String, + transcript: String, + punctuations: Vec, + error_rate: f32, + } + + #[derive(Debug, Deserialize)] + struct TestsJson { + language_tests: Vec, + } + + #[derive(Debug, Deserialize)] + struct RootJson { + tests: TestsJson, + } + + fn load_test_data() -> TestsJson { + let test_json_path = format!( + "{}{}", + env!("CARGO_MANIFEST_DIR"), + "/../../resources/.test/test_data.json" + ); + let contents: String = + read_to_string(test_json_path).expect("Unable to read test_data.json"); + let root: RootJson = serde_json::from_str(&contents).expect("Failed to parse JSON"); + root.tests + } + + fn append_lang(path: &str, language: &str) -> String { + if language == "en" { + String::from(path) + } else { + format!("{}_{}", path, language) + } + } + + fn model_path_by_language(language: &str) -> String { + format!( + "{}{}{}", + env!("CARGO_MANIFEST_DIR"), + append_lang("/../../lib/common/cheetah_params", language), + ".pv" + ) } fn character_error_rate(transcript: &str, expected_transcript: &str) -> f32 { @@ -38,7 +75,7 @@ mod tests { } fn run_test_process( - _: &str, + language: &str, transcript: &str, test_punctuation: bool, error_rate: f32, @@ -47,6 +84,8 @@ mod tests { let access_key = 
env::var("PV_ACCESS_KEY") .expect("Pass the AccessKey in using the PV_ACCESS_KEY env variable"); + let model_path = model_path_by_language(language); + let audio_path = format!( "{}{}{}", env!("CARGO_MANIFEST_DIR"), @@ -59,6 +98,7 @@ mod tests { let cheetah = CheetahBuilder::new() .access_key(access_key) + .model_path(model_path) .enable_automatic_punctuation(test_punctuation) .init() .expect("Unable to create Cheetah"); @@ -82,42 +122,37 @@ mod tests { #[test] fn test_process() { - let test_json: Value = load_test_data(); - - for t in test_json.as_array().unwrap() { - let language = t["language"].as_str().unwrap(); - let transcript = t["transcript"].as_str().unwrap(); - let error_rate = t["error_rate"].as_f64().unwrap() as f32; + let test_json: TestsJson = load_test_data(); - let test_audio = t["audio_file"].as_str().unwrap(); + for t in test_json.language_tests { + let mut transcript = t.transcript; + for p in t.punctuations { + transcript = transcript.replace(&p, "") + } run_test_process( - language, - transcript, + &t.language, + &transcript, false, - error_rate, - &test_audio, + t.error_rate, + &t.audio_file, ); } } #[test] fn test_process_punctuation() { - let test_json: Value = load_test_data(); - - for t in test_json.as_array().unwrap() { - let language = t["language"].as_str().unwrap(); - let transcript_with_punctuation = t["transcript_with_punctuation"].as_str().unwrap(); - let error_rate = t["error_rate"].as_f64().unwrap() as f32; + let test_json: TestsJson = load_test_data(); - let test_audio = t["audio_file"].as_str().unwrap(); + for t in test_json.language_tests { + let transcript = t.transcript; run_test_process( - language, - transcript_with_punctuation, + &t.language, + &transcript, true, - error_rate, - &test_audio, + t.error_rate, + &t.audio_file, ); } } diff --git a/binding/web/.gitignore b/binding/web/.gitignore index ee7e40b6..169eea56 100644 --- a/binding/web/.gitignore +++ b/binding/web/.gitignore @@ -4,3 +4,4 @@ lib/pv_cheetah*.wasm 
cypress/fixtures/audio_samples/* test/cheetah_params*.js test/cheetah_params*.pv +test/test_data.json diff --git a/binding/web/README.md b/binding/web/README.md index 15041382..10b65fa8 100644 --- a/binding/web/README.md +++ b/binding/web/README.md @@ -191,6 +191,13 @@ Terminate `CheetahWorker` instance: await handle.terminate(); ``` +### Language Model + +Default models for supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + ## Demo For example usage refer to our [Web demo application](https://github.com/Picovoice/cheetah/tree/master/demo/web). diff --git a/binding/web/package.json b/binding/web/package.json index b10c163f..0dd5ac50 100644 --- a/binding/web/package.json +++ b/binding/web/package.json @@ -3,7 +3,7 @@ "description": "Cheetah Speech-to-Text engine for web browsers (via WebAssembly)", "author": "Picovoice Inc", "license": "Apache-2.0", - "version": "2.0.0", + "version": "2.1.0", "keywords": [ "cheetah", "web", @@ -35,7 +35,7 @@ "test-perf": "cypress run --spec test/cheetah_perf.test.ts" }, "dependencies": { - "@picovoice/web-utils": "=1.3.1" + "@picovoice/web-utils": "=1.4.3" }, "devDependencies": { "@babel/core": "^7.21.3", diff --git a/binding/web/scripts/setup_test.js b/binding/web/scripts/setup_test.js index 33134657..dd9f3014 100644 --- a/binding/web/scripts/setup_test.js +++ b/binding/web/scripts/setup_test.js @@ -15,6 +15,16 @@ const paramsSourceDirectory = join( 'common', ); +const testDataSource = join( + __dirname, + '..', + '..', + '..', + 'resources', + '.test', + 'test_data.json' +); + const sourceDirectory = join( __dirname, "..", @@ -30,6 +40,8 @@ try { fs.copyFileSync(join(paramsSourceDirectory, file), join(testDirectory, file)); }); + fs.copyFileSync(testDataSource, join(testDirectory, 'test_data.json')); + 
fs.mkdirSync(join(fixturesDirectory, 'audio_samples'), { recursive: true }); fs.readdirSync(join(sourceDirectory, 'audio_samples')).forEach(file => { fs.copyFileSync(join(sourceDirectory, 'audio_samples', file), join(fixturesDirectory, 'audio_samples', file)); diff --git a/binding/web/src/cheetah.ts b/binding/web/src/cheetah.ts index d83eb5d8..1adffc29 100644 --- a/binding/web/src/cheetah.ts +++ b/binding/web/src/cheetah.ts @@ -452,7 +452,7 @@ export class Cheetah { // A WebAssembly page has a constant size of 64KiB. -> 1MiB ~= 16 pages const memory = new WebAssembly.Memory({ initial: 3700 }); - const memoryBufferUint8 = new Uint8Array(memory.buffer); + let memoryBufferUint8 = new Uint8Array(memory.buffer); const pvError = new PvError(); @@ -551,7 +551,7 @@ export class Cheetah { throw new CheetahErrors.CheetahOutOfMemoryError('malloc failed: Cannot allocate memory'); } - const memoryBufferView = new DataView(memory.buffer); + let memoryBufferView = new DataView(memory.buffer); const status = await pv_cheetah_init( accessKeyAddress, @@ -559,6 +559,15 @@ export class Cheetah { endpointDurationSec, (enableAutomaticPunctuation) ? 
1 : 0, objectAddressAddress); + + if (memoryBufferView.buffer.byteLength === 0) { + memoryBufferView = new DataView(memory.buffer); + } + + if (memoryBufferUint8.buffer.byteLength === 0) { + memoryBufferUint8 = new Uint8Array(memory.buffer); + } + if (status !== PV_STATUS_SUCCESS) { const messageStack = await Cheetah.getMessageStack( pv_get_error_stack, @@ -599,7 +608,7 @@ export class Cheetah { frameLength: frameLength, sampleRate: sampleRate, version: version, - + objectAddress: objectAddress, inputBufferAddress: inputBufferAddress, isEndpointAddress: isEndpointAddress, @@ -625,7 +634,7 @@ export class Cheetah { memoryBufferUint8: Uint8Array, ): Promise { const status = await pv_get_error_stack(messageStackAddressAddressAddress, messageStackDepthAddress); - if (status != PvStatus.SUCCESS) { + if (status !== PvStatus.SUCCESS) { throw pvStatusToException(status, "Unable to get Cheetah error state"); } diff --git a/binding/web/test/cheetah.test.ts b/binding/web/test/cheetah.test.ts index 2cfd9fc3..2496ecc6 100644 --- a/binding/web/test/cheetah.test.ts +++ b/binding/web/test/cheetah.test.ts @@ -1,19 +1,12 @@ import { Cheetah, CheetahWorker } from "../"; -import { CheetahError } from "../dist/types/cheetah_error"; +import { CheetahError } from "../dist/types/cheetah_errors"; +import testData from './test_data.json'; // @ts-ignore import cheetahParams from "./cheetah_params"; import { PvModel } from '@picovoice/web-utils'; -const ACCESS_KEY: string = Cypress.env("ACCESS_KEY"); - -const testParam = { - language: 'en', - audio_file: 'test.wav', - transcript: 'Mr. 
Quilter is the apostle of the middle classes and we are glad to welcome his gospel.', - punctuations: ['.'], - error_rate: 0.025, -}; +const ACCESS_KEY: string = Cypress.env('ACCESS_KEY'); const levenshteinDistance = (words1: string[], words2: string[]) => { const res = Array.from(Array(words1.length + 1), () => new Array(words2.length + 1)); @@ -37,8 +30,10 @@ const levenshteinDistance = (words1: string[], words2: string[]) => { const wordErrorRate = (reference: string, hypothesis: string, useCER = false): number => { const splitter = (useCER) ? '' : ' '; - const ed = levenshteinDistance(reference.split(splitter), hypothesis.split(splitter)); - return ed / reference.length; + const refWords = reference.split(splitter); + const hypWords = hypothesis.split(splitter); + const ed = levenshteinDistance(refWords, hypWords); + return ed / refWords.length; }; function delay(time: number) { @@ -131,7 +126,7 @@ const runProcTest = async ( model, { enableAutomaticPunctuation: enablePunctuation, - processErrorCallback: (error: string) => { + processErrorCallback: (error: CheetahError) => { reject(error); } } @@ -166,7 +161,7 @@ const runProcTest = async ( describe("Cheetah Binding", function () { it(`should return process and flush error message stack`, async () => { - let errors: [CheetahError] = []; + let errors: CheetahError[] = []; const runProcess = () => new Promise(async resolve => { const cheetah = await Cheetah.create( @@ -285,45 +280,59 @@ describe("Cheetah Binding", function () { }); }); - it(`should be able to process (${testParam.language}) (${instanceString})`, () => { - try { - cy.getFramesFromFile(`audio_samples/${testParam.audio_file}`).then( async pcm => { - const suffix = (testParam.language === 'en') ? 
'' : `_${testParam.language}`; - await runProcTest( - instance, - pcm, - testParam.punctuations, - testParam.transcript, - testParam.error_rate, - { - model: { publicPath: `/test/cheetah_params${suffix}.pv`, forceWrite: true }, - enablePunctuation: false, - useCER: (testParam.language === 'ja') - }); - }); - } catch (e) { - expect(e).to.be.undefined; - } - }); + for (const testParam of testData.tests.language_tests) { + it(`should be able to process (${testParam.language}) (${instanceString})`, () => { + try { + cy.getFramesFromFile(`audio_samples/${testParam.audio_file}`).then( + async pcm => { + const suffix = + testParam.language === 'en' ? '' : `_${testParam.language}`; + await runProcTest( + instance, + pcm, + testParam.punctuations, + testParam.transcript, + testParam.error_rate, + { + model: { + publicPath: `/test/cheetah_params${suffix}.pv`, + forceWrite: true, + }, + } + ); + } + ); + } catch (e) { + expect(e).to.be.undefined; + } + }); - it(`should be able to process with punctuation (${testParam.language}) (${instanceString})`, () => { - try { - cy.getFramesFromFile(`audio_samples/${testParam.audio_file}`).then( async pcm => { - const suffix = (testParam.language === 'en') ? '' : `_${testParam.language}`; - await runProcTest( - instance, - pcm, - testParam.punctuations, - testParam.transcript, - testParam.error_rate, - { - model: { publicPath: `/test/cheetah_params${suffix}.pv`, forceWrite: true }, - useCER: (testParam.language === 'ja') - }); - }); - } catch (e) { - expect(e).to.be.undefined; - } - }); + it(`should be able to process with punctuation (${testParam.language}) (${instanceString})`, () => { + try { + cy.getFramesFromFile(`audio_samples/${testParam.audio_file}`).then( + async pcm => { + const suffix = + testParam.language === 'en' ? 
'' : `_${testParam.language}`; + await runProcTest( + instance, + pcm, + testParam.punctuations, + testParam.transcript, + testParam.error_rate, + { + model: { + publicPath: `/test/cheetah_params${suffix}.pv`, + forceWrite: true, + }, + enablePunctuation: true, + } + ); + } + ); + } catch (e) { + expect(e).to.be.undefined; + } + }); + } } }); diff --git a/binding/web/yarn.lock b/binding/web/yarn.lock index 666e46ec..ea08a359 100644 --- a/binding/web/yarn.lock +++ b/binding/web/yarn.lock @@ -1100,12 +1100,12 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/web-utils@=1.3.1": - version "1.3.1" - resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.3.1.tgz#d417e98604a650b54a8e03669015ecf98c2383ec" - integrity sha512-jcDqdULtTm+yJrnHDjg64hARup+Z4wNkYuXHNx6EM8+qZkweBq9UA6XJrHAlUkPnlkso4JWjaIKhz3x8vZcd3g== +"@picovoice/web-utils@=1.4.3": + version "1.4.3" + resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.3.tgz#1de0b20d6080c18d295c6df37c09d88bf7c4f555" + integrity sha512-7JN3YYsSD9Gtce6YKG3XqpX49dkeu7jTdbox7rHQA/X/Q3zxopXA9zlCKSq6EIjFbiX2iuzDKUx1XrFa3d8c0w== dependencies: - commander "^9.2.0" + commander "^10.0.1" "@rollup/plugin-babel@^6.0.3": version "6.0.4" @@ -1679,6 +1679,11 @@ combined-stream@^1.0.6, combined-stream@~1.0.6: dependencies: delayed-stream "~1.0.0" +commander@^10.0.1: + version "10.0.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06" + integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug== + commander@^2.20.0: version "2.20.3" resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" @@ -1689,11 +1694,6 @@ commander@^5.1.0: resolved "https://registry.yarnpkg.com/commander/-/commander-5.1.0.tgz#46abbd1652f8e059bddaef99bbdcb2ad9cf179ae" integrity 
sha512-P0CysNDQ7rtVw4QIQtm+MRxV66vKFSvlsQvGYXZWR3qFU0jlMKHZZZgw8e+8DSah4UDKMqnknRDQz+xuQXQ/Zg== -commander@^9.2.0: - version "9.5.0" - resolved "https://registry.yarnpkg.com/commander/-/commander-9.5.0.tgz#bc08d1eb5cedf7ccb797a96199d41c7bc3e60d30" - integrity sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ== - common-tags@^1.8.0: version "1.8.2" resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.2.tgz#94ebb3c076d26032745fd54face7f688ef5ac9c6" diff --git a/demo/android/CheetahDemo/.gitignore b/demo/android/CheetahDemo/.gitignore index cf444b64..f967018a 100644 --- a/demo/android/CheetahDemo/.gitignore +++ b/demo/android/CheetahDemo/.gitignore @@ -8,4 +8,4 @@ .externalNativeBuild release test_resources -cheetah_params.pv \ No newline at end of file +*.pv diff --git a/demo/android/CheetahDemo/README.md b/demo/android/CheetahDemo/README.md index b751e40f..07448207 100644 --- a/demo/android/CheetahDemo/README.md +++ b/demo/android/CheetahDemo/README.md @@ -17,14 +17,3 @@ Launch the demo on your phone using Android Studio. 1. Press the record button. 2. Start talking. The transcription will appear in the textbox above. - -## Running the Instrumented Unit Tests - -Ensure you have an Android device connected or simulator running. Then run the following from the terminal: - -```console -cd demo/android/CheetahDemo -./gradlew connectedAndroidTest -PpvTestingAccessKey="YOUR_ACCESS_KEY_HERE" -``` - -The test results are stored in `cheetah-demo-app/build/reports`. 
diff --git a/demo/android/CheetahDemo/cheetah-demo-app/build.gradle b/demo/android/CheetahDemo/cheetah-demo-app/build.gradle index 7b6c1bb2..1c66c70e 100644 --- a/demo/android/CheetahDemo/cheetah-demo-app/build.gradle +++ b/demo/android/CheetahDemo/cheetah-demo-app/build.gradle @@ -1,4 +1,7 @@ +import groovy.json.JsonSlurper + apply plugin: 'com.android.application' + android { compileSdkVersion defaultTargetSdkVersion @@ -17,6 +20,44 @@ android { proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' } } + + def testDataFile = file('../../../../resources/.test/test_data.json') + def parsedJson = new JsonSlurper().parseText(testDataFile.text) + def languages = [] + parsedJson.tests.language_tests.each { a -> + languages.add(a.language) + } + + flavorDimensions "language" + productFlavors { + en { + getIsDefault().set(true) + } + + languages.each { language -> + "$language" { + applicationIdSuffix ".$language" + + } + } + + all { flavor -> + delete fileTree("$projectDir/src/main/assets") { + exclude '**/.gitkeep' + } + task("${flavor.name}CopyParams", type: Copy) { + if (flavor.name != 'en') { + from("$projectDir/../../../../lib/common/") + include("cheetah_params_${flavor.name}.pv") + into("$projectDir/src/main/assets/models") + } else { + from("$projectDir/../../../../lib/common/") + include("cheetah_params.pv") + into("$projectDir/src/main/assets/models") + } + } + } + } compileOptions { sourceCompatibility JavaVersion.VERSION_1_8 targetCompatibility JavaVersion.VERSION_1_8 @@ -32,14 +73,14 @@ dependencies { implementation 'androidx.appcompat:appcompat:1.4.2' implementation 'com.google.android.material:material:1.6.1' implementation 'androidx.constraintlayout:constraintlayout:2.1.4' - implementation 'ai.picovoice:cheetah-android:2.0.0' + implementation 'ai.picovoice:cheetah-android:2.1.0' implementation 'ai.picovoice:android-voice-processor:1.0.2' } -task copyParams(type: Copy) { - from("${rootDir}/../../../lib/common") - 
include('cheetah_params.pv') - into("${rootDir}/cheetah-demo-app/src/main/assets") +afterEvaluate { + android.productFlavors.all { + flavor -> + tasks."merge${flavor.name.capitalize()}DebugAssets".dependsOn "${flavor.name}CopyParams" + tasks."merge${flavor.name.capitalize()}ReleaseAssets".dependsOn "${flavor.name}CopyParams" + } } - -preBuild.dependsOn(copyParams) \ No newline at end of file diff --git a/demo/android/CheetahDemo/cheetah-demo-app/src/main/java/ai/picovoice/cheetahdemo/MainActivity.java b/demo/android/CheetahDemo/cheetah-demo-app/src/main/java/ai/picovoice/cheetahdemo/MainActivity.java index dbc0f336..18eeb6ef 100644 --- a/demo/android/CheetahDemo/cheetah-demo-app/src/main/java/ai/picovoice/cheetahdemo/MainActivity.java +++ b/demo/android/CheetahDemo/cheetah-demo-app/src/main/java/ai/picovoice/cheetahdemo/MainActivity.java @@ -25,6 +25,8 @@ import androidx.appcompat.app.AppCompatActivity; import androidx.core.app.ActivityCompat; +import java.util.Objects; + import ai.picovoice.android.voiceprocessor.VoiceProcessor; import ai.picovoice.android.voiceprocessor.VoiceProcessorException; import ai.picovoice.cheetah.Cheetah; @@ -54,12 +56,20 @@ protected void onCreate(Bundle savedInstanceState) { transcriptTextView.setMovementMethod(new ScrollingMovementMethod()); try { - cheetah = new Cheetah.Builder() + Cheetah.Builder builder = new Cheetah.Builder() .setAccessKey(ACCESS_KEY) - .setModelPath(MODEL_FILE) .setEndpointDuration(1f) - .setEnableAutomaticPunctuation(true) - .build(getApplicationContext()); + .setEnableAutomaticPunctuation(true); + + String model; + if (Objects.equals(BuildConfig.FLAVOR, "en")) { + model = "cheetah_params.pv"; + } else { + model = "cheetah_params_" + BuildConfig.FLAVOR + ".pv"; + } + builder.setModelPath("models/" + model); + + cheetah = builder.build(getApplicationContext()); } catch (CheetahInvalidArgumentException e) { displayError(e.getMessage()); } catch (CheetahActivationException e) { diff --git 
a/demo/dotnet/CheetahDemo/CheetahDemo.csproj b/demo/dotnet/CheetahDemo/CheetahDemo.csproj index 75cc81dc..c17ede74 100644 --- a/demo/dotnet/CheetahDemo/CheetahDemo.csproj +++ b/demo/dotnet/CheetahDemo/CheetahDemo.csproj @@ -19,7 +19,7 @@ - + diff --git a/demo/flutter/copy_assets.sh b/demo/flutter/copy_assets.sh deleted file mode 100755 index ee034435..00000000 --- a/demo/flutter/copy_assets.sh +++ /dev/null @@ -1,17 +0,0 @@ -if [ ! -d "./assets/models/android" ] -then - echo "Creating Android demo asset directory..." - mkdir -p ./assets/models/android -fi - -echo "Copying Android demo model..." -cp ../../lib/common/cheetah_params.pv ./assets/models/android/cheetah_params.pv - -if [ ! -d "./assets/models/ios" ] -then - echo "Creating iOS demo asset directory..." - mkdir -p ./assets/models/ios -fi - -echo "Copying iOS demo model..." -cp ../../lib/common/cheetah_params.pv ./assets/models/ios/cheetah_params.pv diff --git a/demo/flutter/copy_test_resources.sh b/demo/flutter/copy_test_resources.sh index 68637dd4..33fffe11 100755 --- a/demo/flutter/copy_test_resources.sh +++ b/demo/flutter/copy_test_resources.sh @@ -11,4 +11,7 @@ cp ${RESOURCE_DIR}/audio_samples/*.wav ${ASSETS_DIR}/audio_samples echo "Copying test model files..." mkdir -p ${ASSETS_DIR}/model_files -cp ${LIB_DIR}/common/*.pv ${ASSETS_DIR}/model_files \ No newline at end of file +cp ${LIB_DIR}/common/*.pv ${ASSETS_DIR}/model_files + +echo "Copying test data file..." 
+cp ${RESOURCE_DIR}/.test/test_data.json ${ASSETS_DIR} diff --git a/demo/flutter/integration_test/app_test.dart b/demo/flutter/integration_test/app_test.dart index 30e53bb7..ab07173e 100644 --- a/demo/flutter/integration_test/app_test.dart +++ b/demo/flutter/integration_test/app_test.dart @@ -1,3 +1,4 @@ +import 'dart:convert'; import 'dart:math'; import 'dart:typed_data'; @@ -8,14 +9,6 @@ import 'package:integration_test/integration_test.dart'; import 'package:cheetah_flutter/cheetah.dart'; import 'package:cheetah_flutter/cheetah_error.dart'; -Map testParam = { - 'language': 'en', - 'audio_file': 'test.wav', - 'transcript': 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.', - 'punctuations': ['.'], - 'error_rate': 0.025, -}; - void main() { IntegrationTestWidgetsFlutterBinding.ensureInitialized(); @@ -86,6 +79,14 @@ void main() { } group('Cheetah Process Tests', () { + late dynamic testData; + + setUp(() async { + String testDataJson = + await rootBundle.loadString('assets/test_resources/test_data.json'); + testData = json.decode(testDataJson); + }); + Future runCheetahProcess( String language, String transcript, @@ -130,30 +131,36 @@ void main() { reason: "Character error rate was incorrect"); } - testWidgets('Test Process', (tester) async { - String language = testParam['language']; - String transcript = testParam['transcript']; - List punctuationsRaw = testParam['punctuations']; - List punctuations = - punctuationsRaw.map((p) => p.toString()).toList(); - double errorRate = testParam['error_rate']; - String audioFile = testParam['audio_file']; - - await runCheetahProcess( - language, transcript, punctuations, false, errorRate, audioFile); + testWidgets('Test Process all languages', (tester) async { + for (int t = 0; t < testData['tests']['language_tests'].length; t++) { + String language = testData['tests']['language_tests'][t]['language']; + String transcript = testData['tests']['language_tests'][t]['transcript']; + List 
punctuationsRaw = testData['tests']['language_tests'][t]['punctuations']; + List punctuations = punctuationsRaw.map((p) => p.toString()).toList(); + double errorRate = testData['tests']['language_tests'][t]['error_rate']; + String audioFile = testData['tests']['language_tests'][t]['audio_file']; + + for (int p = 0; p < punctuations.length; p++) { + transcript = transcript.replaceAll(punctuations[p], ""); + } + + await runCheetahProcess( + language, transcript, punctuations, false, errorRate, audioFile); + } }); - testWidgets('Test Process with Punctuation', (tester) async { - String language = testParam['language']; - String transcript = testParam['transcript']; - List punctuationsRaw = testParam['punctuations']; - List punctuations = - punctuationsRaw.map((p) => p.toString()).toList(); - double errorRate = testParam['error_rate']; - String audioFile = testParam['audio_file']; - - await runCheetahProcess( - language, transcript, punctuations, true, errorRate, audioFile); + testWidgets('Test Process all languages with Punctuation', (tester) async { + for (int t = 0; t < testData['tests']['language_tests'].length; t++) { + String language = testData['tests']['language_tests'][t]['language']; + String transcript = testData['tests']['language_tests'][t]['transcript']; + List punctuationsRaw = testData['tests']['language_tests'][t]['punctuations']; + List punctuations = punctuationsRaw.map((p) => p.toString()).toList(); + double errorRate = testData['tests']['language_tests'][t]['error_rate']; + String audioFile = testData['tests']['language_tests'][t]['audio_file']; + + await runCheetahProcess( + language, transcript, punctuations, true, errorRate, audioFile); + } }); }); -} \ No newline at end of file +} diff --git a/demo/flutter/ios/Podfile b/demo/flutter/ios/Podfile index 25be1f10..10f3c9b4 100644 --- a/demo/flutter/ios/Podfile +++ b/demo/flutter/ios/Podfile @@ -28,7 +28,6 @@ require File.expand_path(File.join('packages', 'flutter_tools', 'bin', 'podhelpe 
flutter_ios_podfile_setup target 'Runner' do - pod 'Cheetah-iOS', '~> 2.0.1' use_frameworks! use_modular_headers! diff --git a/demo/flutter/ios/Podfile.lock b/demo/flutter/ios/Podfile.lock index f10e5311..2212a980 100644 --- a/demo/flutter/ios/Podfile.lock +++ b/demo/flutter/ios/Podfile.lock @@ -1,7 +1,7 @@ PODS: - - Cheetah-iOS (2.0.1) - - cheetah_flutter (2.0.2): - - Cheetah-iOS (~> 2.0.1) + - Cheetah-iOS (2.1.0) + - cheetah_flutter (2.1.0): + - Cheetah-iOS (~> 2.1.0) - Flutter - Flutter (1.0.0) - flutter_voice_processor (1.1.2): @@ -10,23 +10,25 @@ PODS: - integration_test (0.0.1): - Flutter - ios-voice-processor (1.2.0) - - path_provider_ios (0.0.1): + - path_provider_foundation (0.0.1): - Flutter + - FlutterMacOS DEPENDENCIES: - - Cheetah-iOS (~> 2.0.1) + - Cheetah-iOS (from `https://raw.githubusercontent.com/Picovoice/cheetah/v2.1-flutter/binding/ios/Cheetah-iOS.podspec`) - cheetah_flutter (from `.symlinks/plugins/cheetah_flutter/ios`) - Flutter (from `Flutter`) - flutter_voice_processor (from `.symlinks/plugins/flutter_voice_processor/ios`) - integration_test (from `.symlinks/plugins/integration_test/ios`) - - path_provider_ios (from `.symlinks/plugins/path_provider_ios/ios`) + - path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`) SPEC REPOS: trunk: - - Cheetah-iOS - ios-voice-processor EXTERNAL SOURCES: + Cheetah-iOS: + :podspec: https://raw.githubusercontent.com/Picovoice/cheetah/v2.1-flutter/binding/ios/Cheetah-iOS.podspec cheetah_flutter: :path: ".symlinks/plugins/cheetah_flutter/ios" Flutter: @@ -35,18 +37,23 @@ EXTERNAL SOURCES: :path: ".symlinks/plugins/flutter_voice_processor/ios" integration_test: :path: ".symlinks/plugins/integration_test/ios" - path_provider_ios: - :path: ".symlinks/plugins/path_provider_ios/ios" + path_provider_foundation: + :path: ".symlinks/plugins/path_provider_foundation/darwin" + +CHECKOUT OPTIONS: + Cheetah-iOS: + :commit: 0efbc0dff9ee5ad8fcb2f419ffb95618cb3941e0 + :git: 
https://github.com/Picovoice/cheetah.git SPEC CHECKSUMS: - Cheetah-iOS: 2f1a662b84de3696498b9f9bbac3758b1b19ccb7 - cheetah_flutter: b269b3116f8fa18d0c2f150e080801721ae9e2c0 + Cheetah-iOS: 86718b271b5938e2813bdcd084d7641f090d9792 + cheetah_flutter: 87138473d412f0dc03e083389266a8891c40b81d Flutter: e0871f40cf51350855a761d2e70bf5af5b9b5de7 flutter_voice_processor: 9ebe2cae6612db22d20e7e7833937ea19fa3e59c integration_test: 252f60fa39af5e17c3aa9899d35d908a0721b573 ios-voice-processor: 6b5ca08962f39e434fe39dca0f483d923a3b1b97 - path_provider_ios: 7d7ce634493af4477d156294792024ec3485acd5 + path_provider_foundation: 29f094ae23ebbca9d3d0cec13889cd9060c0e943 -PODFILE CHECKSUM: 7d2a8ed6c4f8fa59f685c1b58c574f09328dcc42 +PODFILE CHECKSUM: f7a7ce4b5e489d8907d89c46a82c25ff03813df4 COCOAPODS: 1.16.2 diff --git a/demo/flutter/lib/main.dart b/demo/flutter/lib/main.dart index e3a49dd5..8c89f81d 100644 --- a/demo/flutter/lib/main.dart +++ b/demo/flutter/lib/main.dart @@ -1,5 +1,5 @@ // -// Copyright 2022-2023 Picovoice Inc. +// Copyright 2022-2024 Picovoice Inc. // // You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" // file accompanying this source. @@ -10,7 +10,8 @@ // import 'dart:async'; -import 'dart:io'; +import 'dart:convert'; + import 'package:cheetah_demo/cheetah_manager.dart'; import 'package:cheetah_flutter/cheetah_error.dart'; import 'package:flutter/material.dart'; @@ -50,17 +51,28 @@ class _MyAppState extends State { } Future initCheetah() async { - String platform = Platform.isAndroid - ? "android" - : Platform.isIOS - ? 
"ios" - : throw CheetahRuntimeException( - "This demo supports iOS and Android only."); - String modelPath = "assets/models/$platform/cheetah_params.pv"; + String language = ""; + try { + final paramsString = + await DefaultAssetBundle.of(context).loadString('assets/params.json'); + final params = json.decode(paramsString); + + language = params["language"]; + } catch (_) { + errorCallback(CheetahException( + "Could not find `params.json`. Ensure 'prepare_demo.dart' script was run before launching the demo.")); + return; + } + + final String suffix = language != "en" ? "_$language" : ""; + final String modelPath = "assets/models/cheetah_params$suffix.pv"; try { _cheetahManager = await CheetahManager.create( - accessKey, modelPath, transcriptCallback, errorCallback); + accessKey, + modelPath, + transcriptCallback, + errorCallback); } on CheetahActivationException { errorCallback(CheetahActivationException("AccessKey activation error.")); } on CheetahActivationLimitException { diff --git a/demo/flutter/pubspec.lock b/demo/flutter/pubspec.lock index e828452e..34cf12e4 100644 --- a/demo/flutter/pubspec.lock +++ b/demo/flutter/pubspec.lock @@ -29,10 +29,10 @@ packages: dependency: "direct main" description: name: cheetah_flutter - sha256: "80e52dfc3af4a2a1c395533ff9979b51fe6313881f902856525317e1d3dfec5a" + sha256: "3591bcd7fbb334b8dd1072ec9f8b93470a46cf4db8a8a4d8ed6d7c865fc7e6ad" url: "https://pub.dev" source: hosted - version: "2.0.2" + version: "2.1.0" clock: dependency: transitive description: @@ -61,10 +61,10 @@ packages: dependency: transitive description: name: ffi - sha256: "35d0f481d939de0d640b3db9a7aa36a52cd22054a798a73b4f50bdad5ce12678" + sha256: ed5337a5660c506388a9f012be0288fb38b49020ce2b45fe1f8b8323fe429f99 url: "https://pub.dev" source: hosted - version: "1.1.2" + version: "2.0.2" file: dependency: transitive description: @@ -171,61 +171,53 @@ packages: source: hosted version: "1.9.0" path_provider: - dependency: transitive + dependency: "direct main" 
description: name: path_provider - sha256: e92dee4d38a9044605cb3fb253e9b46eb9375dfcad4515d0379b44ac90797568 + sha256: "909b84830485dbcd0308edf6f7368bc8fd76afa26a270420f34cabea2a6467a0" url: "https://pub.dev" source: hosted - version: "2.0.9" + version: "2.1.0" path_provider_android: dependency: transitive description: name: path_provider_android - sha256: c69109bae02c6116bd8ac81319b13eb73dfae02ef74690d2a1a98c1ddd3aaefc + sha256: "5d44fc3314d969b84816b569070d7ace0f1dea04bd94a83f74c4829615d22ad8" url: "https://pub.dev" source: hosted - version: "2.0.11" - path_provider_ios: + version: "2.1.0" + path_provider_foundation: dependency: transitive description: - name: path_provider_ios - sha256: "038d0141ff5d08c60ed071eee2758b68c50c42a1c10066a1fb6c28ab32fac84c" + name: path_provider_foundation + sha256: "1b744d3d774e5a879bb76d6cd1ecee2ba2c6960c03b1020cd35212f6aa267ac5" url: "https://pub.dev" source: hosted - version: "2.0.7" + version: "2.3.0" path_provider_linux: dependency: transitive description: name: path_provider_linux - sha256: ffbb8cc9ed2c9ec0e4b7a541e56fd79b138e8f47d2fb86815f15358a349b3b57 + sha256: ba2b77f0c52a33db09fc8caf85b12df691bf28d983e84cf87ff6d693cfa007b3 url: "https://pub.dev" source: hosted - version: "2.1.11" - path_provider_macos: - dependency: transitive - description: - name: path_provider_macos - sha256: "0adeb313e1f2c3fc52baeeee59b0fe9c2d1f7da56fd96a9234e1702ec653a453" - url: "https://pub.dev" - source: hosted - version: "2.0.5" + version: "2.2.0" path_provider_platform_interface: dependency: transitive description: name: path_provider_platform_interface - sha256: "3dc0d51b07f85fec3746d9f4e8d31c73bb173cafa2e763f03f8df2e8d1878882" + sha256: bced5679c7df11190e1ddc35f3222c858f328fff85c3942e46e7f5589bf9eb84 url: "https://pub.dev" source: hosted - version: "2.0.3" + version: "2.1.0" path_provider_windows: dependency: transitive description: name: path_provider_windows - sha256: "366ad4e3541ea707f859e7148d4d5aba67d589d7936cee04a05c464a277eeb27" + 
sha256: ee0e0d164516b90ae1f970bdf29f726f1aa730d7cfc449ecc74c495378b705da url: "https://pub.dev" source: hosted - version: "2.0.5" + version: "2.2.0" platform: dependency: transitive description: @@ -238,10 +230,10 @@ packages: dependency: transitive description: name: plugin_platform_interface - sha256: "075f927ebbab4262ace8d0b283929ac5410c0ac4e7fc123c76429564facfb757" + sha256: "43798d895c929056255600343db8f049921cbec94d31ec87f1dc5c16c01935dd" url: "https://pub.dev" source: hosted - version: "2.1.2" + version: "2.1.5" process: dependency: transitive description: @@ -339,18 +331,18 @@ packages: dependency: transitive description: name: win32 - sha256: cde1e6d546d8cfd0b3c72bc6f29d980fa629d1cb107f38e2a039ca5d10d79e41 + sha256: "5a751eddf9db89b3e5f9d50c20ab8612296e4e8db69009788d6c8b060a84191c" url: "https://pub.dev" source: hosted - version: "2.4.1" + version: "4.1.4" xdg_directories: dependency: transitive description: name: xdg_directories - sha256: "7a3f37b05d989967cdddcbb571f1ea834867ae2faa29725fd085180e0883aa15" + sha256: f0c26453a2d47aa4c2570c6a033246a3fc62da2fe23c7ffdd0a7495086dc0247 url: "https://pub.dev" source: hosted - version: "1.1.0" + version: "1.0.2" sdks: dart: ">=3.3.0 <4.0.0" flutter: ">=3.18.0-18.0.pre.54" diff --git a/demo/flutter/pubspec.yaml b/demo/flutter/pubspec.yaml index f1370a1e..0f1b99b9 100644 --- a/demo/flutter/pubspec.yaml +++ b/demo/flutter/pubspec.yaml @@ -3,7 +3,7 @@ description: Demonstrates how to use the cheetah plugin. 
publish_to: 'none' -version: 2.0.2 +version: 2.1.0 environment: sdk: ">=2.18.0 <4.0.0" @@ -13,8 +13,9 @@ dependencies: flutter: sdk: flutter + path_provider: ^2.0.9 flutter_voice_processor: 1.1.2 - cheetah_flutter: 2.0.2 + cheetah_flutter: ^2.1.0 dev_dependencies: integration_test: @@ -26,8 +27,8 @@ dev_dependencies: flutter: uses-material-design: true assets: - - assets/models/ios/ - - assets/models/android/ + - assets/ + - assets/models/ - assets/test_resources/ - assets/test_resources/audio_samples/ - assets/test_resources/model_files/ diff --git a/demo/flutter/scripts/prepare_demo.dart b/demo/flutter/scripts/prepare_demo.dart new file mode 100644 index 00000000..c62752d8 --- /dev/null +++ b/demo/flutter/scripts/prepare_demo.dart @@ -0,0 +1,58 @@ +import "package:path/path.dart"; + +import "dart:convert"; +import "dart:io"; + +final String resourcePath = + join(dirname(Platform.script.path), "..", "..", "..", "resources"); +final String libPath = + join(dirname(Platform.script.path), "..", "..", "..", "lib"); +final String testDataPath = join(resourcePath, ".test", "test_data.json"); + +final String assetsPath = join(dirname(Platform.script.path), "..", "assets"); +final String modelsPath = join(assetsPath, "models"); + +Future readJsonFile(String filePath) async { + var input = await File(filePath).readAsString(); + var map = jsonDecode(input); + return map; +} + +void main(List arguments) async { + var testData = await readJsonFile(testDataPath); + List availableLanguages = List.from( + testData["tests"]["language_tests"].map((x) => x["language"]).toList()); + + if (arguments.isEmpty) { + print( + "Choose the language you would like to run the demo in with 'dart scripts/prepare_demo.dart [language]'.\n" + "Available languages are ${availableLanguages.join(", ")}."); + exit(1); + } + + String language = arguments[0]; + String suffix = (language == "en") ? 
"" : "_$language"; + if (!availableLanguages.contains(language)) { + print("'$language' is not an available demo language.\n" + "Available languages are ${availableLanguages.join(", ")}."); + exit(1); + } + + var modelDir = Directory(modelsPath); + if (modelDir.existsSync()) { + modelDir.deleteSync(recursive: true); + } + modelDir.createSync(recursive: true); + + var params = {}; + params["language"] = language; + + File model = File(join(libPath, "common", "cheetah_params$suffix.pv")); + model.copySync(join(modelDir.path, basename(model.path))); + + var encoded = json.encode(params); + File f = File(join(assetsPath, "params.json")); + f.writeAsStringSync(encoded); + + print("Demo is ready to run!"); +} diff --git a/demo/go/go.mod b/demo/go/go.mod index e0b16adc..1876b718 100644 --- a/demo/go/go.mod +++ b/demo/go/go.mod @@ -3,7 +3,7 @@ module cheetahdemo go 1.16 require ( - github.com/Picovoice/cheetah/binding/go/v2 v2.0.2 + github.com/Picovoice/cheetah/binding/go/v2 v2.1.0 github.com/Picovoice/pvrecorder/binding/go v1.2.3 github.com/go-audio/audio v1.0.0 github.com/go-audio/wav v1.0.0 diff --git a/demo/go/go.sum b/demo/go/go.sum index 42fedce4..bdaaa246 100644 --- a/demo/go/go.sum +++ b/demo/go/go.sum @@ -1,11 +1,7 @@ -github.com/Picovoice/cheetah/binding/go/v2 v2.0.2 h1:ClgXf3jVR87MiWD8NMX/n+cgymvH8rnqWu8iLkIEAmU= -github.com/Picovoice/cheetah/binding/go/v2 v2.0.2/go.mod h1:rctztGHGzGPelptxism4IBquo7plpZKgSmQ5SNYFhv8= +github.com/Picovoice/cheetah/binding/go/v2 v2.1.0 h1:7T3H0AxxYym9FFTBE/0ALlqsH88kZLYlhfFGQU/sqfc= +github.com/Picovoice/cheetah/binding/go/v2 v2.1.0/go.mod h1:BKQPVuV+8xxEBLuCdwTxgj4bVsUTiuQg4YJ7oJt9uh4= github.com/Picovoice/pvrecorder/binding/go v1.2.3 h1:VU9cf4g3h7pazecoEsPINi21AHWgxIsn7jFWdRDpWZE= github.com/Picovoice/pvrecorder/binding/go v1.2.3/go.mod h1:gQdvBAjoKmRxMFh8W9cVKWcqHsWvu+d13sCPVFm7dhg= -github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8= -github.com/agnivade/levenshtein v1.1.1/go.mod 
h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo= -github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= -github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= diff --git a/demo/java/build.gradle b/demo/java/build.gradle index 6b91c8f0..038962d7 100644 --- a/demo/java/build.gradle +++ b/demo/java/build.gradle @@ -15,14 +15,14 @@ sourceSets { } dependencies { - implementation 'ai.picovoice:cheetah-java:2.0.2' + implementation 'ai.picovoice:cheetah-java:2.1.0' implementation 'commons-cli:commons-cli:1.4' } jar { manifest { attributes "Main-Class": "ai.picovoice.cheetahdemo.MicDemo", - "Class-Path": "cheetah-2.0.2.jar;commons-cli-1.4.jar" + "Class-Path": "cheetah-2.1.0.jar;commons-cli-1.4.jar" } from sourceSets.main.output exclude "**/FileDemo.class" @@ -33,7 +33,7 @@ jar { task fileDemoJar(type: Jar) { manifest { attributes "Main-Class": "ai.picovoice.cheetahdemo.FileDemo", - "Class-Path": "cheetah-2.0.2.jar;commons-cli-1.4.jar" + "Class-Path": "cheetah-2.1.0.jar;commons-cli-1.4.jar" } from sourceSets.main.output exclude "**/MicDemo.class" diff --git a/demo/nodejs/package.json b/demo/nodejs/package.json index abdba311..8c3cf70e 100644 --- a/demo/nodejs/package.json +++ b/demo/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/cheetah-node-demo", - "version": "2.0.3", + "version": "2.1.0", "description": "Picovoice Cheetah Node.js file-based and microphone demos", "scripts": { "file": "node file.js", @@ -16,7 +16,7 @@ "author": "Picovoice Inc.", "license": "Apache-2.0", "dependencies": { - "@picovoice/cheetah-node": "=2.0.3", + "@picovoice/cheetah-node": "~2.1.0", 
"@picovoice/pvrecorder-node": "^1.2.4", "commander": "^6.1.0", "readline": "^1.3.0", diff --git a/demo/nodejs/yarn.lock b/demo/nodejs/yarn.lock index 03b1ed68..87356cfc 100644 --- a/demo/nodejs/yarn.lock +++ b/demo/nodejs/yarn.lock @@ -2,10 +2,10 @@ # yarn lockfile v1 -"@picovoice/cheetah-node@=2.0.3": - version "2.0.3" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.0.3.tgz#6b426ded58c2cf21e82a3282582f46698f3ddc32" - integrity sha512-BqcDV72PhjE41GQohlnfu/1xr52QTSMlpo504tTY+JgUHcoHnwT0jEp0AbpZgdXLIexYgH/dzUt8Ls12yXyCgQ== +"@picovoice/cheetah-node@~2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.1.0.tgz#d68a86b55bc21bf586a23f4c33563dce58b18385" + integrity sha512-B63Aqmjs2berQ/YVEooIJTSQVXrJ/naz5YG5ZG+mlyfahYaOBbudXAdQP4FxTiQBlGLNhQSqZjllgoEs394n1Q== "@picovoice/pvrecorder-node@^1.2.4": version "1.2.4" diff --git a/demo/python/requirements.txt b/demo/python/requirements.txt index c9d2486b..71bb9cbc 100644 --- a/demo/python/requirements.txt +++ b/demo/python/requirements.txt @@ -1,2 +1,2 @@ -pvcheetah==2.0.2 +pvcheetah==2.1.0 pvrecorder==1.2.3 diff --git a/demo/python/setup.py b/demo/python/setup.py index 6bc828ec..0e240324 100644 --- a/demo/python/setup.py +++ b/demo/python/setup.py @@ -28,7 +28,7 @@ setuptools.setup( name="pvcheetahdemo", - version="2.0.2", + version="2.1.0", author="Picovoice", author_email="hello@picovoice.ai", description="Cheetah speech-to-text engine demos", @@ -36,7 +36,7 @@ long_description_content_type="text/markdown", url="https://github.com/Picovoice/cheetah", packages=["pvcheetahdemo"], - install_requires=["pvcheetah==2.0.2", "pvrecorder==1.2.3"], + install_requires=["pvcheetah==2.1.0", "pvrecorder==1.2.3"], include_package_data=True, classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/demo/react-native/ios/Podfile.lock b/demo/react-native/ios/Podfile.lock index d3a4d73b..f392240d 100644 --- 
a/demo/react-native/ios/Podfile.lock +++ b/demo/react-native/ios/Podfile.lock @@ -1,8 +1,8 @@ PODS: - boost (1.76.0) - - Cheetah-iOS (2.0.1) - - cheetah-react-native (2.0.2): - - Cheetah-iOS (~> 2.0.1) + - Cheetah-iOS (2.1.0) + - cheetah-react-native (2.1.0): + - Cheetah-iOS (~> 2.1.0) - React - DoubleConversion (1.1.6) - FBLazyVector (0.68.7) @@ -299,6 +299,7 @@ PODS: DEPENDENCIES: - boost (from `../node_modules/react-native/third-party-podspecs/boost.podspec`) + - Cheetah-iOS (from `https://raw.githubusercontent.com/Picovoice/cheetah/v2.1-react-native/binding/ios/Cheetah-iOS.podspec`) - "cheetah-react-native (from `../node_modules/@picovoice/cheetah-react-native`)" - DoubleConversion (from `../node_modules/react-native/third-party-podspecs/DoubleConversion.podspec`) - FBLazyVector (from `../node_modules/react-native/Libraries/FBLazyVector`) @@ -338,13 +339,14 @@ DEPENDENCIES: SPEC REPOS: trunk: - - Cheetah-iOS - fmt - ios-voice-processor EXTERNAL SOURCES: boost: :podspec: "../node_modules/react-native/third-party-podspecs/boost.podspec" + Cheetah-iOS: + :podspec: https://raw.githubusercontent.com/Picovoice/cheetah/v2.1-react-native/binding/ios/Cheetah-iOS.podspec cheetah-react-native: :path: "../node_modules/@picovoice/cheetah-react-native" DoubleConversion: @@ -414,10 +416,15 @@ EXTERNAL SOURCES: Yoga: :path: "../node_modules/react-native/ReactCommon/yoga" +CHECKOUT OPTIONS: + Cheetah-iOS: + :commit: b07ef50da9058941d268aee19932815f9bd471ba + :git: https://github.com/Picovoice/cheetah.git + SPEC CHECKSUMS: boost: a7c83b31436843459a1961bfd74b96033dc77234 - Cheetah-iOS: 2f1a662b84de3696498b9f9bbac3758b1b19ccb7 - cheetah-react-native: 5ea29790b39efe5c2383f5d7f257e083cbb71d22 + Cheetah-iOS: 3ad9ac1bd4f880a557ce7fe6fbe1fb1f0a7402f3 + cheetah-react-native: dd709d4c52c07bf45828003b58fe4072783e4320 DoubleConversion: 831926d9b8bf8166fd87886c4abab286c2422662 FBLazyVector: 63b89dc85804d5817261f56dc4cfb43a9b6d57f5 FBReactNativeSpec: 1fa200a9862d9369a53b6fddbbfcdc22bab24062 
@@ -454,6 +461,6 @@ SPEC CHECKSUMS: RNFS: 4ac0f0ea233904cb798630b3c077808c06931688 Yoga: 0bc4b37c3b8a345336ff601e2cf7d9704bab7e93 -PODFILE CHECKSUM: 6f7cd523f67fd219238da59043721f083653eeba +PODFILE CHECKSUM: dea7c62235d55b5514625b6be593b3f4eb288d9a COCOAPODS: 1.16.2 diff --git a/demo/react-native/package.json b/demo/react-native/package.json index d3e3bac6..3a9e5f4e 100644 --- a/demo/react-native/package.json +++ b/demo/react-native/package.json @@ -1,6 +1,6 @@ { "name": "cheetah-react-native-demo", - "version": "2.0.2", + "version": "2.1.0", "private": true, "scripts": { "start": "react-native start", @@ -16,7 +16,7 @@ "postinstall": "node copy.js" }, "dependencies": { - "@picovoice/cheetah-react-native": "2.0.2", + "@picovoice/cheetah-react-native": "=2.1.0", "@picovoice/react-native-voice-processor": "1.2.3", "@react-native-picker/picker": "^1.9.2", "react": "^17.0.2", diff --git a/demo/react-native/yarn.lock b/demo/react-native/yarn.lock index 89dc4ee6..0f6ce3b5 100644 --- a/demo/react-native/yarn.lock +++ b/demo/react-native/yarn.lock @@ -1090,10 +1090,10 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/cheetah-react-native@2.0.2": - version "2.0.2" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-react-native/-/cheetah-react-native-2.0.2.tgz#e82445cb61becb3a8f46078abe02885e98e17901" - integrity sha512-ciJj859CEBC90ZCj6fI6L0AwEgp7Sjlf+fn2hpGQJQglYtqqfeslWqggg2XAs9p1nbJIFGc9kipk6KwMU/LsaQ== +"@picovoice/cheetah-react-native@=2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/cheetah-react-native/-/cheetah-react-native-2.1.0.tgz#ec0e49b3fb7ecc40ce80a8da68b59757e1616a07" + integrity sha512-yF62GbapMDP7T7MJqlYWDtdwWTakm1eRLUGdqYtVOzrUgRWiTdgJB4z7zkpAQoqQeVsXU6sj0JfPS44weQCndQ== "@picovoice/react-native-voice-processor@1.2.3": version "1.2.3" diff --git a/demo/react/README.md b/demo/react/README.md index 26ba6fc7..89fc4c29 100644 --- a/demo/react/README.md +++ b/demo/react/README.md @@ -16,22 +16,20 @@ Signup or 
Login to [Picovoice Console](https://console.picovoice.ai/) to get you ## Install and Run -Use `yarn` or `npm` to install the dependencies. Run `start` to start the demo. +Use `yarn` or `npm` to install the dependencies, and the `start` script with a language code +to start a local web server hosting the demo in the language of your choice (e.g. `de` -> German, `ko` -> Korean). +To see a list of available languages, run `start` without a language code. ```console yarn -yarn start +yarn start ${LANGUAGE} ``` (or) ```console npm install -npm run start +npm run start ${LANGUAGE} ``` Open `http://localhost:3000` to view it in the browser. - -The page will reload if you make edits. You will also see any lint errors in the console. - -Wait until Cheetah has initialized. Start recording audio to see the real-time transcription. diff --git a/demo/react/package.json b/demo/react/package.json index 6f0ddd24..a78cf60b 100644 --- a/demo/react/package.json +++ b/demo/react/package.json @@ -1,10 +1,10 @@ { "name": "cheetah-react-demo", - "version": "2.0.0", + "version": "2.1.0", "private": true, "description": "Cheetah React demo (made with Create React App)", "dependencies": { - "@picovoice/cheetah-react": "~2.0.0", + "@picovoice/cheetah-react": "~2.1.0", "@picovoice/web-voice-processor": "~4.0.8", "@types/node": "^18.11.9", "@types/react": "^18.0.17", diff --git a/demo/react/scripts/run_demo.js b/demo/react/scripts/run_demo.js index e1037652..18a4bb26 100644 --- a/demo/react/scripts/run_demo.js +++ b/demo/react/scripts/run_demo.js @@ -1,8 +1,33 @@ const child_process = require("child_process"); const fs = require("fs"); const path = require("path"); +const testData = require("../../../resources/.test/test_data.json"); -const args = process.argv.slice(2, 3); +const availableLanguages = testData["tests"]["language_tests"].map( + (x) => x["language"] +); + +const args = process.argv.slice(2, -1); +const language = process.argv.slice(-1)[0]; +if (!language) { + console.error( + 
`Choose the language you would like to run the demo in with "yarn start [language]".\nAvailable languages are ${availableLanguages.join( + ", " + )}` + ); + process.exit(1); +} + +if (!availableLanguages.includes(language)) { + console.error( + `'${language}' is not an available demo language.\nAvailable languages are ${availableLanguages.join( + ", " + )}` + ); + process.exit(1); +} + +const suffix = language === "en" ? "" : `_${language}`; const rootDir = path.join(__dirname, "..", "..", ".."); const libDirectory = path.join(__dirname, "..", "src", "lib"); @@ -16,7 +41,7 @@ if (fs.existsSync(publicDirectory)) { } const modelDir = path.join(rootDir, "lib", "common"); -const modelName = "cheetah_params.pv"; +const modelName = `cheetah_params${suffix}.pv`; fs.copyFileSync( path.join(modelDir, modelName), path.join(publicDirectory, modelName) @@ -37,7 +62,7 @@ fs.writeFileSync( const command = process.platform === "win32" ? "npx.cmd" : "npx"; -child_process.execSync(`${command} react-scripts ${args.join(" ")}`, { +child_process.execSync(`${command} react-scripts ${args.join(" ")}`, { shell: true, stdio: 'inherit' }); diff --git a/demo/react/yarn.lock b/demo/react/yarn.lock index 7a32b3ca..5da611d1 100644 --- a/demo/react/yarn.lock +++ b/demo/react/yarn.lock @@ -1616,19 +1616,19 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/cheetah-react@~2.0.0": - version "2.0.0" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-react/-/cheetah-react-2.0.0.tgz#00bdd9377f2d141d07e6a08a9d57183cda7c9090" - integrity sha512-t6x3RVzmeUpm4+r/0+b6VMJhmzSVuoyXgvquU/K4HpM0gsJIfIruZOzTOEDsnVae/rmw61v98spxT9NjorZZ8g== +"@picovoice/cheetah-react@~2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/cheetah-react/-/cheetah-react-2.1.0.tgz#69647960d792ff3d076ed39ac31f6a055a6ba521" + integrity sha512-BVW4wZyJo0wy28vrFdgLhiIFCt9etUZmwdWXwkdLI4Y1wZ4GNQuN3iimL+xlL9tmzgAzcN3OuJbGHoSndc5CBw== dependencies: - "@picovoice/cheetah-web" "=2.0.0" + 
"@picovoice/cheetah-web" "~2.1.0" -"@picovoice/cheetah-web@=2.0.0": - version "2.0.0" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-web/-/cheetah-web-2.0.0.tgz#d4415c25e324726356f979bed3761a94e884198e" - integrity sha512-WqxHUznNS7Rf8XfJCp0m0l+xeYFDSFhzOTg+b2DJn06x1slhpJA4CiK4egiH7FzhHiJtvqjLF0dO14LA8e1Gpg== +"@picovoice/cheetah-web@~2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/cheetah-web/-/cheetah-web-2.1.0.tgz#d75e579460ca7d3b308ff4d56c82b0171f19ff30" + integrity sha512-LMJC8wRu6vakDdzuL9flWtgmrYk/6fodBwX18cNSqmXgRaVYNn7/jjW+N7XgA5UQrbGLcNBipIdsEBPivuzCBA== dependencies: - "@picovoice/web-utils" "=1.3.1" + "@picovoice/web-utils" "=1.4.3" "@picovoice/web-utils@=1.3.1": version "1.3.1" @@ -1637,6 +1637,13 @@ dependencies: commander "^9.2.0" +"@picovoice/web-utils@=1.4.3": + version "1.4.3" + resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.3.tgz#1de0b20d6080c18d295c6df37c09d88bf7c4f555" + integrity sha512-7JN3YYsSD9Gtce6YKG3XqpX49dkeu7jTdbox7rHQA/X/Q3zxopXA9zlCKSq6EIjFbiX2iuzDKUx1XrFa3d8c0w== + dependencies: + commander "^10.0.1" + "@picovoice/web-voice-processor@~4.0.8": version "4.0.8" resolved "https://registry.npmjs.org/@picovoice/web-voice-processor/-/web-voice-processor-4.0.8.tgz" @@ -3209,6 +3216,11 @@ combined-stream@^1.0.8: dependencies: delayed-stream "~1.0.0" +commander@^10.0.1: + version "10.0.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06" + integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug== + commander@^2.20.0: version "2.20.3" resolved "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz" diff --git a/demo/rust/filedemo/Cargo.lock b/demo/rust/filedemo/Cargo.lock index 1993cf3d..a33cf276 100644 --- a/demo/rust/filedemo/Cargo.lock +++ b/demo/rust/filedemo/Cargo.lock @@ -109,12 +109,12 @@ checksum = 
"320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" [[package]] name = "libloading" -version = "0.7.0" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "winapi", + "windows-targets", ] [[package]] @@ -125,9 +125,9 @@ checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4" [[package]] name = "pv_cheetah" -version = "2.0.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e283da7d704c5b16970fe55cf0443df1ab8fe2aa3765cbf8a83c5e2cdefe8b7d" +checksum = "253fb1d50386b0bd41a42208e74d303d969cf53e2776df29551d1d3df89dea5d" dependencies = [ "libc", "libloading", @@ -135,7 +135,7 @@ dependencies = [ [[package]] name = "pv_cheetah_filedemo" -version = "2.0.1" +version = "2.1.0" dependencies = [ "clap", "hound", @@ -194,3 +194,67 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/demo/rust/filedemo/Cargo.toml b/demo/rust/filedemo/Cargo.toml index 81793cbd..a30847a9 100644 --- a/demo/rust/filedemo/Cargo.toml +++ b/demo/rust/filedemo/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "pv_cheetah_filedemo" -version = "2.0.1" +version = "2.1.0" edition = "2018" [dependencies] clap = "3.2.16" hound = "3.4.0" itertools = "0.10.3" -pv_cheetah = "=2.0.3" +pv_cheetah = "=2.1.0" diff --git a/demo/rust/micdemo/Cargo.toml b/demo/rust/micdemo/Cargo.toml index 64538695..6f7058d9 100644 --- a/demo/rust/micdemo/Cargo.toml +++ b/demo/rust/micdemo/Cargo.toml @@ -1,6 +1,6 @@ [package] 
name = "pv_cheetah_micdemo" -version = "2.0.1" +version = "2.1.0" edition = "2018" [dependencies] @@ -9,5 +9,5 @@ clap = "3.2.16" ctrlc = "3.2.2" hound = "3.4.0" itertools = "0.10.3" -pv_cheetah = "=2.0.3" +pv_cheetah = "=2.1.0" pv_recorder = "=1.2.3" diff --git a/demo/web/.gitignore b/demo/web/.gitignore index bdb2b261..20e5a64c 100644 --- a/demo/web/.gitignore +++ b/demo/web/.gitignore @@ -4,4 +4,4 @@ node_modules dist/ *.log .DS_Store -cheetah_params.js +models/* diff --git a/demo/web/README.md b/demo/web/README.md index 38cef64b..d08158f9 100644 --- a/demo/web/README.md +++ b/demo/web/README.md @@ -10,18 +10,20 @@ Signup or Login to [Picovoice Console](https://console.picovoice.ai/) to get you ## Install & run -Use `yarn` or `npm` to install the dependencies, and the `start` script to start a local web server hosting the demo. +Use `yarn` or `npm` to install the dependencies, and the `start` script with a language code +to start a local web server hosting the demo in the language of your choice (e.g. `sv` -> Swedish, `zh` -> Mandarin). +To see a list of available languages, run `start` without a language code. ```console yarn -yarn start +yarn start ${LANGUAGE} ``` (or) ```console npm install -npm run start +npm run start ${LANGUAGE} ``` Open `localhost:5000` in your web browser, as hinted at in the output: @@ -32,4 +34,4 @@ Available on: Hit CTRL-C to stop the server ``` -Wait until Cheetah and the WebVoiceProcessor have initialized. Say any phrase and Cheetah will start transcribing in real time. +Wait until Cheetah and the WebVoiceProcessor have initialized. Choose an audio file or record audio to transcribe. diff --git a/demo/web/index.html b/demo/web/index.html index 66800d85..50eea5c6 100644 --- a/demo/web/index.html +++ b/demo/web/index.html @@ -3,7 +3,7 @@ - +