diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index d07a15a71b..e9a89aaa44 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -208,6 +208,12 @@ Voice and Tone: - Formatting standards: **Bold text** for UI elements (buttons, menu items, field names), *Italic text* for emphasis and new terms, `Code formatting` for file names, commands, code elements. - Use shortcodes for common pitfalls, warnings, important notes. +### Code fence integrity + +- Every fenced code block opened with triple backticks (```) MUST be explicitly closed with matching triple backticks before any non-code content resumes +- Never generate unterminated or partial code fences +- Do not rely on implicit closure, indentation, or surrounding formatting to end a code block + ## Arm naming and architecture terms - Use Arm for the brand in prose (for example, "Arm processors", "Arm servers"). diff --git a/.github/workflows/test-lp.yml b/.github/workflows/test-lp.yml index 4ed6b1d352..5abdc17914 100644 --- a/.github/workflows/test-lp.yml +++ b/.github/workflows/test-lp.yml @@ -31,7 +31,7 @@ jobs: tmpfile=$(mktemp) - git diff --name-only origin/${{ github.base_ref }}...HEAD | + git diff --name-only --diff-filter=d origin/${{ github.base_ref }}...HEAD | grep '^content/' | while read -r path; do name=$(basename "$path") diff --git a/.wordlist.txt b/.wordlist.txt index 3a2774a56f..f8afdacc88 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -5493,4 +5493,102 @@ preselected reconfiguring torchscript xxxxxx -Modescope \ No newline at end of file +Modescope + +APerf's +AdamW +AndroidX +AssetManager +BGA +BatchNormalization +BoardRenderer +CVVXMAB +DigitNet +EPs +IC +ImageFolder +MAB +MIMX +NXP's +Netron +ONNX's +OTP +Opset +Opsets +Otsu +PGDATA +PSK +PrepareData +PrepareModelForAndroid +QDQ +QuantizeModel +Relu +RemainAfterExit +RunSudokuProcessor +Runlevel +Runtime's +SBBM +SSID +SSIDs +ScrollView +SessionOptions +SmallNet +SudokuEngine +SudokuProcessor +SudokuSolver +SudokuSolverOnnx +TCMalloc +VVX +WantedBy +YOLOv +accuracies +acyclic +androidx +aug +bitnami +claude +dataloaders +deployability +deterministically +digitnet +dirs +drawable +drawables +flto +frdm +hpa +httproute +ifconfig +initDebug +interprocedural +mgmt +misrecognized +mlan +moal +modprobe +netdev +netron +oneshot +onnxscript +opset +opsets +picocom +psk +pvc +reimplemented +scarthgap +serviceaccount +silabs +smallnet +sourcecode +ssid +stepwise +sudoku +sudokusolveronnx +tensor's +thumbdrive +ttyLP +udhcpc +usb +vcp +Aperf's \ No newline at end of file diff --git a/content/install-guides/_images/about-wpa.png b/content/install-guides/_images/about-wpa.png deleted file mode 100644 index 7cd5131059..0000000000 Binary files a/content/install-guides/_images/about-wpa.png and /dev/null differ diff --git a/content/install-guides/_images/aperf.webp b/content/install-guides/_images/aperf.webp deleted file mode 100644 index 85f67ed286..0000000000 Binary files a/content/install-guides/_images/aperf.webp and /dev/null differ diff --git a/content/install-guides/_images/aperf0.webp b/content/install-guides/_images/aperf0.webp deleted file mode 100644 index 4db30ede31..0000000000 Binary files a/content/install-guides/_images/aperf0.webp and /dev/null differ diff --git a/content/install-guides/_images/aperf_report_aligned_graphs.png b/content/install-guides/_images/aperf_report_aligned_graphs.png new file mode 100644 index 0000000000..98306da434 Binary files /dev/null and 
b/content/install-guides/_images/aperf_report_aligned_graphs.png differ diff --git a/content/install-guides/_images/aperf_report_help_panel.png b/content/install-guides/_images/aperf_report_help_panel.png new file mode 100644 index 0000000000..79a3c3a435 Binary files /dev/null and b/content/install-guides/_images/aperf_report_help_panel.png differ diff --git a/content/install-guides/_images/aperf_report_home.png b/content/install-guides/_images/aperf_report_home.png new file mode 100644 index 0000000000..160cd651ad Binary files /dev/null and b/content/install-guides/_images/aperf_report_home.png differ diff --git a/content/install-guides/_images/aperf_report_statistical_findings.png b/content/install-guides/_images/aperf_report_statistical_findings.png new file mode 100644 index 0000000000..09d9780fcb Binary files /dev/null and b/content/install-guides/_images/aperf_report_statistical_findings.png differ diff --git a/content/install-guides/_images/ChromeOSpf.png b/content/install-guides/_images/chromeospf.png similarity index 100% rename from content/install-guides/_images/ChromeOSpf.png rename to content/install-guides/_images/chromeospf.png diff --git a/content/install-guides/_images/download-win-armpl_23.10.png b/content/install-guides/_images/download-win-armpl_23.10.png deleted file mode 100644 index 2b7df887af..0000000000 Binary files a/content/install-guides/_images/download-win-armpl_23.10.png and /dev/null differ diff --git a/content/install-guides/_images/download_ecosys_fvp.png b/content/install-guides/_images/download_ecosys_fvp.png deleted file mode 100644 index 4489dbb54b..0000000000 Binary files a/content/install-guides/_images/download_ecosys_fvp.png and /dev/null differ diff --git a/content/install-guides/_images/MCUXpresso_Installer.png b/content/install-guides/_images/mcuxpresso_installer.png similarity index 100% rename from content/install-guides/_images/MCUXpresso_Installer.png rename to content/install-guides/_images/mcuxpresso_installer.png diff --git a/content/install-guides/_images/win-sys-path.png b/content/install-guides/_images/win-sys-path.png deleted file mode 100644 index 40810ef574..0000000000 Binary files a/content/install-guides/_images/win-sys-path.png and /dev/null differ diff --git a/content/install-guides/_images/windows-sys-env_23.10.png b/content/install-guides/_images/windows-sys-env_23.10.png deleted file mode 100644 index 4153d46fb9..0000000000 Binary files a/content/install-guides/_images/windows-sys-env_23.10.png and /dev/null differ diff --git a/content/install-guides/_images/windows-sys-prop.png b/content/install-guides/_images/windows-sys-prop.png deleted file mode 100644 index e504711317..0000000000 Binary files a/content/install-guides/_images/windows-sys-prop.png and /dev/null differ diff --git a/content/install-guides/aperf.md b/content/install-guides/aperf.md index 9ad8337985..7e766fcb3c 100644 --- a/content/install-guides/aperf.md +++ b/content/install-guides/aperf.md @@ -8,24 +8,20 @@ official_docs: https://github.com/aws/aperf test_images: - ubuntu:latest test_maintenance: true -title: AWS Perf (APerf) +title: APerf tool_install: true weight: 1 --- -APerf (AWS Perf) is an open source command line performance analysis tool which saves time by collecting information which is normally collected by multiple tools such as `perf`, `sysstat`, and `sysctl`. +APerf is an open source command line tool maintained by AWS. 
It helps you monitor and debug performance on Linux systems by collecting a wide range of performance-related system metrics and data that traditionally require multiple tools, such as `perf`, `sysstat`, and `sysctl`. -APerf was created by AWS to help with Linux performance analysis. +APerf collects system data and saves it in an archive. It then generates a static HTML report from one or more archives to visualize the data. When you generate the report, APerf analyzes the data to automatically detect potential performance issues. You can open the report in a browser to view all collected data and analytical findings. -In addition to the CLI, APerf includes an HTML view to visualize the collected data. +## Install APerf -## What should I do before I begin installing APerf? +This guide provides a quick solution to install APerf on Arm Linux and get started. -APerf works on Linux, and is available as a single binary. - -APerf works best if `perf` is installed. Refer to the [Perf for Linux on Arm](/install-guides/perf) install guide for instructions. - -This article provides a quick solution to install APerf on Arm Linux and get started. +## Before you begin Confirm you are using an Arm machine by running: @@ -39,37 +35,52 @@ The output should be: aarch64 ``` -If you see a different result, you are not using an Arm computer running 64-bit Linux. +{{% notice Note %}} If you see a different result, you are not using an Arm computer running 64-bit Linux. APerf can only run on Linux.{{% /notice %}} + +To allow APerf to collect PMU (Processor Monitoring Unit) metrics without sudo or root permissions, set `/proc/sys/kernel/perf_event_paranoid` to -1: + +```bash +sudo sysctl -w kernel.perf_event_paranoid=-1 +``` + +To use APerf's CPU profiling option (`--profile`), install the `perf` binary. See the [Perf for Linux on Arm](/install-guides/perf) install guide for instructions. -## How do I download and install APerf? +For kernel address visibility, set `/proc/sys/kernel/kptr_restrict` to 0: -The easiest way to install APerf is to download a release from GitHub, extract it, and setup your `PATH` environment variable or copy the executable to a directory already in your search path. +```bash +sudo sysctl -w kernel.kptr_restrict=0 +``` + +To use APerf's Java profiling option (`--profile-java`), install the [async-profiler](https://github.com/async-profiler/async-profiler) tool. + +## Download and install APerf +The easiest way to install APerf is to download a release from GitHub and extract it. -Visit the [releases page](https://github.com/aws/aperf/releases/) to see a list of available releases. +Visit the [releases page](https://github.com/aws/aperf/releases/) to see available releases. -You can also download a release from the command line: +You can download a release from the command line: ```bash { target="ubuntu:latest" } -wget https://github.com/aws/aperf/releases/download/v0.1.15-alpha/aperf-v0.1.15-alpha-aarch64.tar.gz +wget https://github.com/aws/aperf/releases/download/v1.0.0/aperf-v1.0.0-aarch64.tar.gz ``` Extract the release: ```bash { target="ubuntu:latest" } -tar xvfz aperf-v0.1.15-alpha-aarch64.tar.gz +tar xvfz aperf-v1.0.0-aarch64.tar.gz ``` Add the path to `aperf` in your `.bashrc` file. ```console -echo 'export PATH="$PATH:$HOME/aperf-v0.1.15-alpha-aarch64"' >> ~/.bashrc +echo 'export PATH="$PATH:$HOME/aperf-v1.0.0-aarch64"' >> ~/.bashrc source ~/.bashrc ``` Alternatively, you can copy the `aperf` executable to a directory already in your search path. 
```bash { target="ubuntu:latest" } -sudo cp aperf-v0.1.15-alpha-aarch64/aperf /usr/local/bin +sudo cp aperf-v1.0.0-aarch64/aperf /usr/local/bin ``` Confirm `aperf` is installed by printing the version: @@ -81,81 +92,100 @@ aperf --version The output should print the version: ```output -aperf 0.1.0 (4b910d2) +aperf 1.0.0 (4cf8d28) ``` -## How do I verify APerf is working? +## Verify APerf is working -### How do I create and view a report? +To confirm APerf is working, start a collection run with the default settings. The default interval is 1 second, and the default period is 10 seconds. -To confirm APerf is working, start it for 10 seconds and take a sample every 1 second. +Run the following command to start data collection: ```console -sudo aperf record -i 1 -p 10 -r run1 --profile +aperf record -r test_1 ``` -After 10 seconds `aperf` completes and you see a directory named `run1` and a tar file named `run1.tar.gz`. +After 10 seconds, the collection completes. APerf creates a directory named `test_1` and a tar file named `test_1.tar.gz`. + +If you need CPU profiling, add the `--profile` flag. For Java profiling, add the `--profile-java` flag. + +### How do I create and view a report? -Next, generate a report from the recorded data: +Generate a report from the recorded data: ```console -sudo aperf report -r run1 -n report1 +aperf report -r test_1 -n test_report ``` -The name of the report is `report1` and you will see a `report1` directory and a tar file named `report1.tar.gz`. - -The tar files are useful if you want to copy them to another machine. +APerf creates a directory named `test_report` and a tar file named `test_report.tar.gz`. The tar file is useful when you want to copy the report to another machine. -Using a web browser, open the file `index.html` in the `report1/` directory. To open the file use `Ctrl+O` for Linux and Windows and use `⌘+O` for macOS. +To view the report, open the `index.html` file in the `test_report/` directory using a web browser. Press `Ctrl+O` on Linux and Windows, or `⌘+O` on macOS. -The report is now visible in the browser. +The report's home page displays system information from the APerf run, followed by analytical findings that highlight potential performance issues: -There are a number of tabs on the left side showing the collected data. +![APerf report home page showing system information and analytical findings alt-txt#center](/install-guides/_images/aperf_report_home.png "APerf report home page") -You can browse the data and see what has been collected. +You can browse through all collected data using the navigation panel on the left. -![APerf #center](/install-guides/_images/aperf0.webp) +To learn more about a specific metric, select the info button next to it to open the help panel: -{{% notice Note %}} -The Kernel Config and Sysctl Data tabs are blank unless you click No. -{{% /notice %}} +![APerf report help panel showing detailed metric information alt-txt#center](/install-guides/_images/aperf_report_help_panel.png "APerf report help panel") -### How do I create and view a report containing 2 runs? +### How do I compare multiple runs? -To demonstrate comparing 2 runs, create a second run with `aperf record`: +To demonstrate comparing multiple runs, create a second run with `aperf record`: ```console -sudo aperf record -i 1 -p 10 -r run2 --profile +aperf record -r test_2 ``` -After 10 seconds `aperf` completes and you see a directory named `run2` and a tar file named `run2.tar.gz`. 
+Similarly, after 10 seconds the collection completes, and APerf produces a directory named `test_2` and a tar file named `test_2.tar.gz`.
-Generate a report with both the first and second runs included:
+Generate a report that includes both runs. The first run in the `-r` arguments becomes the base run for automatic comparisons:
```console
-sudo aperf report -r run1 -r run2 -n compare
+aperf report -r test_1 test_2 -n compare_report
```
-The name of the report is `compare` and you will see a `compare` directory and a tar file named `compare.tar.gz`.
+APerf creates a directory named `compare_report` and a tar file named `compare_report.tar.gz`.
+
+Open the `index.html` file in the `compare_report/` directory using a web browser.
+
+Because the report includes multiple runs, APerf compares all runs against the base run and displays statistical findings on the home page:
-Open the `index.html` file in the `compare/` directory to see the 2 runs side by side.
+![APerf report home page showing statistical comparisons between multiple runs alt-text#center](/install-guides/_images/aperf_report_statistical_findings.png "APerf report statistical findings")
-A screenshot is shown below:
+When you view metric graphs, APerf aligns graphs of the same metric from different runs side by side for easy comparison:
-![APerf #center](/install-guides/_images/aperf.webp)
+![APerf report showing aligned metric graphs from multiple runs for comparison alt-text#center](/install-guides/_images/aperf_report_aligned_graphs.png "APerf report aligned graphs")
-### How do I use an HTTP server to view reports?
+### How do I view reports from a remote system?
-If you are doing performance analysis on a remote system or cloud instance without a remote desktop, you can view the APerf reports from your local browser by running a simple web server on the remote machine.
+If you're working on a remote system or cloud instance without a desktop environment, you can view APerf reports in your local browser by running a web server on the remote machine.
+
+Navigate to the directory containing the report and the `index.html` file:
+
+```console
+cd test_report
+```
-In the directory with the report data and the `index.html` file run a simple web server:
+Start a simple HTTP server:
```console
python -m http.server 3000
```
-Make sure port 3000 is open on the remote system and enter the IP address of the remote system followed by `:3000` in your browser address bar.
+The server starts on port 3000. Make sure this port is open in your firewall or security group settings.
+
+Open a web browser on your local machine and navigate to:
+
+```output
+http://<remote-ip>:3000
+```
+
+Replace `<remote-ip>` with the IP address of your remote system.
+
+The APerf report opens in your browser without needing to copy files to your local machine.
-You will see the same APerf report, and avoid the need to copy files to your local machine from the remote system for viewing.
+You're now ready to use APerf for performance analysis on your Arm Linux system.
-You are ready to use APerf for performance analysis on your Arm Linux system.
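+In practice, you usually want the collection window to cover a specific workload rather than an idle system. The sketch below shows one way to do that using only the options covered in this guide; the `./my_benchmark` command and the 60-second period are placeholders for your own workload and timing:
+
+```bash
+# Start a baseline recording in the background, run the workload, then wait
+# for the 60-second collection period to finish.
+aperf record -i 1 -p 60 -r baseline &
+./my_benchmark
+wait
+
+# Repeat the recording after making the change you want to evaluate.
+aperf record -i 1 -p 60 -r tuned &
+./my_benchmark
+wait
+
+# The first run passed to -r becomes the base run for the comparison.
+aperf report -r baseline tuned -n baseline_vs_tuned
+```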
diff --git a/content/install-guides/claude-code.md b/content/install-guides/claude-code.md new file mode 100644 index 0000000000..5f8c92b3c3 --- /dev/null +++ b/content/install-guides/claude-code.md @@ -0,0 +1,300 @@ +--- +title: Claude Code + +author: Pareena Verma +minutes_to_complete: 10 +official_docs: https://code.claude.com/docs + +layout: installtoolsall +multi_install: false +multitool_install_part: false +tool_install: true +weight: 1 +--- + +Claude Code is an AI-powered command-line tool that helps you build features, debug code, and navigate codebases directly from your terminal. It provides autonomous coding assistance and integrates with your existing development workflow. + +Claude Code works seamlessly on Arm-based systems, including Linux distributions running on Arm servers, macOS on Apple Silicon, and Windows on Arm devices. + +## Review prerequisites + +You need a Claude account to use Claude Code. A Claude.ai account is recommended, though you can also use a Claude Console account. + +If you don't have a Claude account, visit [Claude.ai](https://claude.ai/) and sign up. + +Claude Code is only available for paid Pro and Max accounts, if not using API credits. Visit [Claude Pricing](https://www.anthropic.com/pricing) to review the options. + +## Install Claude Code + +Claude Code is a terminal application that works on macOS, Linux, and Windows systems, including Arm-based platforms. + +### Install on Linux (Arm) + +The recommended installation method for Linux uses the installation script: + +```bash { target="ubuntu:latest" } +curl -fsSL https://claude.ai/install.sh | bash +``` + +This script automatically detects your system architecture and installs the appropriate version for Arm64 systems. + +Add Claude Code to your PATH: + +```bash +echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc && source ~/.bashrc +``` + +## Install on macOS (Apple Silicon) + +On macOS, you can use the installation script: + +```bash +curl -fsSL https://claude.ai/install.sh | bash +``` + +Or install using Homebrew: + +```bash +brew install --cask claude-code +``` + +## Install on Windows on Arm + +On Windows systems, including Windows on Arm, run the following PowerShell command: + +```console +irm https://claude.ai/install.ps1 | iex +``` + +For other options, please [see the Claude Code setup page](https://code.claude.com/docs/en/setup). + +## Verify installation + +Confirm Claude Code is installed by checking the version: + +```console +claude --version +``` + +The output shows the installed version: + +```output +2.1.7 (Claude Code) +``` + +## Authenticate Claude Code + +After installing Claude Code, you need to authenticate: + +Navigate to a project directory: + +```console +cd your-project +``` + +Start Claude Code: + +```console +claude +``` + +Configure your preferences when prompted (dark mode, editor settings, etc.). + +On first use, Claude Code prompts you to authenticate: + +- If using Claude.ai, authenticate through your browser +- If on a remote machine, paste the provided link into a local browser, then enter the authentication code in Claude Code + +Accept the acknowledgements to complete setup. + +Claude Code automatically saves your authentication credentials for future sessions. + +## Confirm Claude Code is working + +Test Claude Code by asking it to perform a simple task. 
+ +Start Claude Code in a project directory: + +```console +claude +``` + +Type a request, for example: + +```console +> Create a Python function to calculate fibonacci numbers for my Arm machine +``` + +Claude Code analyzes your request, creates a plan, and generates the code. + +Review the proposed changes before accepting them. Claude Code shows you a preview of changes before applying them, giving you control over what gets modified in your codebase. + +If Claude Code doesn't respond: +- Verify you're authenticated (run `claude` and check for authentication prompts) +- Check your internet connection +- Ensure your Claude account is active +- Try restarting Claude Code + + +You’re now ready to use Claude Code. +{{% notice Note %}} The sections below are optional and cover advanced integrations, including using MCP servers with Claude Code on Arm.{{% /notice %}} + + +## Use MCP Servers with Claude Code + +Model Context Protocol (MCP) Servers extend Claude Code's capabilities by providing specialized tools and knowledge bases. Claude Code can connect to MCP servers to access domain-specific expertise and functionality. + +The Arm MCP Server provides AI assistants with tools and knowledge for Arm architecture development, migration, and optimization. This is particularly useful when working on Arm-based systems. + +## Arm MCP Server tools + +The Arm MCP Server includes several tools designed for Arm development: + +- migrate-ease scan: Analyzes codebases for x86-specific code that needs updating for Arm compatibility +- skopeo: Inspects container images to check for ARM64 architecture support +- knowledge_base_search: Searches Arm documentation and learning resources +- mca (Machine Code Analyzer): Analyzes assembly code for performance on Arm architectures +- check_image: Verifies Docker image architecture compatibility + +## Configure the Arm MCP Server with Claude Code + +You need Docker running on your system to use the Arm MCP Server. See the [Docker install guide](/install-guides/docker/) for instructions. + +First, pull the Arm MCP Server image: + +```console +docker pull armlimited/arm-mcp:latest +``` + +Configure the Arm MCP Server using the `claude mcp add` command. You can configure MCP servers at three different scopes: + +- Local scope (default): Available only to you in the current project +- Project scope: Shared with everyone in the project via `.mcp.json` file +- User scope: Available to you across all projects + +{{% notice Note %}} +Choose the appropriate scope based on your needs. Project scope is recommended for team collaboration, while user scope is useful for personal tools you use across multiple projects. +{{% /notice %}} + +**Configure for a specific project (local scope)** + +Navigate to your project directory and add the Arm MCP Server: + +```console +cd your-project +claude mcp add --transport stdio arm-mcp -- docker run --rm -i --pull=always -v "$(pwd):/workspace" armlimited/arm-mcp:latest +``` + +This configuration is stored in `~/.claude.json` under your project's path and is only accessible when working in this directory. + +**Configure for all projects (user scope)** + +To make the Arm MCP Server available across all your projects: + +```console +claude mcp add --scope user --transport stdio arm-mcp -- docker run --rm -i --pull=always -v "$(pwd):/workspace" armlimited/arm-mcp:latest +``` + +This configuration is stored in `~/.claude.json` and is accessible from any project directory. 
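+Before relying on the server inside Claude Code, you can start the same container manually as a quick sanity check that Docker can pull and run the image on your machine. The command below mirrors the configuration above; because the server is designed to talk to an MCP client over stdio rather than to a terminal, stop it with `Ctrl+C` once it starts without errors:
+
+```console
+docker run --rm -i --pull=always -v "$(pwd):/workspace" armlimited/arm-mcp:latest
+```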
+ +**Configure for team sharing (project scope)** + +To share the MCP server configuration with your team via version control: + +```console +cd your-project +claude mcp add --scope project --transport stdio arm-mcp -- docker run --rm -i --pull=always -v "$(pwd):/workspace" armlimited/arm-mcp:latest +``` + +This creates a `.mcp.json` file in your project root that can be committed to version control. + +## Analyze a local codebase with the Arm MCP Server + +The Arm MCP Server automatically mounts your current working directory to the `/workspace` folder inside the Docker container when you use the configuration commands shown above. + +To analyze a different directory, modify the volume mount in the `docker run` command. For example, to analyze `/Users/username/myproject`: + +```console +claude mcp add --transport stdio arm-mcp -- docker run --rm -i -v "/Users/username/myproject:/workspace" armlimited/arm-mcp:latest +``` + +## Verify the Arm MCP Server is working + +List configured MCP servers: + +```console +claude mcp list +``` + +You should see `arm-mcp` in the list of configured servers. + +Get details about the Arm MCP Server configuration: + +```console +claude mcp get arm-mcp +``` + +To test the server's functionality, start Claude Code and ask it to use the Arm MCP tools: + +```console +claude +``` + +Then try one of these prompts: + +```console +> Use the Arm MCP Server to scan my codebase for x86-specific code +``` + +or + +```console +> Check if the nginx:latest Docker image supports Arm64 +``` + +You can also use the `/mcp` command within Claude Code to see the status of all connected MCP servers and their available tools. + +## Example prompts using the Arm MCP Server + +Here are some example prompts that use the Arm MCP Server tools: + +- `Scan my workspace for code that needs updating for Arm compatibility` +- `Check if the postgres:latest container image supports Arm64 architecture` +- `Search the Arm knowledge base for NEON intrinsics examples` +- `Find learning resources about migrating from x86 to Arm` +- `Analyze this assembly code for performance on Arm processors` + +## Manage MCP servers + +Remove an MCP server: + +```console +claude mcp remove arm-mcp +``` + +Update an MCP server configuration by removing and re-adding it with new settings. + +Check MCP server status within Claude Code: + +```console +> /mcp +``` + +## Troubleshoot MCP Server connections + +If the Arm MCP Server doesn't connect: + +- Verify Docker is running: `docker ps` +- Check that the image was pulled successfully: `docker images | grep arm-mcp` +- Ensure the volume mount path exists and is accessible +- Check that the Docker daemon is running and accessible +- Try restarting Claude Code after configuration changes +- Review the output of `claude mcp get arm-mcp` for configuration errors + +If you encounter issues or have questions, reach out to mcpserver@arm.com. + + +## Custom prompts and workflows + +Create custom prompts for common tasks in your workflow. Refer to the [Claude Code documentation](https://code.claude.com/docs) for advanced configuration options. 
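+If you want to cross-check an image-architecture answer outside of Claude Code, you can query the image manifest directly with the Docker CLI. This is a quick sketch using the `nginx:latest` image from the example prompts above; depending on your Docker version, the `docker manifest` command may require experimental CLI features to be enabled:
+
+```console
+docker manifest inspect nginx:latest | grep -i arm64
+```
+
+If the output contains an `arm64` entry, the image publishes a variant that runs on Arm machines.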
+ diff --git a/content/install-guides/fvps-on-macos.md b/content/install-guides/fvps-on-macos.md index da67fa3389..363d72a3b3 100644 --- a/content/install-guides/fvps-on-macos.md +++ b/content/install-guides/fvps-on-macos.md @@ -26,7 +26,8 @@ multi_install: false # Set to true if first page of multi-page articl multitool_install_part: false # Set to true if a sub-page of a multi-page article, else false layout: installtoolsall # DO NOT MODIFY. Always true for tool install articles --- -This guide is intended to get you up and running with the [Arm Virtual Hardware (AVH) Fixed Virtual Platforms (FVPs)](https://www.arm.com/products/development-tools/simulation/virtual-hardware) on macOS. For a thorough review of all options, refer to the official documentation. +This guide shows you how to use [Arm Virtual Hardware (AVH) Fixed Virtual Platforms (FVPs)](https://www.arm.com/products/development-tools/simulation/virtual-hardware) on macOS by running them in Docker containers. The [official repository](https://github.com/Arm-Examples/FVPs-on-Mac/blob/main/README.md) provides additional technical details. + ## What are the prerequisites for running AVH FVPs on macOS? diff --git a/content/install-guides/mcuxpresso_vs.md b/content/install-guides/mcuxpresso_vs.md index 30cb891d56..3b9b16731c 100644 --- a/content/install-guides/mcuxpresso_vs.md +++ b/content/install-guides/mcuxpresso_vs.md @@ -65,7 +65,7 @@ Download and run the installer. Select one or more packages and click `Install`. -![MCUXpresso Installer #center](/install-guides/_images/MCUXpresso_Installer.png) +![MCUXpresso Installer #center](/install-guides/_images/mcuxpresso_installer.png) ### Are there other embedded development extensions for VS Code? diff --git a/content/install-guides/openvscode-server.md b/content/install-guides/openvscode-server.md index 6405293b2a..65e8513fc4 100644 --- a/content/install-guides/openvscode-server.md +++ b/content/install-guides/openvscode-server.md @@ -105,7 +105,7 @@ With the port open, substitute the public IP address of the instance instead of On ChromeOS you can use the Linux configuration settings to automatically do port forwarding. No SSH connection is needed. -![port forwarding #center](/install-guides/_images/ChromeOSpf.png) +![port forwarding #center](/install-guides/_images/chromeospf.png) ## What other configuration options are available? diff --git a/content/learning-paths/automotive/zenacssdebug/configdb.png b/content/learning-paths/automotive/zenacssdebug/configdb.png deleted file mode 100644 index 819071a7ff..0000000000 Binary files a/content/learning-paths/automotive/zenacssdebug/configdb.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_index.md b/content/learning-paths/cross-platform/gitlab-managed-runners/_index.md index 2f39932a2f..e4615f81fa 100644 --- a/content/learning-paths/cross-platform/gitlab-managed-runners/_index.md +++ b/content/learning-paths/cross-platform/gitlab-managed-runners/_index.md @@ -5,14 +5,14 @@ draft: true cascade: draft: true -minutes_to_complete: 30 +minutes_to_complete: 40 who_is_this_for: This is an Introductory topic for DevOps professionals who are looking to build a CI/CD pipeline with GitLab on Google Axion using GitLab-Hosted runners. 
learning_objectives:
- Create a GitLab Project
- Understand basic pipeline script structure and how to use it
- - Build and test a simple CI/CD pipeline Using Gitlab-hosted runners
+ - Build and test a simple CI/CD pipeline using GitLab-hosted runners that builds a tiny Docker image from a simple "Hello World" C program. The image is built to run on Arm64 machines and is saved in the GitLab Registry for later use.
prerequisites:
@@ -26,10 +26,12 @@ subjects: CI-CD
cloud_service_providers: Google Cloud
armips:
- - Neoverse
+ - Neoverse-N1
tools_software_languages:
- GitLab
+ - Docker
+ - C
operatingsystems:
- Linux
diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/info.md b/content/learning-paths/cross-platform/gitlab-managed-runners/info.md
index cc40e6d597..57578b3f4e 100644
--- a/content/learning-paths/cross-platform/gitlab-managed-runners/info.md
+++ b/content/learning-paths/cross-platform/gitlab-managed-runners/info.md
@@ -15,11 +15,9 @@ A GitLab Runner works with GitLab CI/CD to run jobs in a pipeline. It acts as an
3. Multi-architecture support: GitLab runners support multiple architectures including - **`x86/amd64`** and **`arm64`**.
-## What is Google Axion?
-Axion is Google's first Arm-based server processor, built using the Armv9 Neoverse V2 CPU. The VM instances are part of the **`C4A`** family of compute instances. To learn more about Google Axion refer to this [page](http://cloud.google.com/products/axion/) .
{{% notice Note %}}
-All The information provided in the next section are from GitLab official Pages and it's provided here for convenience and can be changed by Gitlab at anytime. Please refer to the [Gitlab Documentation](https://docs.gitlab.com/ci/runners/hosted_runners/) for more details and for the latest updates.
+All the information provided in the next section is from GitLab's official pages; it is provided here for convenience and can be changed by GitLab at any time. Please refer to the [GitLab documentation](https://docs.gitlab.com/ci/runners/hosted_runners/) for more details and for the latest updates. This section is optional.
{{% /notice %}}
## GitLab-Hosted Runners Facts
diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md b/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md
index 2a51972858..713ccd3fdf 100644
--- a/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md
+++ b/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md
@@ -8,83 +8,122 @@ layout: learningpathall
## How to Create a CI/CD Pipeline with Gitlab-hosted Runners?
-To create the pipeline we only need to create a new **`.gitlab-ci.yml`** file in our Project and define it's stages. Nothing else is needed since Gtilab-hosted runners are readily available to any Project and doesn't need to be created or instantiated by the Gitlab users.
+To create the pipeline you only need to create a new **`.gitlab-ci.yml`** file in your project and define its stages. Nothing else is needed, since GitLab-hosted runners are readily available to any project and don't need to be created or instantiated by GitLab users.
-Once we run our pipeline with the correct **`tags`** Gitlab will create everything that we need and yep it is as simple as that.
+Once you run your pipeline with the correct **`tags`**, GitLab creates everything that you need; it is as simple as that.
+
+## How are you going to test your pipeline functionality?
+ +You will test the pipeline by building a Docker image from a simple C language "Hello World" program which can run on Arm64 instances/machines and to do that you will need to create the following files: + +1) **`main.c`** File: which is the main program that will get executed when we will run your Docker image later. I only provided a simple example but please feel free to use any program that you like. Although, I advise to start with this simple program to test that everything is working then use anything later after by changing the **`main.c`** file. + +```c +//main.c +#include + +int main(void) { + printf("Hello from an Arm64 Docker image built on GitLab hosted Arm runners!\n"); + return 0; +} +``` + +2) **`DockerFile`** File: This file has a set of instruction for Docker on how to create a Docker image. It simply instructs the Docker on your runner on how to build and package your **``hello``** app into a Docker image. This produces a tiny image and will run on Arm64 hosts as long as the binary is Arm64 (which it will be, since we’re building on an Arm runner). + +```DockerFile +# DockerFile +# syntax=docker/dockerfile:1 + +FROM alpine:3.20 AS build +RUN apk add --no-cache build-base +WORKDIR /src +COPY main.c . +RUN gcc -O2 -static -s -o hello main.c + +FROM scratch +COPY --from=build /src/hello /hello +ENTRYPOINT ["/hello"] + +``` + +3) Optionally **`.dockerignore`** file: This file instructs Docker to ignor certain files that has nothing to do with the image that it will create. + +```.dockerignore +.git +.gitlab-ci.yml +``` + +4) You will also need to create a YML file as I mentioned before which I will explain in more details in the next section. + +To Create any of those files simply follow the same steps in the next section but instead of choosing **`.gitlab-ci.yml`** file just change the name to each of the corresponding file names above. It is very important to create the 3 files from this section first because once you create and commit/save the **`.gitlab-ci.yml`** file, it will simply run the pipeline. If the other 3 files don't exist at that time then the pipeline will fail. ## How to Create .gitlab-ci.yml file in a Gitlab Project? -1. Start by going to the main project page where we will need to Create the CI/CD pipeline. +1. Start by going to the main project page where you will need to Create the CI/CD pipeline. -2. We can choose to create **`.gitlab-ci.yml`** file by using one of the 2 options circled in red in the image below. -![CI-CD-New #center](_images/ci-cd-new.webp) +2. You can choose to create **`.gitlab-ci.yml`** file by using one of the 2 options circled in red in the image below. +![CI-CD-New #center](_images/ci-cd-new.png) -Option1: We can Click on **`Set up CI/CD`** button/link and follow the wizard to create an empty **`.gitlab-ci.yml`** file. +Option1: You can Click on **`Set up CI/CD`** button/link and follow the wizad to create an empty **`.gitlab-ci.yml`** file. Option2: Click on the "+" button. From the popup menu click on **`New File`** option and name the file **`.gitlab-ci.yml`** and then click on **`Commit Changes`** button on the top right hand side like in the image below (Add any message as your commit message). ![New-YML #center](_images/new-yml.png) -3. A page like the one in the image below will be visible with our **`.gitlab-ci.yml`** file. From here, we will need to Click on the **`Edit`** button. A menu will pop up, We will click on **`Edit in pipeline Editor`** which will allow us to add our CD/CD script. 
-![Editor-YML #center](_images/editor-yml.webp) +3. A page like the one in the image below will be visible with your **`.gitlab-ci.yml`** file. From here, you will need to Click on the **`Edit`** button. A menu will pop up, you will click on **`Edit in pipeline Editor`** which will allow you to add your CD/CD script. +![Editor-YML #center](_images/editor-yml.png) 4. In the pipeline editor, just copy and paste the following YML script and click on commit changes (Add any relevent message as your commit update message). ```YML -# This file is a template, and might need editing before it works on your project. -# This is a sample GitLab CI/CD configuration file that should run without any modifications. -# It demonstrates a basic 3 stage CI/CD pipeline. Instead of real tests or scripts, -# it uses echo commands to simulate the pipeline execution. -# -# A pipeline is composed of independent jobs that run scripts, grouped into stages. -# Stages run in sequential order, but jobs within stages run in parallel. -# -# For more information, see: https://docs.gitlab.com/ee/ci/yaml/#stages -# -# You can copy and paste this template into a new `.gitlab-ci.yml` file. -# You should not add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword. -# -# To contribute improvements to CI/CD templates, please follow the Development guide at: -# https://docs.gitlab.com/development/cicd/templates/ -# This specific template is located at: -# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Getting-Started.gitlab-ci.yml - -stages: # List of stages for jobs, and their order of execution - - build - - test - - deploy - -build-job: # This job runs in the build stage, which runs first. - stage: build - tags: - - saas-linux-small-arm64 #Instruct Gitlab to use it's own Hosted runners that use Linux on Arm64 instance of size small. - script: - - echo "Compiling the code..." - - echo "Compile complete." +#First Section +stages: [build, test, push] -unit-test-job: # This job runs in the test stage. - stage: test # It only starts when the job in the build stage completes successfully. - tags: - - saas-linux-small-arm64 #Instruct Gitlab to use it's own Hosted runners that use Linux on Arm64 instance of size small. - script: - - echo "Running unit tests... This will take about 60 seconds." - - sleep 60 - - echo "Code coverage is 90%" +variables: + IMAGE_TAG: "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHORT_SHA" + IMAGE_LATEST: "$CI_REGISTRY_IMAGE:latest" -lint-test-job: # This job also runs in the test stage. - stage: test # It can run at the same time as unit-test-job (in parallel). - tags: - - saas-linux-small-arm64 #Instruct Gitlab to use it's own Hosted runners that use Linux on Arm64 instance of size small. 
+ # Talk to docker:dind over TLS (default behavior for docker:dind) + DOCKER_HOST: "tcp://docker:2376" + DOCKER_TLS_CERTDIR: "/certs" + DOCKER_CERT_PATH: "/certs/client" + DOCKER_TLS_VERIFY: "1" + +#Second Section +build_test_push: + stage: build + tags: # This tag is used to specify the size of the runner for each stage but it can be defined on the top of the file if you want to use the same exact runner size for all the stages in your pipeline + - saas-linux-small-arm64 + image: docker:27 + services: + - name: docker:27-dind + before_script: + - uname -m + # install lscpu (provided by util-linux) which is used to identify the CPU used for this stage runner + - apk add --no-cache util-linux + - lscpu + - docker version + - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY" script: - - echo "Linting code... This will take about 10 seconds." - - sleep 10 - - echo "No lint issues found." + - docker build --pull -t "$IMAGE_TAG" . + - docker run --rm "$IMAGE_TAG" + - docker push "$IMAGE_TAG" -deploy-job: # This job runs in the deploy stage. - stage: deploy # It only runs when *both* jobs in the test stage complete successfully. +#Third Section +push_latest: + stage: push tags: - - saas-linux-small-arm64 #Instruct Gitlab to use it's own Hosted runners that use Linux on Arm64 instance of size small. - environment: production + - saas-linux-small-arm64 + image: docker:27 + services: + - name: docker:27-dind + before_script: + - apk add --no-cache util-linux #since each stage is using a different runner then we need to check this CPU as well + - lscpu + - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY" script: - - echo "Deploying application..." - - echo "Application successfully deployed." + - docker pull "$IMAGE_TAG" + - docker tag "$IMAGE_TAG" "$IMAGE_LATEST" + - docker push "$IMAGE_LATEST" + rules: + - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH ``` 5. Once you commit the file updates, Gitlab will check the scripts for errors and will try to execute the pipeline directly. diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/project.md b/content/learning-paths/cross-platform/gitlab-managed-runners/project.md index 1ad8bea1a2..aa1c7c196b 100644 --- a/content/learning-paths/cross-platform/gitlab-managed-runners/project.md +++ b/content/learning-paths/cross-platform/gitlab-managed-runners/project.md @@ -6,13 +6,13 @@ weight: 10 layout: learningpathall --- -## Where Should We Start? +## Where Should you Start? Start by logging into your GitLab account or create a new one in the [Gitlab](https://gitlab.com/) main page. -We will need to a new project/repo that will contain all our project files including our **`CI/CD`** pipeline configuration file. +You will need to a new project/repo that will contain all your project files including your **`CI/CD`** pipeline configuration file. -We can also choose to use any previously created projects in our Gitlab account. Simply open your previously created project. If that is the case then skip the rest of the steps in the current page and move to the next steps in this tutorial. +You can also choose to use any previously created projects in your Gitlab account. Simply open your previously created project, If that is the case then skip the rest of the steps in the current page and move to the next steps in this tutorial. 
## Create a New Project in Gitlab
2. Click on the **`New Project`** button on the top right hand side as the image below.
![Gitlab-Projects #center](_images/gitlab-projects.png)
-3. We will get a new screen like the image below with multiple options. You can choose any of the 2 options highlighted in red from the image below.
+3. You will get a new screen like the image below with multiple options. You can choose either of the two options highlighted in red in the image below.
{{% notice Note %}}
-If we chose option 2 then we will need to choose **`GitLab CI/CD components`** option from the list of templates.
+If you choose option 2, you will need to choose the **`GitLab CI/CD components`** option from the list of templates.
{{%/notice%}}
![New-Project #center](_images/new-project.png)
-4. Regardles of which option we choose, We will get a screen like the image below where we need to fill-in fields highlighted in red. The first field is the **`Project Name`** which we will name it **`CI-CD Runner`**. In the second field we need to choose any option from the **`Project Url`** list then click on the **`Create Project`** button at the end of the page.
+4. Regardless of which option you choose, you will get a screen like the image below where you will need to fill in the fields highlighted in red. The first field is the **`Project Name`**, which you can name **`CI-CD Runner`**. In the second field, choose any option from the **`Project Url`** list, then click on the **`Create Project`** button at the end of the page.
![Project-Info #center](_images/project-info.png)
-##### **If we did everything correctly then we should get a screen like the one in the image below.**
+##### **If you did everything correctly, you should get a screen like the one in the image below.**
![Project-Done #center](_images/project-done.png)
diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/results.md b/content/learning-paths/cross-platform/gitlab-managed-runners/results.md
index afde4d2267..67ab88e0e0 100644
--- a/content/learning-paths/cross-platform/gitlab-managed-runners/results.md
+++ b/content/learning-paths/cross-platform/gitlab-managed-runners/results.md
@@ -10,16 +10,18 @@ layout: learningpathall
The Pipeline script has multiple sections where each section instructs the pipeline operator on what todo or use and how each Stage looks like.
-### First Section: Stages
+### First Section: The stages
-In this section we are describing how many sequential stages will our pipeline have and what are their names (ex. **`Build, Test and Deploy`**). If we would like all the stages or jobs to run simultaneously then we simply don't define this section.
+In this section you describe how many sequential stages your pipeline has and what their names are (for example, **`Build, Test and Push`**). If you would like all the stages or jobs to run simultaneously, simply don't define this section.
-### Second Section: Build-Job part of the Build stage
+I am also defining some **`Variables`** that I will use in the other sections for simplicity.
-In this section we are defining the Build-Job as part of the Build stage. This stage will run on Gitlab-Hosted runner that uses Linux OS on Arm64 instance of size small.
+### Second Section: build_test_push part of the Build stage
+
+In this section you define the build_test_push job as the Build stage. This stage runs on a GitLab-hosted runner that uses Linux on an Arm64 instance of size small. I used the **`lscpu`** command to print out the CPU information for reference.
{{% notice Important Note %}}
-Gitlab offers 3 Arm64 based Instances that use Linux as their OS.
+GitLab offers 3 Arm64-based instances that use Linux as their OS. In the Free tier you can only use the small version.
- saas-linux-small-arm64
- saas-linux-medium-arm64
@@ -29,7 +31,11 @@ For more information about all Arm and other available Gitlab-hosted runners che
{{%/notice%}}
+I am also saving my Docker image in the GitLab registry because it's the easiest way to do that, but you can modify your pipeline to save your image in any other registry that you prefer. To get the GitLab registry credentials I am using the **`$CI`** variables that are defined in the GitLab environment and saving them in Docker for simplicity. Please note it's always recommended to encrypt your information or store it in a secrets/password manager.
+
+### Third Section:
+
+You will notice that in this stage I am simply testing that I can pull the image that I saved in my registry earlier and push it back as the latest version.
The rest of the other sections follow the same pattern. You will notice that the **`Test`** stage for example has 2 Jobs in it (unit-test-job and lint-test-job). The **`Deploy`** stage here has only 1 Job called **`deploy-job`**. As you get to learn more YML scripting you will be able to add a lot more complex functionality to your pipelines.
@@ -46,6 +52,29 @@ From the left hand side panel, Navigate to **`Build`** then to **`Pipeline`** th
To check the status of your pipeline and to check the output of any of it's Jobs simply click on any of the **`Jobs`** as the image below (with red rectangle around them).
![pipeline-execution #center](_images/pipeline-execution.webp)
+You can also download the Docker image that you saved in your GitLab registry and run it on an Arm64 instance for testing, using the following bash script.
+
+```bash
+docker login registry.gitlab.com
+docker pull registry.gitlab.com/<namespace>/<project-name>:latest
+docker run --rm registry.gitlab.com/<namespace>/<project-name>:latest
+```
+
+If everything works correctly, you should see output like in the box below.
+
+```output
+Hello from an Arm64 Docker image built on GitLab hosted Arm runners!
+```
+
+You can also check your GitLab Registry by going to your project then:
+
+- Go to Deploy → Container Registry
+- You should see new-docker
+- With tags like `latest` and the short commit SHA (**`$CI_COMMIT_SHORT_SHA`**)
+
+If it’s there, the registry did its job.
+
+
## Gitlab Helpful tools
If you navigate to your pipeline editor from before you will notice that there are more tabs in that page other than the **`Edit`** tab.
![visual-pipeline #center](_images/visual-pipeline.webp) diff --git a/content/learning-paths/cross-platform/kleidiai-explainer/Arm_KleidiAI_square_color.png b/content/learning-paths/cross-platform/kleidiai-explainer/Arm_KleidiAI_square_color.png deleted file mode 100644 index b55c12e40e..0000000000 Binary files a/content/learning-paths/cross-platform/kleidiai-explainer/Arm_KleidiAI_square_color.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/kleidiai-explainer/KleidiAI-src-matmul.JPG b/content/learning-paths/cross-platform/kleidiai-explainer/KleidiAI-src-matmul.JPG deleted file mode 100644 index 96a15e9636..0000000000 Binary files a/content/learning-paths/cross-platform/kleidiai-explainer/KleidiAI-src-matmul.JPG and /dev/null differ diff --git a/content/learning-paths/cross-platform/kleidiai-explainer/KleidiAI-src.JPG b/content/learning-paths/cross-platform/kleidiai-explainer/kleidiai-src.JPG similarity index 100% rename from content/learning-paths/cross-platform/kleidiai-explainer/KleidiAI-src.JPG rename to content/learning-paths/cross-platform/kleidiai-explainer/kleidiai-src.JPG diff --git a/content/learning-paths/cross-platform/kleidiai-explainer/page2.md b/content/learning-paths/cross-platform/kleidiai-explainer/page2.md index fe1c0be900..1e25fa8da1 100644 --- a/content/learning-paths/cross-platform/kleidiai-explainer/page2.md +++ b/content/learning-paths/cross-platform/kleidiai-explainer/page2.md @@ -16,7 +16,7 @@ There are essentially two types of KleidiAI micro-kernels today: 2. Matrix Multiplication routines - the three directories with the prefix `matmul_clamp`. Each directory contains routines specialized for a specific input data type. -![KleidiAI stuff](KleidiAI-src.JPG "KleidiAI src directory") +![KleidiAI stuff](kleidiai-src.JPG "KleidiAI src directory") ### What are the quantization levels that KleidiAI supports? KleidiAI has multiple matrix multiplication micro-kernels, and dynamic quantization routines, to optimally support all model quantization levels. To learn more about model quantization and how selecting the right quantization level affects your AI-based application, refer to [this Learning Path](/learning-paths/servers-and-cloud-computing/llama-cpu/llama-chatbot#quantization-format). diff --git a/content/learning-paths/cross-platform/mca-godbolt/running_mca.md b/content/learning-paths/cross-platform/mca-godbolt/running_mca.md index 4936df31d2..b16ee0cc4e 100644 --- a/content/learning-paths/cross-platform/mca-godbolt/running_mca.md +++ b/content/learning-paths/cross-platform/mca-godbolt/running_mca.md @@ -393,6 +393,12 @@ You can see by looking at the timeline view that instructions no longer depend o Instructions also spend less time waiting in the scheduler's queue. This explains why the performance of `sum_test2.s` is so much better than `sum_test1.s`. -Note the use of the flag `-mcpu=neoverse-v2` throughout all of those examples. This flag tells MCA to simulate the performance of the code in `sum_test1.s` and `sum_test2.s` on a Neoverse V2 core. This flag can be changed to any core supported in MCA. You can find what cores are supported in MCA by running `llvm-mca -mcpu=help <<<''`. You can also look at the LLVM sources in [llvm-project](https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-mca/AArch64), which will give you more detailed examples. For instance, when looking at the Neoverse cores, there is currently support for the N1, N2, N3 and the V1, V2, V3 cores. 
+Note the use of the flag `-mcpu=neoverse-v2` throughout all of those examples. This flag tells MCA to simulate the performance of the code in `sum_test1.s` and `sum_test2.s` on a Neoverse V2 core. This flag can be changed to any core supported in MCA. + +You can find what cores are supported in MCA by running `llvm-mca -mcpu=help`. + +If you are using an older version of `llvm-mca`, you may have to add an empty string to print the help, use `llvm-mca -mcpu=help <<<''`. + +You can also look at the LLVM sources in [llvm-project](https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-mca/AArch64), which will give you more detailed examples. For instance, when looking at the Neoverse cores, there is currently support for the N1, N2, N3 and the V1, V2, V3 cores. In the next section, you can try running `llvm-mca` with Compiler Explorer. diff --git a/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md b/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md index 8c5b2e3bfe..d826539d12 100644 --- a/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md +++ b/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md @@ -299,7 +299,7 @@ Then select the **Reopen in Container** menu entry as shown below. It automatically finds and uses `.devcontainer/devcontainer.json`: -![VSCode Docker alt-text#center](VSCode.png "Figure 1: Setting up the Docker container.") +![VSCode Docker alt-text#center](vscode.png "Figure 1: Setting up the Docker container.") All your commands now run within the container, so there is no need to prepend them with a Docker invocation, as VS Code handles all this seamlessly for you. diff --git a/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/VSCode.png b/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/vscode.png similarity index 100% rename from content/learning-paths/cross-platform/multiplying-matrices-with-sme2/VSCode.png rename to content/learning-paths/cross-platform/multiplying-matrices-with-sme2/vscode.png diff --git a/content/learning-paths/cross-platform/remoteit/connections.md b/content/learning-paths/cross-platform/remoteit/connections.md index eb5287ef43..dbfa4e7b93 100644 --- a/content/learning-paths/cross-platform/remoteit/connections.md +++ b/content/learning-paths/cross-platform/remoteit/connections.md @@ -18,7 +18,7 @@ The advantages are: Peer to Peer connections are created by installing a software package with initiator support on the initiator device. Install the Remote.It Desktop application or Remote.It CLI on your initiator device to support Peer to Peer connection. 
-![example image alt-text#center](Remote.It-Connections.png "Proxy and Peer to Peer Connection Illustration") +![example image alt-text#center](remote.it-connections.png "Proxy and Peer to Peer Connection Illustration") ## Peer to Peer Connections diff --git a/content/learning-paths/cross-platform/remoteit/Remote.It-Connections.png b/content/learning-paths/cross-platform/remoteit/remote.it-connections.png similarity index 100% rename from content/learning-paths/cross-platform/remoteit/Remote.It-Connections.png rename to content/learning-paths/cross-platform/remoteit/remote.it-connections.png diff --git a/content/learning-paths/cross-platform/remoteit/targets.png b/content/learning-paths/cross-platform/remoteit/targets.png deleted file mode 100644 index 4d576b10e3..0000000000 Binary files a/content/learning-paths/cross-platform/remoteit/targets.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/woa_azure/images/azure2.png b/content/learning-paths/cross-platform/woa_azure/images/azure2.png deleted file mode 100644 index 7e1572b843..0000000000 Binary files a/content/learning-paths/cross-platform/woa_azure/images/azure2.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/woa_azure/images/click_create.png b/content/learning-paths/cross-platform/woa_azure/images/click_create.png deleted file mode 100644 index b7e1acc668..0000000000 Binary files a/content/learning-paths/cross-platform/woa_azure/images/click_create.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/woa_azure/images/select_win_arm_64.png b/content/learning-paths/cross-platform/woa_azure/images/select_win_arm_64.png deleted file mode 100644 index 7dfe7d5355..0000000000 Binary files a/content/learning-paths/cross-platform/woa_azure/images/select_win_arm_64.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/woa_azure/images/vm_search.png b/content/learning-paths/cross-platform/woa_azure/images/vm_search.png deleted file mode 100644 index dcc22795d3..0000000000 Binary files a/content/learning-paths/cross-platform/woa_azure/images/vm_search.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/_index.md b/content/learning-paths/embedded-and-microcontrollers/_index.md index 49c994ac52..c7b2a30c7d 100644 --- a/content/learning-paths/embedded-and-microcontrollers/_index.md +++ b/content/learning-paths/embedded-and-microcontrollers/_index.md @@ -11,8 +11,8 @@ maintopic: true operatingsystems_filter: - Android: 2 - Baremetal: 30 -- Linux: 34 -- macOS: 7 +- Linux: 35 +- macOS: 8 - RTOS: 10 - Windows: 5 subjects_filter: @@ -20,7 +20,7 @@ subjects_filter: - Containers and Virtualization: 6 - Embedded Linux: 4 - Libraries: 3 -- ML: 18 +- ML: 19 - Performance and Architecture: 22 - RTOS Fundamentals: 5 - Security: 2 @@ -39,7 +39,9 @@ tools_software_languages_filter: - Arm Streamline: 1 - Arm Virtual Hardware: 12 - Assembly: 1 -- C: 5 +- Baremetal: 1 +- Bash: 1 +- C: 6 - ChatGPT: 1 - Clang: 1 - CMSIS: 4 @@ -50,7 +52,7 @@ tools_software_languages_filter: - Containerd: 1 - CPP: 1 - DetectNet: 1 -- Docker: 10 +- Docker: 11 - DSTREAM: 2 - Edge AI: 3 - Edge Impulse: 2 @@ -86,13 +88,14 @@ tools_software_languages_filter: - Paddle: 1 - Performance analysis: 1 - Porcupine: 1 -- Python: 9 +- Python: 10 - PyTorch: 4 - QEMU: 1 - Raspberry Pi: 7 - Remote.It: 1 - Runbook: 4 - STM32: 2 +- systemd: 1 - TensorFlow: 3 - TensorRT: 1 - tinyML: 2 diff --git 
a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/connecting_peripheral.md b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/connecting_peripheral.md index 86fdf0c0a1..08644f427f 100644 --- a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/connecting_peripheral.md +++ b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/connecting_peripheral.md @@ -12,23 +12,23 @@ Follow the steps outline below to connect the AXI4 peripheral you created in the 1. Right-click on the empty space again in the diagram and choose “Create Port...” (Ctrl+K). Create 2 ports with the following settings and names: - ![Creating output port](images/Picture15.png) + ![Creating output port](images/picture15.png) *Figure 3.1. Creating output port* - ![Creating input port](images/Picture16.png) + ![Creating input port](images/picture16.png) *Figure 3.2. Creating input port* 2. Wire the “sw[3:0]” input to the “sw[3:0]” port of the “axi_gpio_asoc_0” block and the same for the “led[3:0]” output to the equivalent port of the block as shown in the diagram below. This connects them to external ports of the ZYNQ chip: - ![Vivado block diagram](images/Picture17.png) + ![Vivado block diagram](images/picture17.png) *Figure 3.3. Vivado block diagram* 3. Two IP blocks will be generated automatically. The “Processor System Reset” IP is used to generate reset signals for different peripherals. The “AXI Interconnect” IP here is used to interconnect AXI4-Lite Subordinate and AXI Manager. Select the “Address Editor” tab next to “Diagram” and change the “64K” to “4K”. Save all your progress. - ![Changing peripheral address settings](images/Picture18.png) + ![Changing peripheral address settings](images/picture18.png) *Figure 3.4. Changing peripheral address settings* @@ -40,26 +40,26 @@ Follow the steps outline below to connect the AXI4 peripheral you created in the 5. Right-click the “Generate Bitstream” option in the “Flow Navigator” on the left and select “Bitstream settings”. Click the three dots next to “tcl.pre”: - ![Changing bitstream settings](images/Picture19.png) + ![Changing bitstream settings](images/picture19.png) *Figure 3.5. Changing bitstream settings* 6. Select the “New Script” option, click the three dots next to the empty box, choose the “pins.tcl” file you created earlier and click “Ok” on all windows. Right-click the “Constraints” under the sources tab and select “Add sources”: - ![Adding sources](images/Picture20.png) + ![Adding sources](images/picture20.png) *Figure 3.6. Adding sources* 7. Select “Add or create constraints” and click “Next”. Select “Create File”, give any name to the file for example pin_constraints, and click “Finish”: - ![Creating a constraints file](images/Picture21.png) + ![Creating a constraints file](images/picture21.png) *Figure 3.7. Creating a constraints file* 8. Expand the “Constraints” folder within the “Sources” tab and double-click the file you just created to open it. Add the following constraints from [Digilent/Zybo-Z7-10-Pmod-VGA/blob/master/src/constraints/Zybo-Z7-Master.xdc](https://github.com/Digilent/Zybo-Z7-10-Pmod-VGA/blob/master/src/constraints/Zybo-Z7-Master.xdc), and save the file: - ![Editing constraints file](images/Picture22.png) + ![Editing constraints file](images/picture22.png) *Figure 3.8. 
Editing constraints file* diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/creating_peripheral.md b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/creating_peripheral.md index 1498a78246..7b569a65dc 100644 --- a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/creating_peripheral.md +++ b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/creating_peripheral.md @@ -12,19 +12,19 @@ The Xilinx Vivado tools provide a simplified way to create an AXI4 peripheral. Y 1. Start by clicking “Tools” -> “Create and Package New IP”. Click “Next” and choose the following option: - ![Creating AXI4 peripheral](images/Picture7.png) + ![Creating AXI4 peripheral](images/picture7.png) *Figure 2.1. Creating AXI4 peripheral* 2. Give the following name to the IP (you may keep the IP location path as provided by default): - ![Adding peripheral details](images/Picture8.jpg) + ![Adding peripheral details](images/picture8.jpg) *Figure 2.2. Adding peripheral details* 3. You need a Subordinate interface and four 32-bit registers for the switches and LEDs. - ![Peripheral settings for Subordinate interface](images/Picture9.png) + ![Peripheral settings for Subordinate interface](images/picture9.png) *Figure 2.3. Peripheral settings for Subordinate interface* @@ -36,7 +36,7 @@ These are the basic template files generated for an AXI-Lite peripheral. You can 5. Double-click to open the top-level Verilog file called “AUP_advanced_SoC_v1_0.v”: - ![Editing top-level Verilog file](images/Picture10.png) + ![Editing top-level Verilog file](images/picture10.png) *Figure 2.4. Editing top-level Verilog file* @@ -64,7 +64,7 @@ These are the basic template files generated for an AXI-Lite peripheral. You can ``` 8. Save the changes in the file (Ctrl+S). Next, expand and open the other Verilog file (AUP_advanced_SoC_v1_0_S00_AXI.v) shown below: - ![Edit the second Verilog file](images/Picture11.png) + ![Edit the second Verilog file](images/picture11.png) *Figure 2.5. Edit the second Verilog file* @@ -93,7 +93,7 @@ These are the basic template files generated for an AXI-Lite peripheral. You can 12. Save the changes in the file (Ctrl+S). Next, go to the “Package IP – AUP_advanced_SoC” tab, choose the “Customization Parameters” option on the left and click “Merge Changes from Customization Parameters Wizard” to update the IP package with the changes made in HDL files: - ![. Saving all the changes](images/Picture12.png) + ![. Saving all the changes](images/picture12.png) *Figure 2.6. Saving all the changes* @@ -127,7 +127,7 @@ These are the basic template files generated for an AXI-Lite peripheral. You can ``` 15. Then, click on “Settings” (under “Project Manager”) in the “Flow Navigator” menu on the left. Expand the “IP” section in the new window that appears and choose the “Repository” option. - ![Adding IP Repository](images/Picture13.png) + ![Adding IP Repository](images/picture13.png) *Figure 2.7. Adding IP Repository* @@ -135,7 +135,7 @@ These are the basic template files generated for an AXI-Lite peripheral. You can 17. Click “Run Connection Automation” and then click “OK” to connect the AXI-Lite Subordinate interface on GPIO peripheral to the AXI Manager interface on Arm processor. - ![Connect AXI-Lite Subordinate interface (Custom IP) to AXI Manager interface](images/Picture14.png) + ![Connect AXI-Lite Subordinate interface (Custom IP) to AXI Manager interface](images/picture14.png) *Figure 2.8. 
Connect AXI-Lite Subordinate interface (Custom IP) to AXI Manager interface* diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/generating_bitstream.md b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/generating_bitstream.md index 89982fca43..69fd4994d2 100644 --- a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/generating_bitstream.md +++ b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/generating_bitstream.md @@ -12,56 +12,56 @@ In this section you will generate the bitstream for this project and write a C p 1. Under the “Sources” tab on the left, expand the “Design Sources” folder, right-click the design1.bd file, choose the “Create HDL Wrapper” and select all default options. - ![Creating HDL Wrapper](images/Picture23.png) + ![Creating HDL Wrapper](images/picture23.png) *Figure 3.1. Creating HDL Wrapper* 2. Save any other changes and click “Generate Bitstream” on the left and click “Ok” for all pop-ups. This process takes some time. Once the process is done, select “Open Hardware Manager”: - ![Generating bitstream and uploading to board](images/Picture24.png) + ![Generating bitstream and uploading to board](images/picture24.png) *Figure 3.2. Generating bitstream and uploading to board* 3. Connect the board and make sure the jumper (JP3) above the red LED on the Zybo board is in JTAG mode. Then, in Vivado, click “Auto Connect” in the Hardware Manager as shown below: - ![Connecting the board](images/Picture25.png) + ![Connecting the board](images/picture25.png) *Figure 3.3. Connecting the board* 4. Right-click on the board, select “Program Device” as shown below and click “Program” on the pop-up window. - ![Programming the board](images/Picture26.png) + ![Programming the board](images/picture26.png) *Figure 3.4. Programming the board* 5. Once the board is programmed, the green LED labeled “LD12” should light up on the board. Click “File” on the main menu bar and select “Export” -> “Export Hardware” and click “Next” on the pop-up window. Choose the following option on the next page: - ![Exporting hardware and bitstream file](images/Picture27.png) + ![Exporting hardware and bitstream file](images/picture27.png) *Figure 3.5. Exporting hardware and bitstream file* 6. Choose the “export to” location as the project folder and save the file. Then click “Finish”. Next, click “Tools” on the main menu bar and select “Launch Vitis IDE”. Choose the same project folder as your workspace. Click “File” -> “New” -> “Application Project”. - ![Creating a new application project](images/Picture28.png) + ![Creating a new application project](images/picture28.png) *Figure 3.6. Creating a new application project* 7. Select the “Create a new platform from hardware (XSA)” tab and click browse to select the XSA file you saved earlier: - ![Adding the XSA file](images/Picture29.png) + ![Adding the XSA file](images/picture29.png) *Figure 3.7. Adding the XSA file* 8. Click next and give a name (e.g. led_system) to the application project. Click “Next” until you reach the following page and choose “Empty Application(C)” and click “Finish”: - ![Creating an empty C Application](images/Picture30.png) + ![Creating an empty C Application](images/picture30.png) *Figure 3.8. Creating an empty C Application* 9. Then right-click the “src” folder within the application project you created and add a new file called “main.c”. - ![Adding a main.c file](images/Picture31.png) + ![Adding a main.c file](images/picture31.png) *Figure 3.9. 
Adding a main.c file* @@ -81,7 +81,7 @@ In this section you will generate the bitstream for this project and write a C p 12. Right-click the application project in the explorer tab, select “Build Project” and ensure that the build is successful. Then right click again and select “Run As” and then “1 Launch Hardware” to upload everything to the board. - ![Running the program on the board.](images/Picture32.png) + ![Running the program on the board.](images/picture32.png) *Figure 3.10. Running the program on the board.* diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture10.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture10.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture10.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture10.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture11.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture11.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture11.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture11.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture12.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture12.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture12.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture12.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture13.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture13.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture13.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture13.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture14.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture14.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture14.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture14.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture15.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture15.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture15.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture15.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture16.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture16.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture16.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture16.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture17.png 
b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture17.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture17.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture17.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture18.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture18.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture18.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture18.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture19.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture19.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture19.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture19.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture2.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture2.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture2.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture2.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture20.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture20.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture20.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture20.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture21.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture21.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture21.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture21.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture22.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture22.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture22.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture22.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture23.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture23.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture23.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture23.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture24.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture24.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture24.png rename to 
content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture24.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture25.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture25.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture25.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture25.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture26.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture26.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture26.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture26.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture27.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture27.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture27.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture27.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture28.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture28.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture28.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture28.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture29.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture29.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture29.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture29.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture3.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture3.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture3.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture3.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture30.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture30.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture30.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture30.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture31.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture31.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture31.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture31.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture32.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture32.png similarity index 100% 
rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture32.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture32.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture4.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture4.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture4.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture4.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture5.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture5.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture5.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture5.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture6.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture6.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture6.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture6.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture7.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture7.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture7.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture7.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture8.jpg b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture8.jpg similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture8.jpg rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture8.jpg diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture9.png b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture9.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/Picture9.png rename to content/learning-paths/embedded-and-microcontrollers/advanced_soc/images/picture9.png diff --git a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/setup.md b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/setup.md index 8ba3dd68ef..ce962fc650 100644 --- a/content/learning-paths/embedded-and-microcontrollers/advanced_soc/setup.md +++ b/content/learning-paths/embedded-and-microcontrollers/advanced_soc/setup.md @@ -16,31 +16,31 @@ After installation is complete, follow the steps outlined below to setup a works 2. Open Xilinx Vivado and create a new project in your workspace as shown below: - ![Creating a project](images/Picture2.png) + ![Creating a project](images/picture2.png) *Figure 1.1. Creating a project* 3. Give it a meaningful name and click "Next". Select RTL Project and click “Next”. You will not be adding any sources or constraints so click “Next” for the next two pages. 
In the next page, click the “Boards” tab and search for “Zybo” in the search bar and click the download button next to “Zybo Z7-10” if the option is available. Then select it and click “Next”. Do not click on the hyperlink but click on the empty area next to “Zybo Z7-10”. - ![Board Selection](images/Picture3.png) + ![Board Selection](images/picture3.png) *Figure 1.2. Board Selection* 4. Click “Finish”. In the “Flow Navigator” menu on the left, click “Create Block Design” under “IP Integrator”: - ![Creating a new block design](images/Picture4.png) + ![Creating a new block design](images/picture4.png) *Figure 1.3. Creating a new block design* 5. Choose a design name or use the default one and click “Ok”. Within the empty “Diagram” box on the right-hand side, right-click and select “Add IP”. Enter “Zynq” in the search box and choose “ZYNQ7 Processing System”. Click the “Run Block Automation” option that has now appeared. - ![Running Block Automation](images/Picture5.png) + ![Running Block Automation](images/picture5.png) *Figure 1.4. Running Block Automation* 6. Click “Ok” on the pop-up window. - ![Run Block Automation default settings](images/Picture6.png) + ![Run Block Automation default settings](images/picture6.png) Figure 1.5. Run Block Automation default settings* diff --git a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/Breadboard.jpeg b/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/breadboard.jpeg similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/Breadboard.jpeg rename to content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/breadboard.jpeg diff --git a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/complete.webp b/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/complete.webp deleted file mode 100644 index b654878a73..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/complete.webp and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/PIR-Sensor-Pinout.png b/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/pir-sensor-pinout.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/PIR-Sensor-Pinout.png rename to content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/pir-sensor-pinout.png diff --git a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/traditional_arm.png b/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/traditional_arm.png deleted file mode 100644 index 9c1270ee43..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/_images/traditional_arm.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/raspberrypi_pico.md b/content/learning-paths/embedded-and-microcontrollers/arduino-pico/raspberrypi_pico.md index 9ce571da90..73540ff9f9 100644 --- a/content/learning-paths/embedded-and-microcontrollers/arduino-pico/raspberrypi_pico.md +++ b/content/learning-paths/embedded-and-microcontrollers/arduino-pico/raspberrypi_pico.md @@ -20,7 +20,7 @@ You were probably expecting an Arduino board, rather than a Raspberry Pi board b For interacting with the physical world you can use two cheap commodity components. 
-![PIR Sensor](_images/PIR-Sensor-Pinout.png) +![PIR Sensor](_images/pir-sensor-pinout.png) First a PIR motion sensor. This sensor reacts to infrared photons emitted by a warm moving object, like a person or animal. The interface is simple, it has one pin for input voltage, one pin for ground to complete the circuit, and a third pin that will have the same voltage as the input pin when motion is detected, and the same voltage as the ground pin when it isn't. @@ -29,7 +29,7 @@ First a PIR motion sensor. This sensor reacts to infrared photons emitted by a w Second, a very simple electric buzzer. You could get fancy with one of these and make it play different sounds with something called Pulse Width Modulation (PWM) but, for simplicity, you can give it a constant voltage which will result in a high-pitched beeping noise. -![Breadboard](_images/Breadboard.jpeg) +![Breadboard](_images/breadboard.jpeg) Finally, you can use a breadboard to connect the components together without having to do any soldering. diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/AddSource.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/AddSource.png deleted file mode 100644 index e6700e4f54..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/AddSource.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/AddSource2.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/AddSource2.png deleted file mode 100644 index bb5bc3d418..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/AddSource2.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack.png deleted file mode 100644 index 983b8d835f..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack2.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack2.png deleted file mode 100644 index bab1cf685c..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack2.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack3.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack3.png deleted file mode 100644 index eb4e1f2e6a..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/CallStack3.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/Flash.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/Flash.png deleted file mode 100644 index 34664fe284..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/Flash.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/NewKeilProject.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/NewKeilProject.png deleted file mode 100644 index cea8ac39dc..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/NewKeilProject.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/ProjectExplorer.png 
b/content/learning-paths/embedded-and-microcontrollers/asm/images/ProjectExplorer.png deleted file mode 100644 index 97dfc9d378..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/ProjectExplorer.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/SelectDevice1.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/SelectDevice1.png deleted file mode 100644 index a8430717d1..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/SelectDevice1.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/SelectDevice2.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/SelectDevice2.png deleted file mode 100644 index e03bea4ed0..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/SelectDevice2.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/SoftwareComponents.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/SoftwareComponents.png deleted file mode 100644 index 138298cdb7..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/SoftwareComponents.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions.png deleted file mode 100644 index 1780023801..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions2.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions2.png deleted file mode 100644 index 58198dc09e..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions2.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions3.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions3.png deleted file mode 100644 index fa0a8d0474..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/asm/images/TargetOptions3.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/Build.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/build.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/asm/images/Build.png rename to content/learning-paths/embedded-and-microcontrollers/asm/images/build.png diff --git a/content/learning-paths/embedded-and-microcontrollers/asm/images/Debug.png b/content/learning-paths/embedded-and-microcontrollers/asm/images/debug.png similarity index 100% rename from content/learning-paths/embedded-and-microcontrollers/asm/images/Debug.png rename to content/learning-paths/embedded-and-microcontrollers/asm/images/debug.png diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure1.png b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure1.png deleted file mode 100644 index 45715e64b9..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure1.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure2.png 
b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure2.png deleted file mode 100644 index 01931b8594..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure2.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure4.png b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure4.png deleted file mode 100644 index 8c22788772..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure4.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure6.png b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure6.png deleted file mode 100644 index 1f1c6be737..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure6.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure7.png b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure7.png deleted file mode 100644 index d48dd2bf4e..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/Figure7.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/end-to-end_workflow.md b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/end-to-end_workflow.md index c4ad31b306..3104f754a8 100644 --- a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/end-to-end_workflow.md +++ b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/end-to-end_workflow.md @@ -80,7 +80,7 @@ Training the model usually takes a lot of time. In step 2, an already trained En By default, the script uses the image shown below (QBHOUSE) as an example to verify the inference results on the Corstone-300 FVP with Arm Cortex-M55. -![QBHOUSE#center](./Figure4.png) +![QBHOUSE#center](./figure4.png) Make the script executable with `chmod`. diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure5.webp b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure5.webp deleted file mode 100644 index 92eb070e1e..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure5.webp and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure6.png b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure6.png deleted file mode 100644 index 1f1c6be737..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure6.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure7.png b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure7.png deleted file mode 100644 index d48dd2bf4e..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/figure7.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/overview.md b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/overview.md index 3efb086329..6abe12100b 100644 --- a/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/overview.md +++ b/content/learning-paths/embedded-and-microcontrollers/avh_ppocr/overview.md @@ -16,7 +16,7 @@ OCR has been widely used in many industry scenarios such as ticket information e Text recognition is a sub-task of OCR. 
It's the step after text detection in OCR's two-stage algorithm which converts image information into text information. -![Example of English text recognition #center](./Figure1.png "Figure 1. Example of English text recognition (Image source: https://iapr.org/archives/icdar2015/index.html)") +![Example of English text recognition #center](./figure1.png "Figure 1. Example of English text recognition (Image source: https://iapr.org/archives/icdar2015/index.html)") In this Learning Path, you will learn how to apply deep learning (DL) to the OCR text recognition task and setup a development flow from model training to application deployment. @@ -39,7 +39,7 @@ As seen in Figure 2, the overall pipeline of PP-OCRv3 is similar to PP-OCRv2 wit For example, the text recognition model introduces [SVTR](https://arxiv.org/abs/2205.00159) (Scene Text Recognition with a Single Visual Model) based on PP-OCRv2. The model also uses [GTC](https://arxiv.org/pdf/2002.01276.pdf) (Guided Training of CTC) to guide training and model distillation. For more details, please refer to this PP-OCRv3 [technical report](https://arxiv.org/abs/2206.03001v2). -![PP-OCRv3 pipeline diagram #center](./Figure2.png "Figure 2. PP-OCRv3 pipeline diagram (Image source: https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/PP-OCRv3_introduction_en.md)") +![PP-OCRv3 pipeline diagram #center](./figure2.png "Figure 2. PP-OCRv3 pipeline diagram (Image source: https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/PP-OCRv3_introduction_en.md)") In the next section, you will deploy a trained PP-OCR text recognition model on the Arm Corstone-300 FVP. diff --git a/content/learning-paths/embedded-and-microcontrollers/edge/images/3b.webp b/content/learning-paths/embedded-and-microcontrollers/edge/images/3b.webp deleted file mode 100644 index 5f1554f555..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/edge/images/3b.webp and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/introduction-to-tinyml-on-arm/Connect.png b/content/learning-paths/embedded-and-microcontrollers/introduction-to-tinyml-on-arm/Connect.png deleted file mode 100644 index 6af713b403..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/introduction-to-tinyml-on-arm/Connect.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/introduction-to-tinyml-on-arm/Overview.png b/content/learning-paths/embedded-and-microcontrollers/introduction-to-tinyml-on-arm/Overview.png deleted file mode 100644 index cbcd944107..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/introduction-to-tinyml-on-arm/Overview.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/1-overview.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/1-overview.md new file mode 100644 index 0000000000..d03d9757ad --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/1-overview.md @@ -0,0 +1,41 @@ +--- +title: Overview +weight: 2 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Hardware Overview - NXP's FRDM i.MX 93 Board + +Selecting the best hardware for machine learning (ML) models depends on effective tools. 
You can visualize ML performance early in the development cycle by using NXP's [FRDM i.MX 93](https://www.nxp.com/design/design-center/development-boards-and-designs/frdm-i-mx-93-development-board:FRDM-IMX93) board. + +
+ + +*Unboxing NXP's FRDM i.MX 93 board* +
+ +![NXP FRDM i.MX 93 Board SoC Highlighted alt-text#center](./nxp-frdm-imx93-board-soc-highlighted.png "Arm Ethos-U65 NPU location") + +### NXP's FRDM i.MX 93 Processor Decoded + +![i.MX 93 Processor SoC alt-text#center](./imx93-application-processor-soc.png "NXP's FRDM i.MX 93 processor") + +**NXP's Processor Labeling Convention:** +|Line|Meaning| +|----|-------| +|MIMX9352|• MI – Microcontroller IC
• MX93 – i.MX 93 family
• 52 – Variant:
• Dual-core Arm Cortex-A55
• Single Cortex-M33
• Includes **Ethos-U65 NPU**| +|CVVXMAB|• C - Commercial temperature grade (0°C to 95°C)
• VVX - Indicates package type and pinout (BGA, pitch, etc.)
• MAB - Specific configuration (e.g., NPU present, security level, memory interfaces) +| +|1P87F|• Silicon mask set identifier| +|SBBM2410E|• NXP traceability code| + +## Benefits and applications + +NPUs, like Arm's [Ethos-U65](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u65) NPU, are available on physical devices specifically made for developers. Development boards like NXP's [FRDM i.MX 93](https://www.nxp.com/design/design-center/development-boards-and-designs/frdm-i-mx-93-development-board:FRDM-IMX93) also connect to displays via an HDMI cable. Additionally, the board accepts video inputs. This is useful for ML performance visualization because it provides: +- visual confirmation that your ML model is running on the physical device, +- image and video inputs for computer vision models running on the device, +- clearly indicated instruction counts, +- confirmation of total execution time, and +- visually appealing output for prototypes and demos. \ No newline at end of file diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/2-boot-nxp.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/2-boot-nxp.md new file mode 100644 index 0000000000..8765903b69 --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/2-boot-nxp.md @@ -0,0 +1,103 @@ +--- +# User change +title: "Boot the NXP FRDM i.MX 93 Board" + +weight: 3 + +# Do not modify these elements +layout: "learningpathall" +--- + +In this section, you will prepare the NXP [FRDM i.MX 93](https://www.nxp.com/design/design-center/development-boards-and-designs/frdm-i-mx-93-development-board:FRDM-IMX93) board for ML development. + +## Unbox the NXP Board + +Follow NXP's getting started instructions: [Getting Started with FRDM-IMX93](https://www.nxp.com/document/guide/getting-started-with-frdm-imx93:GS-FRDM-IMX93): +* Stop when you complete section "1.6 Connect Power Supply" + +## Connect to the NXP Board + +Prior to logging in to the NXP board, you need to configure `picocom`. This allows you to connect to the board using a USB cable. + +{{% notice macOS %}} + +1. Install the Silicon Labs driver: + + https://www.silabs.com/developer-tools/usb-to-uart-bridge-vcp-drivers?tab=downloads + +2. Install [picocom](https://github.com/npat-efault/picocom): + ```bash + brew install picocom + ``` + +3. Establish a USB-to-UART (serial) connection: + - Connect the board's "DEBUG" USB-C connector to your Mac + - Find the NXP board's USB connections in your computer's terminal: + ```bash { output_lines = "2-7" } + ls /dev/tty.* + # output lines + ... + /dev/tty.debug-console + /dev/tty.usbmodem56D70442811 + /dev/tty.usbmodem56D70442813 + ... + ``` + + - Connect to the NXP board: + ```bash { output_lines = "2-5" } + sudo picocom -b 115200 /dev/tty.usbmodem56D70442811 + # output lines + picocom v3.1 + ... + Terminal ready + ``` + +4. Go straight to step 2 in the instructions below + +{{% /notice %}} + +1. Establish a USB-to-UART (serial) connection: + - Connect the board's "DEBUG" USB-C connector to your Linux machine + - Find the NXP board's USB connections in your computer's terminal: + ```bash { output_lines = "2-3" } + ls /dev/ttyUSB* /dev/ttyACM* 2>/dev/null + # output lines + /dev/ttyACM0 /dev/ttyACM1 + ``` + + - Connect to the NXP board (a note on running `picocom` without `sudo` follows this step): + ```bash { output_lines = "2-5" } + sudo picocom -b 115200 /dev/ttyACM0 + # output lines + picocom v3.1 + ... + Terminal ready + ``` 
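The `sudo` in the command above is only needed because serial devices are not accessible to regular users by default. As an optional alternative, on many Debian- and Ubuntu-based distributions `/dev/ttyACM*` and `/dev/ttyUSB*` belong to the `dialout` group; this is a general Linux convention rather than anything specific to the NXP board, so the group name on your distribution may differ:

```bash
# Optional: allow serial access without sudo (assumes a Debian/Ubuntu-style dialout group)
sudo usermod -aG dialout "$USER"

# Log out and log back in so the new group membership takes effect, then connect without sudo:
picocom -b 115200 /dev/ttyACM0
```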
2. Log in to Linux on the NXP board: + - Connect the board's "POWER" USB-C connector to your laptop + - At this point you should see one red and one white light on the board + - Next you should see scrolling text in your `picocom` window as the NXP board boots + - The last line should say `login:` + ```bash { output_lines = "1-9" } + # output lines + ... + [ OK ] Reached target Graphical Interface. + Starting Record Runlevel Change in UTMP... + [ OK ] Finished Record Runlevel Change in UTMP. + + NXP i.MX Release Distro 6.6-scarthgap imx93frdm ttyLP0 + + imx93frdm login: + ``` +3. Type `root` to log in as root. There is no password. + +### Troubleshooting +* Restart the NXP board to get to the `login:` prompt: + * Hold the NXP board's power button for 2 seconds, until the lights turn off + * Hold the NXP board's power button again for 2 seconds, until the lights turn on + +## [Optional] Run the Built-In NXP Demos +* Connect the NXP board to a monitor via HDMI +* Connect a mouse to the NXP board's USB-A port + +![NXP board built-in ML demos alt-text#center](./nxp-board-built-in-ml-demos.png "NXP board built-in ML demos") diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/3-create-super-user.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/3-create-super-user.md new file mode 100644 index 0000000000..927a029927 --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/3-create-super-user.md @@ -0,0 +1,36 @@ +--- +# User change +title: "Create a Linux Super User" + +weight: 4 + +# Do not modify these elements +layout: "learningpathall" +--- + +On the NXP board, create a non-root super user (if you do not already have one): + +1. While [logged in as root]( {{< relref "2-boot-nxp.md" >}} ): + + * Enable super user privileges: + ```bash + sudo visudo + ``` + * In the vi editor that opens up, uncomment the following line: + ```bash { output_lines = "1" } + %wheel ALL=(ALL:ALL) ALL # uncomment this line + ``` + +2. Add a super user: + ```bash + sudo adduser testuser + sudo usermod -aG wheel testuser + ``` + +3. While still logged in as root, confirm successful super user creation: + ```bash + su - testuser + sudo whoami # should return "root" + ``` + +4. Log out of the NXP board and log back in to Linux as the super user \ No newline at end of file diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/4-enable-wifi.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/4-enable-wifi.md new file mode 100644 index 0000000000..12ce8e216d --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/4-enable-wifi.md @@ -0,0 +1,88 @@ +--- +# User change +title: "Enable WiFi" + +weight: 5 # 1 is first, 2 is second, etc. + +# Do not modify these elements +layout: "learningpathall" +--- + +{{% notice Note %}} + +* WiFi network connectivity **does not persist** on NXP board reboot +* It **does persist** on logging out and then logging back in as the same Linux user + +{{% /notice %}} + +1. [Log in to Linux]( {{< relref "2-boot-nxp.md" >}} ) on the board, as a [super user]( {{< relref "3-create-super-user" >}} ) + +2. Run the following terminal commands: + ```bash + sudo /usr/sbin/modprobe moal mod_para=nxp/wifi_mod_para.conf + sudo connmanctl + ``` + +3. The prompt will change to `connmanctl>`, where you will enter the following commands: + + ```bash + enable wifi + scan wifi + services + ``` + +4. 
Your available WiFi networks will be listed in the following form: + + ```bash { output_lines = "1-3" } + wifi_0123456789ab_cdef0123456789_managed_psk + wifi_abcdef012345_6789abcdef0123_managed_psk + wifi_fedcba987654_3210fedcba9876_managed_psk + ``` + + {{% notice Note %}} + + Duplicate SSIDs may appear, so you will have to experiment with the different `wifi_..._managed_psk` names when you try to connect in the next step. + + {{% /notice %}} + +5. Still within the `connmanctl>` prompt, enter the following commands: + + ```bash + agent on + connect wifi_0123456789ab_cdef0123456789_managed_psk # Your wifi_..._managed_psk name will be different + Agent RequestInput wifi_0123456789ab_cdef0123456789_managed_psk + Passphrase = [ Type=psk, Requirement=mandatory ] + Passphrase? # Enter your WiFi password + connmanctl> quit + ``` + +6. Assuming your WiFi network is connected to the Internet, test connectivity: + + ```bash + curl -I http://www.example.com + ``` + + If WiFi is configured correctly, you will see the HTTP response headers from example.com: + + ```bash { output_lines = "1-2" } + HTTP/1.1 200 OK + ... + ``` + +7. [optional] If your WiFi network is not connected to the Internet, test connectivity this way: + + ```bash + ifconfig | grep RUNNING -A 1 + ``` + + If WiFi is configured correctly, you will see a list of `RUNNING` network adapters: + * one for `127.0.0.1` (`localhost`) and + * a second for the NXP board's assigned IP address on the WiFi network + * Example output, where `192.168.1.89` is the NXP board's successfully assigned IP address: + ```bash { output_lines = "1-5" } + lo: flags=73 mtu 65536 + inet 127.0.0.1 netmask 255.0.0.0 + -- + mlan0: flags=-28605 mtu 1500 + inet 192.168.1.89 netmask 255.255.255.0 broadcast 192.168.1.255 + ``` \ No newline at end of file diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/5-transfer-files-wifi.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/5-transfer-files-wifi.md new file mode 100644 index 0000000000..8eaa0c09ed --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/5-transfer-files-wifi.md @@ -0,0 +1,30 @@ +--- +# User change +title: "Transfer Files Over WiFi" + +weight: 6 # 1 is first, 2 is second, etc. + +# Do not modify these elements +layout: "learningpathall" +--- + +1. [Log in to Linux]( {{< relref "2-boot-nxp.md" >}} ) on the board, as a [super user]( {{< relref "3-create-super-user" >}} ) + +2. [Enable WiFi]( {{< relref "4-enable-wifi.md" >}} ) on the NXP board + +3. Note down the NXP board's IP address on your WiFi network: + ```bash + ifconfig | grep RUNNING -A 1 + ``` + +4. Open a terminal window on the machine with the source file + +5. 
Navigate to the source file directory and copy the file to the NXP board's destination directory: + ```bash + # On your machine, in the source file directory + scp <source_file> <user>@<board_ip>:/home/nxp_user/path/to/destination/directory/ + ``` + Example: + ```bash { output_lines = "1" } + scp install.sh testuser@192.168.1.1:/home/testuser/apps/test_app/ + ``` \ No newline at end of file diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/6-tranfer-files-usb.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/6-tranfer-files-usb.md new file mode 100644 index 0000000000..89ee721ff8 --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/6-tranfer-files-usb.md @@ -0,0 +1,32 @@ +--- +# User change +title: "Transfer Files Over USB" + +weight: 7 # 1 is first, 2 is second, etc. + +# Do not modify these elements +layout: "learningpathall" +--- + +1. [Log in to Linux]( {{< relref "2-boot-nxp.md" >}} ) on the board, as a [super user]( {{< relref "3-create-super-user" >}} ) + +2. On your machine with the source file, copy the source file to a USB-A thumb drive + +3. Insert the thumb drive into the NXP board's USB-A port + +4. Mount the thumb drive and then copy the files to the board: + ```bash { output_lines = "1" } + # Execute these commands on the board + mount /dev/sda1 /mnt + cp /mnt/<source_file> /path/to/destination/directory/ + ``` + + Example: + ```bash { output_lines = "1" } + cp /mnt/install.sh ./apps/test_app/ + ``` + +5. [optional] Unmount the thumbdrive and then remove it from the NXP board + ```bash + umount /mnt + ``` \ No newline at end of file diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/7-enable-persistent-wifi.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/7-enable-persistent-wifi.md new file mode 100644 index 0000000000..d8b2f7a6cf --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/7-enable-persistent-wifi.md @@ -0,0 +1,148 @@ +--- +# User change +title: "(Optional) Enable Persistent WiFi" + +weight: 8 # 1 is first, 2 is second, etc. + +# Do not modify these elements +layout: "learningpathall" +--- + +On this page, you will configure the NXP board to connect to a specific WiFi network on boot. + +1. [Log in to Linux]( {{< relref "2-boot-nxp.md" >}} ) on the board, as `root` + +2. Create a `wpa_supplicant.conf`: + ```bash + touch /etc/wpa_supplicant.conf + nano /etc/wpa_supplicant.conf + ``` + Enter your WiFi credentials into the `wpa_supplicant.conf` file: + ```bash + ctrl_interface=DIR=/var/run/wpa_supplicant GROUP=netdev + update_config=1 + + network={ + ssid="YOUR_SSID" + psk="YOUR_PASSWORD" + key_mgmt=WPA-PSK + } + ``` + +3. Test the `wpa_supplicant.conf` file: + ```bash + modprobe moal mod_para=nxp/wifi_mod_para.conf + ifconfig mlan0 up + wpa_supplicant -B -i mlan0 -c /etc/wpa_supplicant.conf + udhcpc -i mlan0 + ``` + * mlan0 is the WiFi interface on i.MX93 + * If this connects to WiFi, you are ready to automate the connection; an optional check is shown below 
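Before moving on to the automation steps, you can optionally confirm that `mlan0` associated with your network and received an IPv4 address. This is a minimal sketch; it assumes `wpa_cli` is installed alongside `wpa_supplicant` (it usually is on NXP's Yocto-based images), and the exact output varies by image:

```bash
# Check the association state; wpa_state=COMPLETED means the board joined the network
wpa_cli -i mlan0 status

# Check that mlan0 is up and has been assigned an IPv4 address
ip addr show mlan0
```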
4. Configure DNS server IP addresses so that the NXP board can resolve Internet addresses: + ```bash + touch /usr/share/udhcpc/default.script + nano /usr/share/udhcpc/default.script + ``` + and add the following `udhcpc` script: + ```bash + #!/bin/sh + # udhcpc script + case "$1" in + deconfig) + ip addr flush dev $interface + ;; + bound|renew) + ip addr add $ip/$subnet dev $interface + ip route add default via $router + echo "nameserver 8.8.8.8" > /etc/resolv.conf + echo "nameserver 1.1.1.1" >> /etc/resolv.conf + ;; + esac + ``` + Make the `default.script` executable: + ```bash + chmod +x /usr/share/udhcpc/default.script + ``` + +5. Create a `nxp-wifi-setup.sh` script: + ```bash + touch /usr/bin/nxp-wifi-setup.sh + nano /usr/bin/nxp-wifi-setup.sh + ``` + and add the following lines: + ```bash + #!/bin/sh + # Load WiFi driver + /usr/sbin/modprobe moal mod_para=nxp/wifi_mod_para.conf + + # Bring interface up + /usr/bin/ifconfig mlan0 up + + # Connect to WiFi + /usr/sbin/wpa_supplicant -B -i mlan0 -c /etc/wpa_supplicant.conf + + # Obtain DHCP IP + DNS + /usr/sbin/udhcpc -i mlan0 -s /usr/share/udhcpc/default.script + ``` + Make the `nxp-wifi-setup.sh` executable: + ```bash + chmod +x /usr/bin/nxp-wifi-setup.sh + ``` + +6. Create a `nxp-wifi-setup.service`: + ```bash + touch /etc/systemd/system/nxp-wifi-setup.service + nano /etc/systemd/system/nxp-wifi-setup.service + ``` + Enter the following systemd unit configuration into the `nxp-wifi-setup.service` file: + ```bash + [Unit] + Description=WiFi Setup for NXP FRDM i.MX93 + After=network.target + + [Service] + Type=oneshot + ExecStart=/usr/bin/nxp-wifi-setup.sh + RemainAfterExit=yes + + [Install] + WantedBy=multi-user.target + ``` + +7. Create a `wpa_supplicant.service`: + ```bash + touch /etc/systemd/system/wpa_supplicant.service + nano /etc/systemd/system/wpa_supplicant.service + ``` + Enter the following systemd unit configuration into the `wpa_supplicant.service` file: + ```bash + [Unit] + Description=WPA Supplicant daemon + After=network.target + + [Service] + Type=simple + ExecStart=/usr/sbin/wpa_supplicant -i mlan0 -c /etc/wpa_supplicant.conf + Restart=always + + [Install] + WantedBy=multi-user.target + ``` + +8. Enable and start the `nxp-wifi-setup.service`: + ```bash + systemctl daemon-reload + systemctl enable nxp-wifi-setup.service wpa_supplicant.service + systemctl start nxp-wifi-setup.service wpa_supplicant.service + ``` + +9. Check status: + ```bash + systemctl status nxp-wifi-setup.service + systemctl status wpa_supplicant.service + ``` + and confirm Internet connectivity: + ```bash + curl -I http://www.example.com + ``` \ No newline at end of file diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/_index.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/_index.md new file mode 100644 index 0000000000..9040c9ca8e --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/_index.md @@ -0,0 +1,64 @@ +--- +title: Using Linux on the NXP FRDM i.MX 93 Board + +draft: true +cascade: + draft: true + +minutes_to_complete: 120 + +who_is_this_for: This is an advanced topic for embedded device developers and machine learning engineers who need a professional-grade Arm embedded device development platform. + +learning_objectives: + - Identify suitable physical Arm-based devices for TinyML applications. + - Configure physical embedded devices. + - Enable hardware modules on embedded devices. + - Deploy apps to NXP's FRDM i.MX 93 board. 
+ - Use Linux on an embedded device's Arm Cortex-A processors. + - Deploying machine learning models to Arm Cortex-M and Ethos-U processors will be covered in a later learning path. + +prerequisites: + - Purchase of an NXP [FRDM i.MX 93](https://www.nxp.com/design/design-center/development-boards-and-designs/frdm-i-mx-93-development-board:FRDM-IMX93) board. + - A computer running Linux or macOS. + +author: Waheed Brown + +### Tags +skilllevels: Introductory +subjects: ML +armips: + - Cortex-A + +operatingsystems: + - Linux + - macOS + +tools_software_languages: + - Baremetal + - Python + - Bash + - systemd + +further_reading: + - resource: + title: TinyML Brings AI to Smallest Arm Devices + link: https://newsroom.arm.com/blog/tinyml + type: blog + - resource: + title: Arm Machine Learning Resources + link: https://www.arm.com/developer-hub/embedded-and-microcontrollers/ml-solutions/getting-started + type: documentation + - resource: + title: Arm Developers Guide for Cortex-M Processors and Ethos-U NPU + link: https://developer.arm.com/documentation/109267/0101 + type: documentation + + + + +### FIXED, DO NOT MODIFY +# ================================================================================ +weight: 1 # _index.md always has weight of 1 to order correctly +layout: "learningpathall" # All files under learning paths have this same wrapper +learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. +--- diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/_next-steps.md b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing.
+--- diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/imx93-application-processor-soc.png b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/imx93-application-processor-soc.png new file mode 100644 index 0000000000..838d47f6d5 Binary files /dev/null and b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/imx93-application-processor-soc.png differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/nxp-board-built-in-ml-demos.png b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/nxp-board-built-in-ml-demos.png new file mode 100644 index 0000000000..e50d656b13 Binary files /dev/null and b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/nxp-board-built-in-ml-demos.png differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/nxp-frdm-imx93-board-soc-highlighted.png b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/nxp-frdm-imx93-board-soc-highlighted.png new file mode 100644 index 0000000000..b50ace3a21 Binary files /dev/null and b/content/learning-paths/embedded-and-microcontrollers/linux-nxp-board/nxp-frdm-imx93-board-soc-highlighted.png differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/Cortex-a53_MPIDR_EL1.png b/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/Cortex-a53_MPIDR_EL1.png deleted file mode 100644 index 1f204e5b88..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/Cortex-a53_MPIDR_EL1.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/armds_ide.webp b/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/armds_ide.webp deleted file mode 100644 index 48e4f5652a..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/armds_ide.webp and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a53_mpidr_el1.png b/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a53_mpidr_el1.png deleted file mode 100644 index 1f204e5b88..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a53_mpidr_el1.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a55_MPIDR_EL1.png b/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a55_MPIDR_EL1.png deleted file mode 100644 index eea3e19a05..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a55_MPIDR_EL1.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a55_mpidr_el1.png b/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a55_mpidr_el1.png deleted file mode 100644 index eea3e19a05..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/cortex-a55_mpidr_el1.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/debug_config.webp b/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/debug_config.webp deleted file mode 100644 index 8e5c97a234..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/linux-on-fvp/debug_config.webp and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/mlek/term.png 
b/content/learning-paths/embedded-and-microcontrollers/mlek/term.png deleted file mode 100644 index 7ebc52be3e..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/mlek/term.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/mlek/vis.png b/content/learning-paths/embedded-and-microcontrollers/mlek/vis.png deleted file mode 100644 index 4658424ac0..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/mlek/vis.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/CMSIS-RTX_missing.png b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/CMSIS-RTX_missing.png deleted file mode 100644 index 23a31f7c89..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/CMSIS-RTX_missing.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/CoreDebug_uvision.png b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/CoreDebug_uvision.png deleted file mode 100644 index 1706587529..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/CoreDebug_uvision.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/Device_missing.png b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/Device_missing.png deleted file mode 100644 index 9a9b1ee390..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/Device_missing.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/EventRecorder_migration.png b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/EventRecorder_migration.png deleted file mode 100644 index fb4f06a639..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/EventRecorder_migration.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/IO_migration.png b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/IO_migration.png deleted file mode 100644 index 2b5aa12196..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/IO_migration.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/OS_Tick_missing.png b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/OS_Tick_missing.png deleted file mode 100644 index 5f262287ad..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/OS_Tick_missing.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/troubleshooting.md b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/troubleshooting.md index 30f740436a..42a5277aa4 100644 --- a/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/troubleshooting.md +++ b/content/learning-paths/embedded-and-microcontrollers/project-migration-cmsis-v6/troubleshooting.md @@ -24,7 +24,7 @@ This section provides an overview of the most common errors you might encounter The following 
error occurs when you try to migrate a project to CMSIS v6 but you have not installed the Cortex_DFP pack: -![Requested device not found for target](./Device_missing.png) +![Requested device not found for target](./device_missing.png) {{% notice Resolution %}} Install [ARM.Cortex_DFP.1.0.0.pack](https://www.keil.arm.com/packs/cortex_dfp-arm) or above. @@ -34,7 +34,7 @@ Install [ARM.Cortex_DFP.1.0.0.pack](https://www.keil.arm.com/packs/cortex_dfp-ar The following error occurs when you try to migrate a Keil RTX-based project but you have not installed the CMSIS-RTX pack: -![Keil RTX5 component not available for target](./CMSIS-RTX_missing.png) +![Keil RTX5 component not available for target](./cmsis-rtx_missing.png) {{% notice Resolution %}} Install [ARM.CMSIS-RTX.5.8.0.pack](https://www.keil.arm.com/packs/cmsis-rtx-arm) or above. @@ -46,7 +46,7 @@ Install [ARM.CMSIS-RTX.5.8.0.pack](https://www.keil.arm.com/packs/cmsis-rtx-arm) The following warning is shown in the Validation Output window when you try to migrate a Keil RTX-based project: -![Additional software components required](./OS_Tick_missing.png) +![Additional software components required](./os_tick_missing.png) {{% notice Resolution %}} Use the **Resolve** button to select the missing component automatically. @@ -98,12 +98,12 @@ As the components from the Keil.ARM_Compiler pack do not have 1:1 replacements, Deselect the `Compiler:Event Recorder` component and select the `CMSIS-View:Event Recorder` component: - ![Event Recorder migration](./EventRecorder_migration.png) + ![Event Recorder migration](./eventrecorder_migration.png) 2. Compiler:I/O migration Deselect any component of `Compiler:I/O` and select the corresponding `CMSIS-Compiler` component: - ![I/O migration](./IO_migration.png) + ![I/O migration](./io_migration.png) This table helps you to identify the correct components: @@ -245,7 +245,7 @@ For more information on how to configure your *.cproject.yml file, please refer 1. Go to **Project - Options for Target** and switch to the **C/C++ (AC6)** tab. Add `-include \cmsis_5_to_6_patch.h` to the **Misc Controls**: - ![Add patch file](./CoreDebug_uvision.png) + ![Add patch file](./coredebug_uvision.png) 1. Rebuild the project. {{% /notice %}} diff --git a/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/4-smart-home-assistant.md b/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/4-smart-home-assistant.md index 3ec24fd82c..45c88a4d11 100644 --- a/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/4-smart-home-assistant.md +++ b/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/4-smart-home-assistant.md @@ -78,7 +78,7 @@ Try asking the assistant to `turn on living room light`. If you've connected add Open your browser and navigate to `http://0.0.0.0:8000`, or as printed in the terminal output. 
-![Web interface of the smart home assistant showing device control through LLM commands alt-text#center](UI3.png "Interacting with the LLM through the web interface") +![Web interface of the smart home assistant showing device control through LLM commands alt-text#center](ui3.png "Interacting with the LLM through the web interface") ## Command line interface diff --git a/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/UI3.png b/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/UI3.png deleted file mode 100644 index bc4dfecd52..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/raspberry-pi-smart-home/UI3.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/uv_debug/2_basics.md b/content/learning-paths/embedded-and-microcontrollers/uv_debug/2_basics.md index fa81e39f5f..6c69f237d8 100644 --- a/content/learning-paths/embedded-and-microcontrollers/uv_debug/2_basics.md +++ b/content/learning-paths/embedded-and-microcontrollers/uv_debug/2_basics.md @@ -104,7 +104,7 @@ There is a global variable `g_msTicks` located in `Blinky.c` near line 11 that y 1. Click on `` twice and enter: `SystemCoreClock` 2. Right click on the `Value` and deselect **Hexadecimal Display**. 32 MHz will be displayed: -![SystemCoreClock in Watch 1 Window](./SystemCoreClockWatch.png) +![SystemCoreClock in Watch 1 Window](./systemcoreclockwatch.png) {{% notice Note %}} You do not need to stop the program execution to enter variables, raw addresses or structures in a **Watch** or **Memory** window. @@ -140,7 +140,7 @@ The example application uses the Arm Cortex-M system tick timer. 1. ![System Viewer](./b_uv4_systemviewer.png) Go to **Peripherals - Core Peripherals** and then select **System Tick Timer S (SysTick)**. 2. The **SysTick Timer** window opens: -![SysTick Timer Window](./SysTickTimerWindow.png) +![SysTick Timer Window](./systicktimerwindow.png) 1. ![Run](./b_uv4_run.png) **Run (F5)** the application. 6. While the program is running, type `0x10000` in the `SysTick -> LOAD` register and click in another register or press Enter. 7. The program execution will speed up. This is the power of Arm CoreSight debugging. 
diff --git a/content/learning-paths/embedded-and-microcontrollers/uv_debug/SysTickTimerWindow.png b/content/learning-paths/embedded-and-microcontrollers/uv_debug/SysTickTimerWindow.png deleted file mode 100644 index 1a38093d8d..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/uv_debug/SysTickTimerWindow.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/uv_debug/SystemCoreClockWatch.png b/content/learning-paths/embedded-and-microcontrollers/uv_debug/SystemCoreClockWatch.png deleted file mode 100644 index ebf593c735..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/uv_debug/SystemCoreClockWatch.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/uv_debug/b_uv4_memory_window.png b/content/learning-paths/embedded-and-microcontrollers/uv_debug/b_uv4_memory_window.png deleted file mode 100644 index b00af3bac8..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/uv_debug/b_uv4_memory_window.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/uv_debug/b_uv4_rebuild_all.png b/content/learning-paths/embedded-and-microcontrollers/uv_debug/b_uv4_rebuild_all.png deleted file mode 100644 index 68aa04275a..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/uv_debug/b_uv4_rebuild_all.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/uv_debug/blinky_c_proj_win.png b/content/learning-paths/embedded-and-microcontrollers/uv_debug/blinky_c_proj_win.png deleted file mode 100644 index 4f12e62851..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/uv_debug/blinky_c_proj_win.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/uvprojx-conversion/convert_project.png b/content/learning-paths/embedded-and-microcontrollers/uvprojx-conversion/convert_project.png deleted file mode 100644 index a4e0910f32..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/uvprojx-conversion/convert_project.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/yolo-on-himax/pose_estimation.jpg b/content/learning-paths/embedded-and-microcontrollers/yolo-on-himax/pose_estimation.jpg deleted file mode 100644 index 09e24fcbcf..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/yolo-on-himax/pose_estimation.jpg and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/zephyr_vsworkbench/images/build_application.png b/content/learning-paths/embedded-and-microcontrollers/zephyr_vsworkbench/images/build_application.png deleted file mode 100644 index 4d83aa2f3f..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/zephyr_vsworkbench/images/build_application.png and /dev/null differ diff --git a/content/learning-paths/embedded-and-microcontrollers/zephyr_vsworkbench/images/install_debug_tools.png b/content/learning-paths/embedded-and-microcontrollers/zephyr_vsworkbench/images/install_debug_tools.png deleted file mode 100644 index 63b23b78e4..0000000000 Binary files a/content/learning-paths/embedded-and-microcontrollers/zephyr_vsworkbench/images/install_debug_tools.png and /dev/null differ diff --git a/content/learning-paths/iot/azure-iot/image.webp b/content/learning-paths/iot/azure-iot/image.webp deleted file mode 100644 index 6bcb5b038c..0000000000 Binary files 
a/content/learning-paths/iot/azure-iot/image.webp and /dev/null differ diff --git a/content/learning-paths/laptops-and-desktops/_index.md b/content/learning-paths/laptops-and-desktops/_index.md index 6e75fe728b..8c972241f1 100644 --- a/content/learning-paths/laptops-and-desktops/_index.md +++ b/content/learning-paths/laptops-and-desktops/_index.md @@ -29,7 +29,7 @@ tools_software_languages_filter: - Arm64EC: 1 - Assembly: 1 - Bash: 3 -- C: 10 +- C: 11 - C#: 6 - CCA: 1 - Clang: 13 @@ -37,7 +37,7 @@ tools_software_languages_filter: - CPP: 12 - CSS: 1 - Daytona: 1 -- Docker: 5 +- Docker: 6 - FFmpeg: 1 - GCC: 12 - Git: 1 diff --git a/content/learning-paths/laptops-and-desktops/win_aws_iot/image.webp b/content/learning-paths/laptops-and-desktops/win_aws_iot/image.webp deleted file mode 100644 index fe5742b2e3..0000000000 Binary files a/content/learning-paths/laptops-and-desktops/win_aws_iot/image.webp and /dev/null differ diff --git a/content/learning-paths/laptops-and-desktops/windows_armpl/figures/vs_console_code.png b/content/learning-paths/laptops-and-desktops/windows_armpl/figures/vs_console_code.png deleted file mode 100755 index c3075df38d..0000000000 Binary files a/content/learning-paths/laptops-and-desktops/windows_armpl/figures/vs_console_code.png and /dev/null differ diff --git a/content/learning-paths/laptops-and-desktops/windows_armpl/vs_new_project1.png b/content/learning-paths/laptops-and-desktops/windows_armpl/vs_new_project1.png deleted file mode 100755 index e6cb43321c..0000000000 Binary files a/content/learning-paths/laptops-and-desktops/windows_armpl/vs_new_project1.png and /dev/null differ diff --git a/content/learning-paths/laptops-and-desktops/windows_cicd_github/images/ghrunner_1.png b/content/learning-paths/laptops-and-desktops/windows_cicd_github/images/ghrunner_1.png deleted file mode 100644 index 8aa65170a0..0000000000 Binary files a/content/learning-paths/laptops-and-desktops/windows_cicd_github/images/ghrunner_1.png and /dev/null differ diff --git a/content/learning-paths/laptops-and-desktops/windowsperf-vs-extension/sampling-settings.png b/content/learning-paths/laptops-and-desktops/windowsperf-vs-extension/sampling-settings.png deleted file mode 100644 index 68bcc1cf5c..0000000000 Binary files a/content/learning-paths/laptops-and-desktops/windowsperf-vs-extension/sampling-settings.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/_index.md b/content/learning-paths/mobile-graphics-and-gaming/_index.md index 9fec7623ba..c7f4d24740 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/_index.md +++ b/content/learning-paths/mobile-graphics-and-gaming/_index.md @@ -10,23 +10,23 @@ key_ip: maintopic: true operatingsystems_filter: - Android: 35 -- Linux: 35 -- macOS: 15 -- Windows: 14 +- Linux: 36 +- macOS: 16 +- Windows: 15 subjects_filter: - Gaming: 6 - Graphics: 6 -- ML: 17 +- ML: 18 - Performance and Architecture: 36 subtitle: Optimize Android apps and build faster games using cutting-edge Arm tech title: Mobile, Graphics, and Gaming tools_software_languages_filter: - 7-Zip: 1 - adb: 2 -- Android: 4 +- Android: 5 - Android NDK: 2 - Android SDK: 1 -- Android Studio: 12 +- Android Studio: 13 - Arm Development Studio: 1 - Arm Mobile Studio: 1 - Arm Performance Studio: 3 @@ -50,10 +50,10 @@ tools_software_languages_filter: - Google Test: 1 - Halide: 1 - Hugging Face: 6 -- Java: 6 +- Java: 7 - Jupyter Notebook: 1 - KleidiAI: 2 -- Kotlin: 8 +- Kotlin: 9 - LiteRT: 1 - llama.cpp: 1 - LLM: 1 @@ -62,11 +62,11 @@ 
tools_software_languages_filter: - MediaPipe: 2 - MTE: 2 - NEON: 1 -- ONNX Runtime: 1 -- ONNX runtime: 1 +- ONNX: 1 +- ONNX Runtime: 2 - OpenGL ES: 1 -- Python: 8 -- PyTorch: 2 +- Python: 9 +- PyTorch: 3 - QEMU: 1 - RenderDoc: 1 - RME: 1 @@ -74,6 +74,7 @@ tools_software_languages_filter: - Rust: 2 - SDDiskTool: 1 - SVE2: 1 +- TensorFlow: 1 - Trusted Firmware: 1 - Unity: 6 - Unreal Engine: 4 diff --git a/content/learning-paths/mobile-graphics-and-gaming/ams/fa.md b/content/learning-paths/mobile-graphics-and-gaming/ams/fa.md index 77d77a8f6e..5d83889000 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/ams/fa.md +++ b/content/learning-paths/mobile-graphics-and-gaming/ams/fa.md @@ -59,7 +59,7 @@ Explore each frame to evaluate how efficiently they were rendered on the device. 1. Look at the Render Graph to see how the frame was constructed. - ![Render Graph](images/FA_render_graph_1.1.gif) + ![Render Graph](images/fa_render_graph_1.1.gif) Evaluate the render graph to look for render passes or input or output attachments that aren’t used in the final output, and could be removed, saving processing power and bandwidth. diff --git a/content/learning-paths/mobile-graphics-and-gaming/ams/images/FA_render_graph_1.1.gif b/content/learning-paths/mobile-graphics-and-gaming/ams/images/FA_render_graph_1.1.gif deleted file mode 100644 index 9e74966d16..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/ams/images/FA_render_graph_1.1.gif and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/ams/images/fa_render_graph.png b/content/learning-paths/mobile-graphics-and-gaming/ams/images/fa_render_graph.png deleted file mode 100644 index 32fce118d1..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/ams/images/fa_render_graph.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/ams/images/ga_device_manager.png b/content/learning-paths/mobile-graphics-and-gaming/ams/images/ga_device_manager.png deleted file mode 100644 index 3e0616181e..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/ams/images/ga_device_manager.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_Sphinx.png b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_Sphinx.png deleted file mode 100644 index de50e00fac..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_Sphinx.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_render_graph_1.1.gif b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_render_graph_1.1.gif deleted file mode 100644 index 9e74966d16..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_render_graph_1.1.gif and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_step_drawcalls.gif b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_step_drawcalls.gif deleted file mode 100644 index 4c8d438306..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/FA_step_drawcalls.gif and /dev/null differ diff --git 
a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/Pillars.gif b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/Pillars.gif deleted file mode 100644 index 3090674db0..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/Pillars.gif and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/Render_graph_egypt_redundant_attachments.png b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/Render_graph_egypt_redundant_attachments.png deleted file mode 100644 index e267572f04..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/Render_graph_egypt_redundant_attachments.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze.md b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze.md index 100ca052ff..14b9196090 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze.md +++ b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze.md @@ -15,7 +15,7 @@ When the analysis completes, you will see Frame Advisor's `Analysis` screen. 1. You can see all the render passes that make up the frame. Expand a render pass to see the draw calls within it. Step through the draw calls to see how the scene is built. - ![Stepping through draw calls alt-text#center](FA_step_drawcalls.gif "Figure 1. Stepping through draw calls in Frame Advisor") + ![Stepping through draw calls alt-text#center](fa_step_drawcalls.gif "Figure 1. Stepping through draw calls in Frame Advisor") Draw calls are expensive for the CPU to process, so it is important to reduce the number of them where possible. Look for draw calls that don’t render visible changes to the framebuffer. If you don’t see any change, draws could be outside of the frustum or behind other objects. Use software culling techniques to eliminate them. @@ -23,4 +23,4 @@ When the analysis completes, you will see Frame Advisor's `Analysis` screen. 1. Look for instances where many identical objects are being drawn individually, like these pillars. There could be an opportunity to reduce the number of draw calls by batching multiple objects into a single combined mesh or by using an instanced draw call. - ![Framebuffers view alt-text#center](Pillars.gif "Figure 1. Framebuffers view in Frame Advisor") + ![Framebuffers view alt-text#center](pillars.gif "Figure 1. Framebuffers view in Frame Advisor") diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_geometry.md b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_geometry.md index 87408a96c3..95217e96bf 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_geometry.md +++ b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_geometry.md @@ -13,7 +13,7 @@ Use the Content Metrics view in Frame Advisor to find geometry-related problems 1. Right-click the draw call at the top of the list and choose `Navigate to call`. This complex object is now selected in the `Frame Hierarchy` view and you can see it in the `Framebuffers` view. 
This object is the Sphinx model and it is built using almost 23,000 primitives. This is a high number for a game object on mobile, so the first step is to see whether this model can be simplified. Fewer primitives reduces GPU processing cost and memory bandwidth. - ![The Sphinx model shown in the Framebuffers view alt-text#center](FA_Sphinx.png "Figure 2. The Sphinx model shown in the Framebuffers view") + ![The Sphinx model shown in the Framebuffers view alt-text#center](fa_sphinx.png "Figure 2. The Sphinx model shown in the Framebuffers view") In cases where the model cannot be simplified any further, there are other options to consider. diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_render_graph.md b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_render_graph.md index afc773038e..dec17523f6 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_render_graph.md +++ b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/analyze_render_graph.md @@ -9,11 +9,11 @@ The render graph in Frame Advisor shows a visualization of the rendering operati Render passes flow from left to right. The render pass that outputs to the swapchain is the final render pass that outputs to the screen. -![The Render Graph view in Frame Advisor alt-text#center](FA_render_graph_1.1.gif "Figure 1. The Render Graph view") +![The Render Graph view in Frame Advisor alt-text#center](fa_render_graph_1.1.gif "Figure 1. The Render Graph view") 1. Here, we can see some output attachments that are not used in a future render pass. - ![Redundant output attachments alt-text#center](Render_graph_egypt_redundant_attachments.png "Figure 3. Redundant output attachments") + ![Redundant output attachments alt-text#center](render_graph_egypt_redundant_attachments.png "Figure 3. Redundant output attachments") You should clear or invalidate input and output attachments that are not used to avoid unnecessary memory accesses. If clear or invalidate calls are present within a render pass, they are shown in the `Frame Hierarchy` view. 
diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/fa_show_descriptions.png b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/fa_show_descriptions.png deleted file mode 100644 index 695daffd15..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/fa_show_descriptions.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/render_graph_egypt.webp b/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/render_graph_egypt.webp deleted file mode 100644 index c92c9aa72e..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/analyze_a_frame_with_frame_advisor/render_graph_egypt.webp and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/3-integrate-dawn.md b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/3-integrate-dawn.md index 544058f2ac..dfd0a822ef 100755 --- a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/3-integrate-dawn.md +++ b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/3-integrate-dawn.md @@ -26,7 +26,7 @@ GameActivity is a Jetpack library designed to assist Android games in processing GameActivity is a direct descendant of NativeActivity and shares a similar architecture: -![Game Activity Architecture #center](./images/GameActivityArchitecture.png "Figure 5: Game Activity Architecture") +![Game Activity Architecture #center](./images/gameactivityarchitecture.png "Figure 5: Game Activity Architecture") With GameActivity, you can focus on game development and avoid spending excessive amounts of time dealing with the Java Native Interface (JNI) code. diff --git a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/7-profiling-app-using-streamline.md b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/7-profiling-app-using-streamline.md index 1f5326e38d..87a2ae3b62 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/7-profiling-app-using-streamline.md +++ b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/7-profiling-app-using-streamline.md @@ -36,13 +36,13 @@ Wait until Streamline completes processing the data. Switch to **Mali Timeline** view as shown below: -!["Mali Timeline Streamline" #center](images/Streamline-mali-timeline.png "Figure 13: Mali Timeline Streamline") +!["Mali Timeline Streamline" #center](images/streamline-mali-timeline.png "Figure 13: Mali Timeline Streamline") You might have to zoom into the data up to the maximum (**500 us**), as you are rendering a simple 3D object. You can analyze two consecutive frames as shown below: -!["Two Consecutive Frames" #center](./images/Streamline-mali-analysis.png "Figure 14: Two Consecutive Frames") +!["Two Consecutive Frames" #center](./images/streamline-mali-analysis.png "Figure 14: Two Consecutive Frames") Arm has worked with the Dawn team to optimize data uploading to GPU buffers for Mali GPUs. 
diff --git a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/GameActivityArchitecture.png b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/GameActivityArchitecture.png deleted file mode 100644 index 717875772e..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/GameActivityArchitecture.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/Streamline-mali-analysis.png b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/Streamline-mali-analysis.png deleted file mode 100644 index a7ec578686..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/Streamline-mali-analysis.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/Streamline-mali-timeline.png b/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/Streamline-mali-timeline.png deleted file mode 100644 index 799734c347..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/android_webgpu_dawn/images/Streamline-mali-timeline.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/add_asr_feature.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/add_asr_feature.png deleted file mode 100644 index 979e73abf5..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/add_asr_feature.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_debugger.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_debugger.png deleted file mode 100644 index 7ed7c26e2f..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_debugger.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_settings.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_settings.png deleted file mode 100644 index 7f79d38e62..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_settings.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_view.webp b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_view.webp deleted file mode 100644 index 8eeb583ea6..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/arm_asr_view.webp and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/camera_settings.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/camera_settings.png deleted file mode 100644 index 7da6fb34ef..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/camera_settings.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/create_renderer.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/create_renderer.png deleted 
file mode 100644 index e09399b81c..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/create_renderer.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/disable_opaque_downsampling.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/disable_opaque_downsampling.png deleted file mode 100644 index 0a3b1187c9..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/disable_opaque_downsampling.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/project_settings.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/project_settings.png deleted file mode 100644 index 968df94bed..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/project_settings.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/select_arm_asr.png b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/select_arm_asr.png deleted file mode 100644 index 3ee3202255..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/select_arm_asr.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/third_person_pack_opening_screen.webp b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/third_person_pack_opening_screen.webp deleted file mode 100644 index 67e500192d..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-arm-asr/images/third_person_pack_opening_screen.webp and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-unity-on-android/1-setup.md b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-unity-on-android/1-setup.md index b8a99fbadc..95c1ddac24 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/get-started-with-unity-on-android/1-setup.md +++ b/content/learning-paths/mobile-graphics-and-gaming/get-started-with-unity-on-android/1-setup.md @@ -77,7 +77,7 @@ A simple project is provided to accompany this Learning Path. To open it in Unit You will see your project listed in the _Projects_ tab in Unity Hub. -- You can now click on the project to open it. The sample was created with Unity 2022.3.18f1; if you use a different version, you will get a warning. The project is very simple and should be safe to convert. However, if in doubt, install 2022.3.18f1 via the Unity Hub as already shown. +- You can now click on the project to open it. The sample was created with Unity 2022.3.18f1; if you use a different version, you will get a warning. The project is simple and should be safe to open in your installed version. However, if in doubt, install 2022.3.18f1 via the Unity Hub as already shown. - The project will now open in Unity. Once loaded (the first time can take a while) find the scene folder, open the sample scene, and then click the _Play_ button to run the sample. This will run the project inside the editor. You will see a spinning cube. 
diff --git a/content/learning-paths/mobile-graphics-and-gaming/mte_on_pixel8/pictures/01_mte_option_in_developer_options.png b/content/learning-paths/mobile-graphics-and-gaming/mte_on_pixel8/pictures/01_mte_option_in_developer_options.png deleted file mode 100644 index 1ba96d1862..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/mte_on_pixel8/pictures/01_mte_option_in_developer_options.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/1-install-plugin.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/1-install-plugin.md index d4ce0ad4ca..7a7e51a8c0 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/1-install-plugin.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/1-install-plugin.md @@ -1,6 +1,6 @@ --- title: Introduction to neural graphics and Neural Super Sampling (NSS) -weight: 2 +weight: 3 ### FIXED, DO NOT MODIFY layout: learningpathall @@ -27,20 +27,4 @@ Under the hood, Neural Super Sampling for Unreal Engine (NSS for UE) runs its ne With these resources, you can seamlessly integrate NSS into any Unreal Engine project. The setup is designed to work with Vulkan as your rendering backend, and you don’t need to overhaul your workflow - just plug it in and start leveraging ML-powered upscaling right away. The technology is available as a source-code implementation that you will build with Visual Studio. -## Download required artifacts - -Before you begin, download the required plugins and dependencies. These two repositories contain everything you need to set up NSS for Unreal Engine, including the VGF model file, and the ML Emulations Layers for Vulkan. - -### 1. Download the NSS plugin - -[**Neural Super Sampling Unreal Engine Plugin** → GitHub Repository](https://github.com/arm/neural-graphics-for-unreal) - -Download the latest release package and extract it on your Windows machine. Use the folder corresponding to your Unreal version. - - -### 2. Download the runtime for ML Extensions for Vulkan -[**Unreal NNE Runtime RDG for ML Extensions for Vulkan** → GitHub Repository](https://github.com/arm/ml-extensions-for-vulkan-unreal-plugin). - -Download and extract the release package on your Windows machine. - -Once you’ve extracted both repositories, proceed to the next section to set up your development environment and enable the NSS plugin. \ No newline at end of file +Proceed to the next section to set up your development environment and enable the NSS plugin. \ No newline at end of file diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md index 4d9b825d88..655c2fab3c 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md @@ -1,6 +1,6 @@ --- title: Setting up the emulation layers -weight: 3 +weight: 4 ### FIXED, DO NOT MODIFY layout: learningpathall @@ -8,11 +8,11 @@ layout: learningpathall ## Install dependencies -To run NSS in your Unreal Engine project, install and configure the following: +To run NSS in your Unreal Engine project, you will need to install and configure the following: - **Vulkan SDK**: Required for development of applications that use Vulkan, and to enable the Vulkan Configurator. The latter sets up the emulation layers used for running ML extensions for Vulkan workloads. 
-- **ML Emulation Layer for Vulkan**: These layers allows neural inference to run in emulation through Vulkan’s compute backend. They are activated by Vulkan Configurator to run with the Unreal Engine plugin. The layers are included in the `NNERuntimeRDGMLExtensionsForVulkan` zip you downloaded in a previous step. The Vulkan layer configuration activates the ML Emulation Layer for Vulkan, which implements the ML extensions for Vulkan. -- **NSS for Unreal Engine plugins**: These include `NSS` (the inference and model interface) and `NNERuntimeRDGMLExtensionsForVulkan` (which connects Unreal’s Render Dependency Graph to the ML extensions for Vulkan). +- **ML Emulation Layer for Vulkan**: These layers allow neural inference to run in emulation through Vulkan’s compute backend. They are activated by Vulkan Configurator to run with the Unreal Engine plugin. The Vulkan layer configuration activates the ML Emulation Layer for Vulkan, which implements the ML extensions for Vulkan. +- **NSS for Unreal Engine plugin**: You will download and integrate the plugin in the next section. These components allow you to run NSS in Unreal Engine, using ML emulation layers for Vulkan for development and testing. @@ -20,6 +20,15 @@ These components allow you to run NSS in Unreal Engine, using ML emulation layer Go to the [Vulkan SDK landing page](https://vulkan.lunarg.com/sdk/home) and download the SDK Installer for Windows. After you have run the installer, you can move on to the next step. +## Download the emulation layers + +For this Learning Path, a pre-built package of the emulation layers is available. Download it using the link below. + +[**ML Emulation Layer for Vulkan** → Arm Developer Downloads](https://www.arm.com/-/media/Files/developer/MLEmulationLayerForVulkan20251107) + +Extract the downloaded file to a location of your choice. + + ## Configure Vulkan Layers Vulkan Configurator is a program that will run the emulation layers in the background when you want to utilize them with Unreal Engine. @@ -28,9 +37,9 @@ To emulate the ML extensions for Vulkan: 1. Launch the **Vulkan Configurator** (bundled with the Vulkan SDK) from the Windows **Start** menu. 2. In the **Apply a Vulkan Loader Configuration** list, right-click and choose **Create a new Configuration**. You can give the new configuration any name, for example `NSS`. 3. Navigate to the **Vulkan Layers Location** tab. -4. Append a user-defined path pointing to the emulation layers you downloaded in the previous section: +4. Append a user-defined path pointing to the emulation layers you downloaded in the previous step: ``` - /NNERuntimeRDGMLExtensionsForVulkan/MLEmulationLayerForVulkan + /MLEmulationLayerForVulkan20251107 ``` ![Add user-defined Vulkan layers path in Vulkan Configurator#center](./images/load_layers.png "Figure 1: Add Vulkan layer path.") @@ -38,56 +47,10 @@ To emulate the ML extensions for Vulkan: ![Layer configuration showing Graph above Tensor#center](./images/verify_layers.png "Figure 2: Verify layer ordering and scope.") - -{{% notice %}} -Keep the Vulkan Configurator running to enable the emulation layers during engine execution. +{{% notice Before you move on %}} +Make sure you keep Vulkan Configurator running in the background as you go through the next steps. {{% /notice %}} -## Enable NSS for Unreal Engine - -1. Open Unreal Engine and create a new **Third Person** template project using the **C++** option.
- -![Unreal Engine project selection screen showing C++ Third Person template#center](./images/unreal_startup.webp "Figure 3: Create a new C++ project in Unreal Engine.") - -2. Open the project in **Visual Studio**. Build it from source through **Build** > **Build Solution** or with `Ctrl+Shift+B`. - -After the build is finished, open your project in Unreal Engine. - -## Change Unreal’s Rendering Interface to Vulkan - -By default, Unreal uses DirectX. Instead, you need to choose Vulkan as the default RHI: -1. Go to: - ``` - Project Settings > Platform > Windows > Targeted RHIs > Default RHI - ``` -2. Select **Vulkan**. -3. Restart Unreal Engine to apply the change. - -![Project Settings with Vulkan selected as Default RHI under Targeted RHIs#center](./images/targeted_rhis.png "Figure 4: Set Vulkan as the default RHI.") - -## Create the Plugins directory - -Open your project directory in Windows explorer, and create a new folder called `Plugins`. - -Enabling the plugin will look slightly different depending on what Unreal version you are using. Follow the steps corresponding to your setup. - -## For Unreal 5.4 and 4.27 - -1. Copy the downloaded and extracted `.zip` archive into the new `Plugins` directory: - - `UE5.4` or `UE4.27` -2. Re-open Unreal Engine. When prompted, confirm plugin integration. -3. Rebuild your project in Visual Studio from source. -4. Verify the installation by opening the Plugins view in Unreal Engine, and making sure the checkbox is selected for `NSS`. Restart Unreal Engine if prompted. - -## For Unreal 5.5 - -1. Copy the downloaded and extracted `.zip` archives into the new `Plugins` directory: - - `UE5.5` - - `NNERuntimeRDGMLExtensionsForVulkan` -2. Re-open Unreal Engine. When prompted, confirm plugin integration. -3. Rebuild your project in Visual Studio from source. -4. Verify the installation by opening the Plugins view in Unreal Engine, and making sure the checkbox is selected for both `NSS` and `NNERuntimeRDGMLExtensionsForVulkan` as shown. Restart Unreal Engine if prompted. - -![Unreal Engine plugins window showing NSS and NNERuntimeRDGMLExtensionsForVulkan enabled#center](./images/verify_plugin_enabled.png "Figure 5: Verify plugin installation in Unreal Engine.") +With the ML emulation layers configured, Vulkan is now able to run machine learning workloads through the ML extensions for Vulkan. This enables neural inference to execute alongside the graphics pipeline during development, without requiring access to hardware with dedicated neural accelerators. -With the emulation layers and plugins configured, you're ready to run Neural Super Sampling in Unreal Engine. Continue to the next section to test the integration. +The next step is to integrate Neural Super Sampling into an Unreal Engine project. You’ll do this by installing the NSS plugin and creating a simple example game that lets you verify the setup and visualize the upscaling in action. 
\ No newline at end of file diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-create-example.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-create-example.md new file mode 100644 index 0000000000..d4089fdb75 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-create-example.md @@ -0,0 +1,60 @@ +--- +title: Create an example game +weight: 5 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- +## Download the NSS plugin + +This repository's release package contains everything you need to set up NSS for Unreal Engine, including the plugin and the VGF model file. + +[**Neural Super Sampling Unreal Engine Plugin** → GitHub Repository](https://github.com/arm/neural-graphics-for-unreal) + +Download the latest release `.zip` and extract it on your Windows machine. + +{{% notice Unreal Engine 5.5 plugin %}} +For this UE version, the steps will be slightly different. Refer to the repository documentation for more information. +{{% /notice %}} + + +## Enable NSS for Unreal Engine + +1. Open Unreal Engine and create a new **Third Person** template project using the **C++** option. + +![Unreal Engine project selection screen showing C++ Third Person template#center](./images/unreal_startup.webp "Figure 3: Create a new C++ project in Unreal Engine.") + +2. Open the project in **Visual Studio**. Build it from source through **Build** > **Build Solution** or with `Ctrl+Shift+B`. + +After the build is finished, open your project in Unreal Engine. + +## Change Unreal’s Rendering Interface to Vulkan + +By default, Unreal uses DirectX. Instead, you need to choose Vulkan as the default RHI: +1. Go to: + ``` + Project Settings > Platform > Windows > Targeted RHIs > Default RHI + ``` +2. Select **Vulkan**. +3. Restart Unreal Engine to apply the change. + +![Project Settings with Vulkan selected as Default RHI under Targeted RHIs#center](./images/targeted_rhis.png "Figure 4: Set Vulkan as the default RHI.") + +## Create the Plugins directory + +Open your project directory in Windows Explorer and create a new folder called `Plugins`. + +![Windows File Explorer showing project directory with newly created Plugins folder alongside other project directories#center](./images/plugins_dir.png "Figure 5: The new Plugins directory") + +## Enable the plugin + +The plugin is included in the release package you downloaded in the previous section. The package contains a separate folder for each supported Unreal Engine version. Make sure you use the folder that matches your engine version (for example, UE5.5 for Unreal Engine 5.5). + +1. Copy the appropriate engine-version folder from the extracted `.zip` archive into your project's Plugins directory. +2. Reopen Unreal Engine. When prompted, confirm that you want to enable the plugin. +3. Rebuild your project from source in Visual Studio. +4. Verify the installation by opening Edit → Plugins in Unreal Engine and confirming that the NSS plugin is enabled. Restart Unreal Engine if prompted. +![Unreal Engine plugins window showing NSS enabled#center](./images/verify_plugin_enabled.png "Figure 6: Verify plugin installation in Unreal Engine.") + +With the emulation layers and plugins configured, you're ready to run Neural Super Sampling in Unreal Engine. Continue to the next section to test the integration.
+ diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-run-example.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-run-example.md index b1c3304a64..6c8ea8333f 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-run-example.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/3-run-example.md @@ -1,6 +1,6 @@ --- title: Run the example -weight: 4 +weight: 6 ### FIXED, DO NOT MODIFY layout: learningpathall @@ -20,13 +20,43 @@ In **Project Settings > Plugins > Neural Super Sampling**, you can view and conf Run `ShowFlag.VisualizeTemporalUpscaler 0` to disable the overview. To visualize the NSS model output in real-time, run the following command: ``` - r.NSS.Debug 2 + r.NSS.Debug 1 ``` This will add real-time views showing the model’s processed outputs, such as predicted filter coefficients and feedback, as below. In the [Wrapping up section](/learning-paths/mobile-graphics-and-gaming/nss-unreal/6-wrapping-up), you will find links to learn more about what the debug outputs mean. ![Debug view of Neural Super Sampling model output in Unreal Engine#center](./images/nss_debug.png "Figure 6: Visualize NSS model debug output in real time.") +## Troubleshooting tips + +If the example does not behave as expected, check the following common issues before continuing. + +### Check for build issues in Visual Studio +- Build failures related to `AutomationTool`, `Gauntlet`, or other `*.Automation` projects can be ignored. +- Focus on whether the project itself, named `Editor`, builds successfully. + +### Check your Unreal Engine configuration +- Verify that Vulkan is selected as the **Default RHI**. +- Confirm the NSS plugin is enabled and that Unreal Engine was restarted after enabling it. +- Check **Project Settings → Plugins → Neural Super Sampling** to confirm a model is selected and active. + +If the NSS plugin is enabled but appears to have no effect: +- Ensure Vulkan Configurator is running. +- Verify that the correct layer configuration is selected and active. +- Double-check that: + - The emulation layer path is correct + - The Graph layer is ordered above the Tensor layer + +Refer back to the [emulation layer section](/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer/) for the full Vulkan Configurator setup and validation steps. + +### Check the software and hardware setup +- Confirm that the plugin version exactly matches your Unreal Engine version. +- Verify that your GPU driver supports Vulkan. +- Verify that your Visual Studio version aligns with the Unreal Engine version you are using. +- Return to the Visual Studio build output and inspect the logs carefully to identify the first reported error. + +Build or startup failures are often caused by version mismatches or missing dependencies. + ## NSS model on Hugging Face The model that powers NSS is published on Hugging Face in the [VGF format](https://github.com/arm/ai-ml-sdk-vgf-library). This format is optimized for inference via ML extensions for Vulkan.
diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/5-renderdoc.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/5-renderdoc.md index 0a099cc209..219fb6f59a 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/5-renderdoc.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/5-renderdoc.md @@ -1,6 +1,6 @@ --- title: Using RenderDoc for Debugging and Analysis -weight: 6 +weight: 7 ### FIXED, DO NOT MODIFY layout: learningpathall diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/6-wrapping-up.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/6-wrapping-up.md index e75384be86..d635bba4d5 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/6-wrapping-up.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/6-wrapping-up.md @@ -1,6 +1,6 @@ --- title: Wrapping up -weight: 7 +weight: 8 ### FIXED, DO NOT MODIFY layout: learningpathall diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/_index.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/_index.md index 852e91d22c..9ded8ab2a2 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/_index.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/_index.md @@ -15,8 +15,8 @@ learning_objectives: prerequisites: - Windows 11 - - Unreal Engine 4.27, 5.4 or 5.5 (with the Templates and Feature Pack enabled) - - Visual Studio 2022 (with Desktop Development with C++ and .NET desktop build tools) + - Unreal Engine 4.27 or 5.4-5.6 (with the Templates and Feature Pack enabled) + - Visual Studio (with Desktop Development with C++ and .NET desktop build tools) author: Annie Tallund @@ -26,6 +26,7 @@ skilllevels: Introductory subjects: ML armips: - Mali + - Immortalis tools_software_languages: - Unreal Engine - Vulkan SDK diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/add_plugin_folder.png b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/add_plugin_folder.png deleted file mode 100644 index 3bacc9afc5..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/add_plugin_folder.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/confirm_layers.png b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/confirm_layers.png deleted file mode 100644 index 86ed7de624..0000000000 Binary files a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/confirm_layers.png and /dev/null differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/plugins_dir.png b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/plugins_dir.png new file mode 100644 index 0000000000..d8799bf429 Binary files /dev/null and b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/plugins_dir.png differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/verify_plugin_enabled.png b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/verify_plugin_enabled.png index 96d8e1e267..0c1aea78a6 100644 Binary files a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/verify_plugin_enabled.png and b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/images/verify_plugin_enabled.png differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/01_fundamentals.md 
b/content/learning-paths/mobile-graphics-and-gaming/onnx/01_fundamentals.md new file mode 100644 index 0000000000..0dcca61c93 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/01_fundamentals.md @@ -0,0 +1,103 @@ +--- +# User change +title: "ONNX Fundamentals" + +weight: 2 + +layout: "learningpathall" +--- +The goal of this tutorial is to provide developers with a practical, end-to-end pathway for working with Open Neural Network Exchange (ONNX) in real-world scenarios. Starting from the fundamentals, we will build a simple neural network model in Python, export it to the ONNX format, and demonstrate how it can be used for both inference and training on Arm64 platforms. Along the way, we will cover model optimization techniques such as layer fusion, and conclude by deploying the optimized model into a fully functional Android application. By following this series, you will gain not only a solid understanding of ONNX’s philosophy and ecosystem but also the hands-on skills required to integrate ONNX into your own projects from prototyping to deployment. + +In this first step, we will introduce the ONNX standard and explain why it has become a cornerstone of modern machine learning workflows. You will learn what ONNX is, how it represents models in a framework-agnostic format, and why this matters for developers targeting different platforms such as desktops, Arm64 devices, or mobile environments. We will also discuss the role of ONNX Runtime as the high-performance engine that brings these models to life, enabling efficient inference and even training across CPUs, GPUs, and specialized accelerators. Finally, we will outline the typical ONNX workflow, from training in frameworks like PyTorch or TensorFlow, through export and optimization, to deployment on edge and Android devices, which we will gradually demonstrate throughout the tutorial. + +## What is ONNX +ONNX is an open standard for representing machine learning models in a framework-independent format. Instead of being tied to the internal model representation of a specific framework—such as PyTorch, TensorFlow, or scikit-learn—ONNX provides a universal way to describe models using a common set of operators, data types, and computational graphs. + +At its core, an ONNX model is a directed acyclic graph (DAG) where nodes represent mathematical operations (e.g., convolution, matrix multiplication, activation functions) and edges represent tensors flowing between these operations. This standardized representation allows models trained in one framework to be exported once and executed anywhere, without requiring the original framework at runtime. + +ONNX was originally developed by Microsoft and Facebook to address a growing need in the machine learning community: the ability to move models seamlessly between training environments and deployment targets. Today, it is supported by a wide ecosystem of contributors and hardware vendors, making it the de facto choice for interoperability and cross-platform deployment. + +For developers, this means flexibility. You can train your model in PyTorch, export it to ONNX, run it with ONNX Runtime on an Arm64 device such as a Raspberry Pi, and later deploy it inside an Android application without rewriting the model. This portability is the main reason ONNX has become a central building block in modern AI workflows. + +A useful way to think of ONNX is to compare it to a PDF for machine learning models.
Just as a PDF file ensures that a document looks the same regardless of whether you open it in Adobe Reader, Preview on macOS, or a web browser, ONNX ensures that a machine learning model behaves consistently whether you run it on a server GPU, a Raspberry Pi, or an Android phone. It is this “write once, run anywhere” principle that makes ONNX especially powerful for developers working across diverse hardware platforms. + +At the same time, ONNX is not a closed box. Developers can extend the format with custom operators or layers when standard ones are not sufficient. This flexibility makes it possible to inject novel research ideas, proprietary operations, or hardware-accelerated kernels into an ONNX model while still benefiting from the portability of the core standard. In other words, ONNX gives you both consistency across platforms and extensibility for innovation. + +## Why ONNX Matters +Machine learning today is not limited to one framework or one device. A model might be trained in PyTorch on a GPU workstation, tested in TensorFlow on a cloud server, and then finally deployed on an Arm64-based edge device or Android phone. Without a common standard, moving models between these environments would be complex, error-prone, and often impossible. ONNX solves this problem by acting as a universal exchange format, ensuring that models can flow smoothly across the entire development and deployment pipeline. + +The main reasons ONNX matters are: +1. Interoperability – ONNX eliminates framework lock-in. You can train in PyTorch, validate in TensorFlow, and deploy with ONNX Runtime on almost any device, from servers to IoT boards. +2. Performance – ONNX Runtime includes highly optimized execution backends, supporting hardware acceleration through Arm NEON, CUDA, DirectML, and Android NNAPI. This means the same model can run efficiently across a wide spectrum of hardware. +3. Portability – Once exported to ONNX, the model can be deployed to Arm64 devices (like Raspberry Pi or AWS Graviton servers) or even embedded in an Android app, without rewriting the code. +4. Ecosystem – The ONNX Model Zoo provides ready-to-use, pre-trained models for vision, NLP, and speech tasks, making it easy to start from state-of-the-art baselines. +5. Extensibility – Developers can inject their own layers or custom operators when the built-in operator set is not sufficient, enabling innovation while preserving compatibility. + +In short, ONNX matters because it turns the fragmented ML ecosystem into a cohesive workflow, empowering developers to focus on building applications rather than wrestling with conversion scripts or hardware-specific code. + +## ONNX Model Structure +An ONNX model is more than just a collection of weights—it is a complete description of the computation graph that defines how data flows through the network. Understanding this structure is key to seeing why ONNX is both portable and extensible. + +At a high level, an ONNX model consists of three main parts: +1. Graph, which is the heart of the model, represented as a directed acyclic graph (DAG). In this graph nodes correspond to operations (e.g., Conv, Relu, MatMul), edges represent tensors flowing between nodes, carrying input and output data. +2. Opset (Operator Set), which is a versioned collection of supported operations. Opsets guarantee that models exported with one framework will behave consistently when loaded by another, as long as the same opset version is supported. +3. 
Metadata, which contains information about inputs, outputs, tensor shapes, and data types. Metadata can also include custom annotations such as the model author, domain, or framework version. + +This design allows ONNX to describe anything from a simple logistic regression to a deep convolutional neural network. For example, a single ONNX graph might define: +* An input tensor representing a camera image. +* A sequence of convolution and pooling layers. +* Fully connected layers leading to classification probabilities. +* An output tensor with predicted labels. + +Because the ONNX format is based on a standardized graph representation, it is both human-readable (with tools like Netron for visualization) and machine-executable (parsed directly by ONNX Runtime or other backends). + +Importantly, ONNX models are not static. Developers can insert, remove, or replace nodes in the graph, making it possible to add new layers, prune unnecessary ones, or fuse operations for optimization. This graph-level flexibility is what enables many of the performance improvements we’ll explore later in this tutorial, such as layer fusion and quantization. + +## ONNX Runtime +While ONNX provides a standard way to represent models, it still needs a high-performance engine to actually execute them. This is where ONNX Runtime (ORT) comes in. ONNX Runtime is the official, open-source inference engine for ONNX models, designed to run them quickly and efficiently across a wide variety of hardware. + +At its core, ONNX Runtime is optimized for speed, portability, and extensibility: +1. Cross-platform support. ORT runs on Windows, Linux, and macOS, as well as mobile platforms like Android and iOS. It supports both x86 and Arm64 architectures, making it suitable for deployment from cloud servers to edge devices such as Raspberry Pi boards and smartphones. + +2. Hardware acceleration. ORT integrates with a wide range of execution providers (EPs) that tap into hardware capabilities: +* Arm Kleidi kernels accelerated with Arm NEON, SVE2, and SME2 instructions for efficient CPU execution on Arm64. +* CUDA for NVIDIA GPUs. +* DirectML for Windows. +* NNAPI on Android, enabling direct access to mobile accelerators (DSPs, NPUs). + +3. Inference and training. ONNX Runtime also supports training and fine-tuning, making it possible to use the same runtime across the entire ML lifecycle. + +4. Optimization built in. ORT can automatically apply graph optimizations such as constant folding, operator fusion, or memory layout changes to squeeze more performance out of your model. + +For developers, this means you can take a model trained in PyTorch, export it to ONNX, and then run it with ONNX Runtime on virtually any device—without worrying about the underlying hardware differences. The runtime abstracts away the complexity, choosing the best available execution provider for your environment. + +This flexibility makes ONNX Runtime a powerful bridge between training frameworks and deployment targets, and it is the key technology that allows ONNX models to run effectively on Arm64 platforms and Android devices. + +## How ONNX Fits into the Workflow + +One of the biggest advantages of ONNX is how naturally it integrates into a developer’s machine learning workflow. Instead of locking you into a single framework from training to deployment, ONNX provides a bridge that connects different stages of the ML lifecycle. + +A typical ONNX workflow looks like this: +1. Train the model. 
You first use your preferred framework (e.g., PyTorch, TensorFlow, or scikit-learn) to design and train a model. At this stage, you benefit from the flexibility and ecosystem of the framework of your choice. +2. Export to ONNX. Once trained, the model is exported into the ONNX format using built-in converters (such as torch.onnx.export for PyTorch). This produces a portable .onnx file describing the network architecture, weights, and metadata. +3. Run inference with ONNX Runtime. The ONNX model can now be executed on different devices using ONNX Runtime. On Arm64 hardware, ONNX Runtime can take advantage of Arm Kleidi kernels accelerated with NEON, SVE2, and SME2 instructions, while on Android devices it can leverage NNAPI to access mobile accelerators (where available). +4. Optimize the model. Apply graph optimizations like layer fusion, constant folding, or quantization to improve performance and reduce memory usage, making the model more suitable for edge and mobile deployments. +5. Deploy. Finally, the optimized ONNX model is packaged into its target environment. This could be an Arm64-based embedded system (e.g., Raspberry Pi), a server powered by Arm CPUs (e.g., AWS Graviton), or an Android application distributed via the Play Store. + +This modularity means developers are free to mix and match the best tools for each stage: train in PyTorch, optimize with ONNX Runtime, and deploy to Android—all without rewriting the model. By decoupling training from inference, ONNX enables efficient workflows that span from research experiments to production-grade applications. + +## Example Use Cases +ONNX is already widely adopted in real-world applications where portability and performance are critical. A few common examples include: +1. Computer Vision at the Edge – Running an object detection model (e.g., YOLOv5 exported to ONNX) on a Raspberry Pi 4 or NVIDIA Jetson, enabling low-cost cameras to detect people, vehicles, or defects in real time. +2. Mobile Applications – Deploying face recognition or image classification models inside an Android app using ONNX Runtime Mobile, with NNAPI acceleration for efficient on-device inference. +3. Natural Language Processing (NLP) – Running BERT-based models on Arm64 cloud servers (like AWS Graviton) to provide fast, low-cost inference for chatbots and translation services. +4. Healthcare Devices – Using ONNX to integrate ML models into portable diagnostic tools or wearable sensors, where Arm64 processors dominate due to their low power consumption. +5. Cross-platform Research to Production – Training experimental architectures in PyTorch, exporting them to ONNX, and validating them across different backends to ensure consistent performance. +6. AI Accelerator Integration – ONNX is especially useful for hardware vendors building custom AI accelerators. Since accelerators often cannot support the full range of ML operators, ONNX’s extensible operator model allows manufacturers to plug in custom kernels where hardware acceleration is available, while gracefully falling back to the standard runtime for unsupported ops. This makes it easier to adopt new hardware without rewriting entire models. + +## Summary +In this section, we introduced ONNX as an open standard for representing machine learning models across frameworks and platforms. We explored its model structure—graphs, opsets, and metadata—and explained the role of ONNX Runtime as the high-performance execution engine. 
We also showed how ONNX fits naturally into the ML workflow: from training in PyTorch or TensorFlow, to exporting and optimizing the model, and finally deploying it on Arm64 or Android devices. + +A useful way to think of ONNX is as the PDF of machine learning models—a universal, consistent format that looks the same no matter where you open it, but with the added flexibility to inject your own layers and optimizations. + +Beyond portability for developers, ONNX is also valuable for hardware and AI-accelerator builders. Because accelerators often cannot support every possible ML operator, ONNX’s extensible operator model allows manufacturers to seamlessly integrate custom kernels where acceleration is available, while relying on the runtime for unsupported operations. This combination of consistency, flexibility, and extensibility makes ONNX a cornerstone technology for both AI application developers and hardware vendors. \ No newline at end of file diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/02_setup.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/02_setup.md new file mode 100644 index 0000000000..fe9179c87c --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/02_setup.md @@ -0,0 +1,130 @@ +--- +# User change +title: "Environment Setup" + +weight: 3 + +layout: "learningpathall" +--- + +## Objective +This step gets you ready to build, export, run, and optimize ONNX models on Arm64. You’ll set up Python, install ONNX and ONNX Runtime, and confirm which execution providers are available. + +## Choosing the hardware +You can choose a variety of hardware, including: +* Edge boards (Linux/Arm64) - Raspberry Pi 4/5 (64-bit OS), Jetson (Arm64 CPU; GPU via CUDA if using NVIDIA stack), Arm servers (e.g., AWS Graviton). +* Apple Silicon (macOS/Arm64) - Great for development, deploy to Arm64 Linux later. +* Windows on Arm - Dev/test on WoA, deploy to Linux Arm64 for production if desired. + +The nice thing about ONNX is that the **same model file** can run across all of these, so your setup is flexible. + +## Install Python +Depending on the hardware you use, follow the matching installation path: + +1. Linux (Arm64). In the console, type: +```console +sudo apt update +sudo apt install -y python3 python3-venv python3-pip build-essential libopenblas-dev +``` + +2. macOS (Apple Silicon): +```console +brew install python +``` + +3. Windows on Arm: +* Install Python 3.10+ from python.org (Arm64 build). +* Ensure pip is on PATH. + +After installing Python, open a terminal or console, create a clean virtual environment, and update pip and wheel: + +```console +python3 -m venv .venv +source .venv/bin/activate # on Windows use: .venv\Scripts\activate +python -m pip install --upgrade pip wheel +``` + +Using a virtual environment keeps dependencies isolated and avoids conflicts with system-wide Python packages. + +## Install Core Packages +Start by installing the minimal stack: +```console
pip install onnx onnxruntime onnxscript netron numpy +``` +The above will install the following: +* onnx – core library for loading/saving ONNX models. +* onnxruntime – high-performance runtime to execute models. +* onnxscript – required for the new Dynamo-based exporter. +* netron – tool for visualizing ONNX models. +* numpy – used for tensor manipulation.
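Before moving on, you can optionally confirm that these packages import cleanly and see which ONNX Runtime execution providers are exposed on your machine. The short check below is not part of the Learning Path scripts (the file name `check_env.py` is just a suggestion); on a plain CPU-only install you will typically see only `CPUExecutionProvider`, while GPU- or NPU-enabled builds list additional providers:

```python
# check_env.py (optional): confirm the ONNX stack imports and list execution providers
import numpy as np
import onnx
import onnxruntime as ort

print("onnx version:       ", onnx.__version__)
print("onnxruntime version:", ort.__version__)
print("numpy version:      ", np.__version__)

# The provider list depends on your platform and the onnxruntime build you installed
print("Available execution providers:", ort.get_available_providers())
```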
+ +Now, install PyTorch (we’ll use it later to build and export a sample model): + +```console +pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu +``` + +## Verify the installation +Let’s verify everything works end-to-end by training a toy network and exporting it to ONNX. + +Create a new file 01_Init.py and add the following code + +```python +import torch, torch.nn as nn +import onnx, onnxruntime as ort +import numpy as np + +class SmallNet(nn.Module): + def __init__(self): + super().__init__() + self.seq = nn.Sequential( + nn.Conv2d(1, 8, 3, padding=1), + nn.ReLU(), + nn.AdaptiveAvgPool2d((1,1)), + nn.Flatten(), + nn.Linear(8, 10) + ) + def forward(self, x): return self.seq(x) + +m = SmallNet().eval() +dummy = torch.randn(1, 1, 28, 28) + +torch.onnx.export( + m, dummy, "smallnet.onnx", + input_names=["input"], output_names=["logits"], + opset_version=19, + do_constant_folding=True, + keep_initializers_as_inputs=False, + dynamo=True +) + +# Quick sanity run +sess = ort.InferenceSession("smallnet.onnx", providers=["CPUExecutionProvider"]) +out = sess.run(["logits"], {"input": dummy.numpy()})[0] +print("Output shape:", out.shape, "Providers:", sess.get_providers()) +``` + +Then, run it as follows + +```console +python3 01_Init.py +``` + +You should see the following output: +```output +python3 01_Init.py +[torch.onnx] Obtain model graph for `SmallNet([...]` with `torch.export.export(..., strict=False)`... +[torch.onnx] Obtain model graph for `SmallNet([...]` with `torch.export.export(..., strict=False)`... ✅ +[torch.onnx] Run decomposition... +[torch.onnx] Run decomposition... ✅ +[torch.onnx] Translate the graph into ONNX... +[torch.onnx] Translate the graph into ONNX... ✅ +Output shape: (1, 10) Providers: ['CPUExecutionProvider'] +``` + +The 01_Init.py script serves as a quick end-to-end validation of your ONNX environment. It defines a very small convolutional neural network (SmallNet) in PyTorch, which consists of a convolution layer, activation function, pooling, flattening, and a final linear layer that outputs 10 logits. Instead of training the model, we simply run it in evaluation mode on a random input tensor to make sure the graph structure works. This model is then exported to the ONNX format using PyTorch’s new Dynamo-based exporter, producing a portable smallnet.onnx file. + +After export, the script immediately loads the ONNX model with ONNX Runtime and executes a forward pass using the CPU execution provider. This verifies that the installation of ONNX, ONNX Runtime, and PyTorch is correct and that models can flow seamlessly from definition to inference. By printing the output tensor’s shape and the active execution provider, the script demonstrates that the toolchain is fully functional on your Arm64 device, giving you a solid baseline before moving on to more advanced models and optimizations. + +## Summary +You now have a fully functional ONNX development environment on Arm64. Python and all required packages are installed, and you successfully exported a small PyTorch model to ONNX using the new Dynamo exporter, ensuring forward compatibility. Running the model with ONNX Runtime confirmed that inference works end-to-end with the CPU execution provider, proving that your toolchain is correctly configured. With this foundation in place, the next step is to build and export a more complete model and run it on Arm64 hardware to establish baseline performance before applying optimizations. 
\ No newline at end of file diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/03_preparingdata.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/03_preparingdata.md new file mode 100644 index 0000000000..b5bea9f3b2 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/03_preparingdata.md @@ -0,0 +1,261 @@ +--- +# User change +title: "Preparing a Synthetic Sudoku Digit Dataset" + +weight: 4 + +layout: "learningpathall" +--- + +## Big picture +Our end goal is a camera-to-solution Sudoku app that runs efficiently on Arm64 devices (e.g., Raspberry Pi or Android phones). ONNX is the glue: we’ll train the digit recognizer in PyTorch, export it to ONNX, and run it anywhere with ONNX Runtime (CPU EP on edge devices, NNAPI EP on Android). Everything around the model—grid detection, perspective rectification, and solving—stays deterministic and lightweight. + +## Objective +In this step, we will generate a custom dataset of Sudoku puzzles and their digit crops, which we’ll use to train a digit recognition model. Starting from a Hugging Face parquet dataset that provides paired puzzle/solution strings, we transform raw boards into realistic, book-style Sudoku pages, apply camera-like augmentations to mimic mobile captures, and automatically slice each page into 81 labeled cell images. This yields a large, diverse, perfectly labeled set of digits (0–9 with 0 = blank) without manual annotation. By the end, you’ll have a structured dataset ready to train a lightweight model in the next section. + +## Why Synthetic Generation? +When building a Sudoku digit recognizer, the hardest part is obtaining a well-labeled dataset that matches real capture conditions. MNIST contains handwritten digits, which differ from printed, grid-aligned Sudoku digits; relying on it alone hurts real-world performance. + +By generating synthetic Sudoku pages directly from the parquet dataset, we get: +1. Perfect labeling. Since the puzzle content is known, every cropped cell automatically comes with the correct label (digit or blank), eliminating manual annotation. +2. Control over style. We can render Sudoku pages to look like those in printed books, with realistic fonts, grid lines, and difficulty levels controlled by how many cells are left blank. +3. Robustness through augmentation: By applying perspective warps, blur, noise, and lighting variations, we simulate how a smartphone camera might capture a Sudoku page, improving the model’s ability to handle real-world photos. +4. Scalability. With millions of Sudoku solutions available, we can easily generate tens of thousands of training samples in minutes, ensuring a dataset that is both large and diverse. + +This synthetic data generation strategy allows us to create a custom-fit dataset for our Sudoku digit recognition problem, bridging the gap between clean digital puzzles and noisy real-world inputs. + +## What we’ll produce +By the end of this step, you will have two complementary datasets: +1. Digit crops for training the classifier. A folder tree structured for torchvision.datasets.ImageFolder, containing tens of thousands of labeled 28×28 images of Sudoku digits (0–9, with 0 meaning blank): + +```console +data/ + train/ + 0/....png (blank) + 1/....png + ... + 9/....png + val/ + 0/....png + ... + 9/....png +``` + +These will be used in next step to train a lightweight model for digit recognition. + +2. Rendered Sudoku grids for camera simulation. 
Full-page Sudoku images (both clean book-style and augmented camera-like versions) stored in: +```console +data/ + grids/ + train/ + 000001_clean.png + 000001_cam.png + ... + val/ + ... +``` + +These grid images allow us to later test the end-to-end pipeline: detect the board with OpenCV, rectify perspective, classify each cell using the ONNX digit recognizer, and then solve the Sudoku puzzle. + +Together, these datasets provide both the micro-level data needed to train the digit recognizer and the macro-level data to simulate the camera pipeline for testing and deployment. + +## Implementation +Start by creating a new file 02_PrepareData.py and modify it as follows: +```python +import os, random, pathlib +import numpy as np +import cv2 as cv +import pandas as pd +from tqdm import tqdm + +random.seed(0) + +# Parameters +PARQUET_PATH = "train_1.parquet" # path to your downloaded HF Parquet +OUT_DIR = pathlib.Path("data") +N_TRAIN = 1000 # how many puzzles to render for training +N_VAL = 100 # how many for validation +IMG_H, IMG_W = 1200, 800 # page size (portrait-ish) +GRID_MARGIN = 60 # outer margin, px +CELL_SIZE = 28 # output crop size for classifier (MNIST-like) +FONT = cv.FONT_HERSHEY_SIMPLEX +# + +def str_to_grid(s: str): + """81-char '012345678' string -> 9x9 list of ints.""" + s = s.strip() + assert len(s) == 81, f"bad length: {len(s)}" + return [[int(s[9*r+c]) for c in range(9)] for r in range(9)] + +def load_puzzles(parquet_path, n_train, n_val): + """Load puzzles/solutions; return two lists of 9x9 int grids for train/val.""" + df = pd.read_parquet(parquet_path, engine="pyarrow") + # Shuffle reproducibly + df = df.sample(frac=1.0, random_state=0).reset_index(drop=True) + # Keep only needed columns if present + need_cols = [c for c in ["puzzle", "solution"] if c in df.columns] + if not need_cols or "puzzle" not in need_cols: + raise ValueError(f"Expected 'puzzle' (and optionally 'solution') columns; got: {list(df.columns)}") + + # Slice train/val partitions + df_train = df.iloc[:n_train] + df_val = df.iloc[n_train:n_train+n_val] + + puzzles_train = [str_to_grid(p) for p in df_train["puzzle"].astype(str)] + puzzles_val = [str_to_grid(p) for p in df_val["puzzle"].astype(str)] + + # Solutions are optional (useful later for solver validation) + solutions_train = [str_to_grid(s) for s in df_train["solution"].astype(str)] if "solution" in df_train else None + solutions_val = [str_to_grid(s) for s in df_val["solution"].astype(str)] if "solution" in df_val else None + + return (puzzles_train, solutions_train), (puzzles_val, solutions_val) + +def draw_grid(img, size=9, margin=GRID_MARGIN): + H, W = img.shape[:2] + step = (min(H, W) - 2*margin) // size + x0 = (W - size*step) // 2 + y0 = (H - size*step) // 2 + for i in range(size+1): + thickness = 3 if i % 3 == 0 else 1 + # vertical + cv.line(img, (x0 + i*step, y0), (x0 + i*step, y0 + size*step), (0, 0, 0), thickness) + # horizontal + cv.line(img, (x0, y0 + i*step), (x0 + size*step, y0 + i*step), (0, 0, 0), thickness) + return (x0, y0, step) + +def put_digit(img, r, c, d, x0, y0, step): + if d == 0: + return # blank cell + text = str(d) + scale = step / 60.0 + thickness = 2 + (tw, th), base = cv.getTextSize(text, FONT, scale, thickness) + cx = x0 + c*step + (step - tw)//2 + cy = y0 + r*step + (step + th)//2 - th//4 + cv.putText(img, text, (cx, cy), FONT, scale, (0, 0, 0), thickness, cv.LINE_AA) + +def render_page(puzzle9x9): + page = np.full((IMG_H, IMG_W, 3), 255, np.uint8) + x0, y0, step = draw_grid(page, 9, GRID_MARGIN) + for r in 
range(9): + for c in range(9): + put_digit(page, r, c, puzzle9x9[r][c], x0, y0, step) + return page, (x0, y0, step) + +def aug_camera(img): + """Light camera-like augmentation: perspective jitter + optional Gaussian blur.""" + H, W = img.shape[:2] + def jitter(pt, s=20): + return (pt[0] + random.randint(-s, s), pt[1] + random.randint(-s, s)) + src = np.float32([(0, 0), (W, 0), (W, H), (0, H)]) + dst = np.float32([jitter((0,0)), jitter((W,0)), jitter((W,H)), jitter((0,H))]) + M = cv.getPerspectiveTransform(src, dst) + warped = cv.warpPerspective(img, M, (W, H), flags=cv.INTER_LINEAR, borderValue=(220, 220, 220)) + if random.random() < 0.5: + k = random.choice([1, 2]) + warped = cv.GaussianBlur(warped, (2*k+1, 2*k+1), 0) + return warped + +def ensure_dirs(split): + for cls in range(10): # 0..9 (0 == blank) + (OUT_DIR / split / str(cls)).mkdir(parents=True, exist_ok=True) + +def save_crops(page, geom, puzzle9x9, split, base_id): + x0, y0, step = geom + idx = 0 + for r in range(9): + for c in range(9): + x1, y1 = x0 + c*step, y0 + r*step + roi = page[y1:y1+step, x1:x1+step] + g = cv.cvtColor(roi, cv.COLOR_BGR2GRAY) + g = cv.resize(g, (CELL_SIZE, CELL_SIZE), interpolation=cv.INTER_AREA) + label = puzzle9x9[r][c] # 0 for blank, 1..9 digits + out_path = OUT_DIR / split / str(label) / f"{base_id}_{idx:02d}.png" + cv.imwrite(str(out_path), g) + idx += 1 + +def process_split(puzzles, split_name, n_limit): + ensure_dirs(split_name) + grid_dir = OUT_DIR / "grids" / split_name + grid_dir.mkdir(parents=True, exist_ok=True) + + N = min(n_limit, len(puzzles)) + for i in tqdm(range(N), desc=f"render {split_name}"): + puzzle = puzzles[i] + + # Clean page + page, geom = render_page(puzzle) + save_crops(page, geom, puzzle, split_name, base_id=f"{i:06d}_clean") + cv.imwrite(str(grid_dir / f"{i:06d}_clean.png"), page) + + # Camera-like + warped = aug_camera(page) + save_crops(warped, geom, puzzle, split_name, base_id=f"{i:06d}_cam") + cv.imwrite(str(grid_dir / f"{i:06d}_cam.png"), warped) + +def main(): + (p_train, _s_train), (p_val, _s_val) = load_puzzles(PARQUET_PATH, N_TRAIN, N_VAL) + process_split(p_train, "train", N_TRAIN) + process_split(p_val, "val", N_VAL) + print("Done. Output under:", OUT_DIR.resolve()) + +if __name__ == "__main__": + main() +``` + +At the top, you set basic knobs for the generator: where to read the Parquet file, where to write outputs, how many puzzles to render for train/val, page size, grid margin, crop size, and the OpenCV font. Tweaking these lets you control dataset scale, visual style, and classifier input size (e.g., CELL_SIZE=32 if you want a slightly larger digit crop). + +The method str_to_grid(s) converts an 81-character Sudoku string into a 9×9 list of integers. Each character represents a cell: 0 is blank, 1–9 are digits. This is the canonical internal representation used throughout the script. + +Then, we have load_puzzles(parquet_path, n_train, n_val), which loads the dataset from Parquet, shuffles it deterministically, and slices it into train/val partitions. It returns the puzzles (and, if present, solutions) as 9×9 integer grids. In this step we only need puzzle for rendering and labeling digit crops (blanks included); solution is useful later for solver validation. + +Subsequently, draw_grid(img, size=9, margin=GRID_MARGIN) draws a Sudoku grid on a blank page image. It computes the step size from the page dimensions and margin, then draws both thin inner lines and thick 3×3 box boundaries. 
It returns the top-left corner (x0, y0) and the cell size (step), which are reused to place digits and to locate each cell for cropping. + +Next, put_digit(img, r, c, d, x0, y0, step) renders a single digit d at row r, column c inside the grid. The text is centered in the cell using the font metrics; if d == 0, it leaves the cell blank. This mirrors printed-book Sudoku styling so our crops look realistic. + +Another method, render_page(puzzle9x9) builds a complete “book-style” Sudoku page: creates a white canvas, draws the grid, loops over all 81 cells, and writes digits using put_digit. It returns the page plus the grid geometry (x0, y0, step) for subsequent cropping. + +A method aug_camera(img) applies a light, camera-like augmentation to mimic smartphone captures: a small perspective warp (random corner jitter) and optional Gaussian blur. The warp uses a light gray border fill so any exposed areas look like paper rather than colored artifacts. This produces a second version of each page that’s closer to real-world inputs. + +Afterward, ensure_dirs(split) makes the class directories for a given split (train or val) so that crops can be saved in data/{split}/{class}/.... The classes are 0..9 with 0 = blank. + +A method save_crops(page, geom, puzzle9x9, split, base_id) slices the page into 81 cell crops using the grid geometry, converts each crop to grayscale, resizes it to CELL_SIZE × CELL_SIZE, and saves it into the appropriate class directory based on the puzzle’s value at that cell (0..9). Using the puzzle for labels ensures we learn to recognize blanks as well as digits. + +Then, process_split(puzzles, split_name, n_limit) is the workhorse for each partition. For each puzzle, it (1) renders a clean page, saves its 81 crops, and writes the full page under data/grids/{split}; then (2) generates an augmented “camera-like” version and saves its crops and full page too. This gives you both micro-level training data (crops) and macro-level test images (full grids) for the later camera pipeline. + +Finally, main() loads train/val puzzles from Parquet and calls process_split for each. When it finishes, you’ll have: +```console +data/ + train/ + 0/… 1/… … 9/… + val/ + 0/… … 9/… + grids/ + train/ (..._clean.png, ..._cam.png) + val/ (..._clean.png, ..._cam.png) +``` + +## Launching instructions +1. Install dependencies (inside your virtual env): +```console +pip install pandas pyarrow opencv-python tqdm numpy +``` + +2. Place the Parquet file (e.g., train_1.parquet) next to the script or update PARQUET_PATH accordingly. Here we used the file from [this location](https://huggingface.co/datasets/Ritvik19/Sudoku-Dataset/blob/main/train_1.parquet). + +3. Run the generator +```console +python3 02_PrepareData.py +``` + +4. Inspect outputs: +* Digit crops live under data/train/{0..9}/ and data/val/{0..9}/. +* Full-page grids (clean + camera-like) live under data/grids/train/ and data/grids/val/. + +Tips +* Start small (N_TRAIN=1000, N_VAL=100) to verify everything, then scale up. +* If you want larger inputs for the classifier, increase CELL_SIZE to 32 or 40. +* To make augmentation a bit stronger (more realistic), slightly increase the perspective jitter in aug_camera, add brightness/contrast jitter, or a faint gradient shadow overlay. + +## Summary +After running this step you’ll have a robust, labeled, Sudoku-specific dataset: thousands of digit crops (including blanks) for training and realistic full-page grids for pipeline testing. 
You’re ready for the next step—training the digit recognizer and exporting it to ONNX. \ No newline at end of file diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/04_training.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/04_training.md new file mode 100644 index 0000000000..332db92273 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/04_training.md @@ -0,0 +1,238 @@ +--- +# User change +title: "Train the Digit Recognizer" + +weight: 5 + +layout: "learningpathall" +--- + +## Objective ## +We will now train a small CNN to classify Sudoku cell crops into 10 classes (0=blank, 1..9=digit), verify accuracy, then export the model to ONNX using the Dynamo exporter and sanity-check parity with ONNX Runtime. This gives us a portable model ready for Arm64 inference and later Android deployment. + +## Creating a model +We use a tiny convolutional neural network (CNN) called DigitNet, designed to be both fast (so it runs efficiently on Arm64 and mobile) and accurate enough for recognizing 28×28 grayscale crops of Sudoku digits. It expects 1 input channel (in_channels=1) because we forced grayscale in the preprocessing step. + +We start by creating a new file digitnet_model.py and defining the DigitNet class: +```python +import torch +import torch.nn as nn + +class DigitNet(nn.Module): + """ + Tiny CNN for Sudoku digit classification. + Classes: 0..9 where 0 = blank. + Input: (N,1,H,W) grayscale (default 28x28). + """ + def __init__(self, num_classes: int = 10): + super().__init__() + self.net = nn.Sequential( + nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), + nn.AdaptiveAvgPool2d((1,1)), + nn.Flatten(), + nn.Linear(32, num_classes), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.net(x) +``` + +We use a very compact convolutional neural network (CNN), which we call DigitNet, to recognize Sudoku digits. The goal is to have a model that is simple enough to run efficiently on Arm64 and mobile devices, but still powerful enough to tell apart the ten classes we care about (0 for blank, and digits 1 through 9). + +The network expects each input to be a 28×28 grayscale crop, so it begins with a convolution layer that has one input channel and sixteen filters. This first convolution is responsible for learning very low-level patterns such as strokes or edges. Immediately after, a ReLU activation introduces non-linearity, which allows the network to combine those simple features into more expressive ones. A max-pooling layer then reduces the spatial resolution by half, making the representation more compact and less sensitive to small translations. + +At this point, the feature maps are passed through a second convolutional layer with thirty-two filters. This stage learns richer patterns, for example combinations of edges that form loops or intersections that distinguish an “8” from a “0” or a “6”. Another ReLU activation adds the necessary non-linearity to these higher-level features. + +Instead of flattening the entire feature map, we apply an adaptive average pooling operation that squeezes each of the thirty-two channels down to a single number. This effectively summarizes the information across the whole image and ensures the model produces a fixed-length representation regardless of the exact input size. After pooling, the features are flattened into a one-dimensional vector. 
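If you want to check this data flow end to end, a small optional snippet like the one below (not part of the Learning Path scripts) instantiates the class from `digitnet_model.py` and pushes a dummy batch through it, assuming the file is in the current directory:

```python
# shape_check.py (optional): verify DigitNet's input/output shapes and parameter count
import torch
from digitnet_model import DigitNet

model = DigitNet(num_classes=10).eval()

# A dummy batch of four 28x28 grayscale crops: shape [N, 1, H, W]
x = torch.randn(4, 1, 28, 28)
with torch.no_grad():
    logits = model(x)

print("Input shape: ", tuple(x.shape))       # (4, 1, 28, 28)
print("Output shape:", tuple(logits.shape))  # (4, 10): one raw score per class
print("Trainable parameters:", sum(p.numel() for p in model.parameters() if p.requires_grad))
```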
+ +The final step is a fully connected layer that maps the thirty-two features to ten output values, one for each class. These values are raw scores (logits) that indicate how strongly the model associates the input crop with each digit. During training, a cross-entropy loss will turn these logits into probabilities and guide the model to adjust its weights. + +In practice, this means that when you feed in a batch of grayscale Sudoku cells of shape [N, 1, 28, 28], DigitNet transforms them step by step into a batch of [N, 10] outputs, where each row contains the scores for the ten possible classes. Despite its simplicity, this small CNN strikes a balance between speed and accuracy that makes it ideal for Sudoku digit recognition on resource-constrained devices. + +## Training a model +We will now prepare the self-containing script that trains the above model on the data prepared earlier. Start by creating the new file 03_Training.py and modify it as follows: +```python +import os, random, numpy as np +import torch as tr +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader +from torchvision import datasets, transforms +from tqdm import tqdm +from torch.onnx import dynamo_export +from torch.export import Dim +import onnxruntime as ort + +from digitnet_model import DigitNet + +# Configuration +random.seed(0); np.random.seed(0); tr.manual_seed(0) +DEVICE = "cpu" # keep CPU for portability +DATA_DIR = "data" # data/train/0..9, data/val/0..9 +ARTI_DIR = "artifacts" +os.makedirs(ARTI_DIR, exist_ok=True) + +BATCH = 256 +EPOCHS = 10 +LR = 1e-3 +WEIGHT_DECAY = 1e-4 +LABEL_SMOOTH = 0.05 + +# Datasets (force grayscale to match model) +tfm_train = transforms.Compose([ + transforms.Grayscale(num_output_channels=1), # force 1-channel input + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), + transforms.RandomApply([transforms.GaussianBlur(3)], p=0.15), + transforms.RandomAffine(degrees=5, translate=(0.02,0.02), scale=(0.95,1.05)), +]) +tfm_val = transforms.Compose([ + transforms.Grayscale(num_output_channels=1), # force 1-channel input + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), +]) + +train_ds = datasets.ImageFolder(os.path.join(DATA_DIR, "train"), transform=tfm_train) +val_ds = datasets.ImageFolder(os.path.join(DATA_DIR, "val"), transform=tfm_val) + +train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0) +val_loader = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0) + +def evaluate(model: nn.Module, loader: DataLoader) -> float: + model.eval() + correct = total = 0 + with tr.no_grad(): + for x, y in loader: + x, y = x.to(DEVICE), y.to(DEVICE) + pred = model(x).argmax(1) + correct += (pred == y).sum().item() + total += y.numel() + return correct / total if total else 0.0 + +def main(): + # Sanity: verify loader channels + xb, _ = next(iter(train_loader)) + print("Train batch shape:", xb.shape) # expect [B, 1, 28, 28] + + model = DigitNet(num_classes=10).to(DEVICE) + opt = tr.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY) + + best_acc, best_state = 0.0, None + for ep in range(1, EPOCHS + 1): + model.train() + for x, y in tqdm(train_loader, desc=f"epoch {ep}/{EPOCHS}"): + x, y = x.to(DEVICE), y.to(DEVICE) + opt.zero_grad() + logits = model(x) + loss = F.cross_entropy(logits, y, label_smoothing=LABEL_SMOOTH) + loss.backward() + opt.step() + + acc = evaluate(model, val_loader) + print(f"val acc: {acc:.4f}") + if acc > best_acc: + best_acc = acc + best_state = {k: 
v.cpu().clone() for k, v in model.state_dict().items()} + + if best_state is not None: + model.load_state_dict(best_state) + print(f"Best val acc: {best_acc:.4f}") + + # Save PyTorch weights (optional) + tr.save(model.state_dict(), os.path.join(ARTI_DIR, "digitnet_best.pth")) + + # Export to ONNX with dynamic batch using the Dynamo API + model.eval() + dummy = tr.randn(1, 1, 28, 28) + onnx_path = os.path.join(ARTI_DIR, "sudoku_digitnet.onnx") + + tr.onnx.export( + model, # model + dummy, # input tensor corresponds to arg name 'x' + onnx_path, # output .onnx + input_names=["input"], # ONNX *display* name (independent of arg name) + output_names=["logits"], + opset_version=19, + do_constant_folding=True, + keep_initializers_as_inputs=False, + dynamo=True, + dynamic_shapes={"x": {0: Dim("N")}} + ) + + print("Exported:", onnx_path) + + # quick parity with a big batch (proves dynamic batch works) + sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) + x = tr.randn(512, 1, 28, 28) + onnx_logits = sess.run(["logits"], {"input": x.numpy().astype(np.float32)})[0] + pt_logits = model(x).detach().numpy() + print("Parity MAE:", np.mean(np.abs(onnx_logits - pt_logits))) + +if __name__ == "__main__": + main() +``` + +This file is a self-contained trainer for the Sudoku digit classifier. It starts by fixing random seeds for reproducibility and sets DEVICE="cpu" so the workflow runs the same on desktops and Arm64 boards. It expects the dataset from the previous step under data/train/0..9 and data/val/0..9, and creates an artifacts/ folder for all outputs. + +The script builds two dataloaders (train/val) with a preprocessing stack that forces grayscale (Grayscale(num_output_channels=1)) so inputs match the model’s first convolution, converts to tensors, and normalizes to a centered range. Light augmentations on the training split—small affine jitter and occasional blur—mimic camera variability without distorting the digits. Batch size, epochs, and learning rate are set to conservative defaults so training is smooth on CPU; you can scale them up later. + +Then, the script it instantiates DigitNet(num_classes=10) model. The optimizer is AdamW with mild weight decay to control overfitting. The loss is cross-entropy with label smoothing (e.g., 0.05), which reduces over-confidence and helps on easily confused shapes (like 6/8/9). + +The training loop runs for a fixed number of epochs, iterating mini-batches from the training set. After each epoch, it evaluates on the validation split and logs the accuracy. The script keeps track of the best model state seen so far (based on val accuracy) and restores it at the end, ensuring the final model corresponds to your best epoch, not just the last one. + +The file will create two artifacts: +1. digitnet_best.pth — the best PyTorch weights (handy for quick experiments, fine-tuning, or debugging later). +2. sudoku_digitnet.onnx — the exported ONNX model, produced with PyTorch’s Dynamo exporter and a dynamic batch dimension. Dynamic batch means the model accepts input of shape [N, 1, 28, 28] for any N, which is ideal for efficient batched inference on Arm64 and for Android integration. + +Right after export, the script runs a parity test: it feeds the same randomly generated batch through both the PyTorch model and the ONNX model (executed by ONNX Runtime) and prints the mean absolute error between their logits. A tiny value confirms the exported graph faithfully matches your trained network. 
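If you want to inspect the exported graph before running inference, the `onnx` package installed earlier can validate the file and print its opset and input/output signatures. This is an optional check, separate from the Learning Path scripts, and it assumes `03_Training.py` has already produced `artifacts/sudoku_digitnet.onnx`:

```python
# inspect_model.py (optional): validate the exported ONNX file and print its interface
import onnx

model = onnx.load("artifacts/sudoku_digitnet.onnx")

# Raises an exception if the graph violates the ONNX specification
onnx.checker.check_model(model)

print("IR version:", model.ir_version)
print("Opsets:", {imp.domain or "ai.onnx": imp.version for imp in model.opset_import})

# Symbolic dimensions (such as the dynamic batch) show up as names instead of numbers
for value in list(model.graph.input) + list(model.graph.output):
    dims = [d.dim_param or d.dim_value for d in value.type.tensor_type.shape.dim]
    print(f"{value.name}: {dims}")
```

For a graphical view of the same information, you can also open the file in Netron, which was installed during environment setup.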
+ +## Running the script +To run the training script, type: + +```console +python3 03_Training.py +``` + +The script will train, validate, export, and verify the digit recognizer in one go. After it finishes, you’ll have both a portable ONNX model and a PyTorch checkpoint ready for the next step—building the image processor that detects the Sudoku grid, rectifies it, segments cells, and performs batched ONNX inference to reconstruct the board for solving. + +Here is a sample run: + +```output +python3 03_Training.py +Train batch shape: torch.Size([256, 1, 28, 28]) +epoch 1/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:24<00:00, 7.82it/s] +val acc: 0.8099 +epoch 2/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:18<00:00, 8.05it/s] +val acc: 0.8378 +epoch 3/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:17<00:00, 8.09it/s] +val acc: 0.8855 +epoch 4/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:20<00:00, 7.97it/s] +val acc: 0.9180 +epoch 5/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:20<00:00, 7.97it/s] +val acc: 0.9527 +epoch 6/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:22<00:00, 7.88it/s] +val acc: 0.9635 +epoch 7/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:22<00:00, 7.88it/s] +val acc: 0.9777 +epoch 8/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:21<00:00, 7.91it/s] +val acc: 0.9854 +epoch 9/10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:21<00:00, 7.91it/s] +val acc: 0.9912 +epoch 10/10: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 1597/1597 [03:21<00:00, 7.91it/s] +val acc: 0.9928 +Best val acc: 0.9928 +[torch.onnx] Obtain model graph for `DigitNet([...]` with `torch.export.export(..., strict=False)`... +[torch.onnx] Obtain model graph for `DigitNet([...]` with `torch.export.export(..., strict=False)`... ✅ +[torch.onnx] Run decomposition... +[torch.onnx] Run decomposition... ✅ +[torch.onnx] Translate the graph into ONNX... +[torch.onnx] Translate the graph into ONNX... ✅ +Applied 1 of general pattern rewrite rules. +Exported: artifacts/sudoku_digitnet.onnx +Parity MAE: 1.0251999e-05 +``` + +## Summary +By running the training script you train the DigitNet CNN on the Sudoku digit dataset, steadily improving accuracy across epochs until the model surpasses 99% validation accuracy. The process builds on the earlier steps where we first defined the model architecture in digitnet_model.py and then prepared a dedicated training script to handle data loading, augmentation, optimization, and evaluation. During training the best-performing model state is saved, and at the end it is exported to the ONNX format with dynamic batch support. A parity check confirms that the ONNX and PyTorch versions produce virtually identical outputs (mean error ~1e-5). 
You now have a validated ONNX model (artifacts/sudoku_digitnet.onnx) and a PyTorch checkpoint (digitnet_best.pth), both ready for integration into the Sudoku image processing pipeline. Before moving on to grid detection and solving, however, we will first run standalone inference to confirm the model’s predictions on individual digit crops. diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/05_inference.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/05_inference.md new file mode 100644 index 0000000000..3a8fdc1426 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/05_inference.md @@ -0,0 +1,249 @@ +--- +# User change +title: "Inference and Model Evaluation" + +weight: 6 + +layout: "learningpathall" +--- + +## Objective ## +In this section, we validate the digit recognizer by running inference on the validation dataset using both the PyTorch checkpoint and the exported ONNX model. We verify that PyTorch and ONNX Runtime produce consistent results, analyze class-level behavior using a confusion matrix, and generate visual diagnostics for debugging and documentation. This step acts as a final verification checkpoint before integrating the model into the full OpenCV-based Sudoku processing pipeline. + +Before introducing geometric processing, grid detection, and perspective correction, it is important to confirm that the digit recognizer works reliably in isolation. By validating inference and analyzing errors at the digit level, we ensure that any future issues in the end-to-end system can be attributed to image processing or geometry rather than the classifier itself. + +## Inference and Evaluation Script +Create a new file named 04_Test.py and paste the script below into it. This script evaluates the digit recognizer in a way that closely mirrors deployment conditions. It compares PyTorch and ONNX Runtime inference, measures accuracy on the validation dataset, and generates visual diagnostics that reveal both strengths and remaining failure modes of the model. 
+ +```python +import os, numpy as np, torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +from tqdm import tqdm +import matplotlib.pyplot as plt + +from digitnet_model import DigitNet + +DATA_DIR = "data" +ARTI_DIR = "artifacts" +os.makedirs(ARTI_DIR, exist_ok=True) + +ONNX_PATH = os.path.join(ARTI_DIR, "sudoku_digitnet.onnx") # fp32 + +# Same normalization as training (and force grayscale → 1 channel) +tfm_val = transforms.Compose([ + transforms.Grayscale(num_output_channels=1), + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)) +]) +val_ds = datasets.ImageFolder(os.path.join(DATA_DIR, "val"), transform=tfm_val) +val_loader = DataLoader(val_ds, batch_size=512, shuffle=False, num_workers=0) + +DIGIT_NAMES = [str(i) for i in range(10)] # 0 = blank, 1..9 = digits + + +def evaluate_pytorch(model, loader): + model.eval() + correct = total = 0 + with torch.no_grad(): + for x, y in loader: + pred = model(x).argmax(1) + correct += (pred == y).sum().item() + total += y.numel() + return correct / total if total else 0.0 + + +def confusion_matrix_onnx(onnx_model_path, loader): + import onnxruntime as ort + sess = ort.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"]) + mat = np.zeros((10, 10), dtype=np.int64) + total = 0 + correct = 0 + for x, y in tqdm(loader, desc="ONNX eval"): + # x: torch tensor [N,1,28,28] normalized to [-1,1] + inp = x.numpy().astype(np.float32) + logits = sess.run(["logits"], {"input": inp})[0] # [N,10] + pred = logits.argmax(axis=1) + y_np = y.numpy() + for t, p in zip(y_np, pred): + mat[t, p] += 1 + correct += (pred == y_np).sum() + total += y_np.size + acc = float(correct) / float(total) if total else 0.0 + return acc, mat + + +def plot_confusion_matrix(cm, classes=DIGIT_NAMES, normalize=False, title="Confusion matrix", fname=None): + """Plot confusion matrix. 
If normalize=True, rows sum to 1.""" + cm_plot = cm.astype("float") + if normalize: + row_sums = cm_plot.sum(axis=1, keepdims=True) + 1e-12 + cm_plot = cm_plot / row_sums + + plt.figure(figsize=(6, 5)) + plt.imshow(cm_plot, interpolation="nearest") + plt.title(title) + plt.colorbar() + tick_marks = np.arange(len(classes)) + plt.xticks(tick_marks, classes) + plt.yticks(tick_marks, classes) + + # Label each cell + thresh = cm_plot.max() / 2.0 + for i in range(cm_plot.shape[0]): + for j in range(cm_plot.shape[1]): + txt = f"{cm_plot[i, j]:.2f}" if normalize else f"{int(cm_plot[i, j])}" + plt.text(j, i, txt, + horizontalalignment="center", + verticalalignment="center", + fontsize=7, + color="white" if cm_plot[i, j] > thresh else "black") + + plt.ylabel("True label") + plt.xlabel("Predicted label") + plt.tight_layout() + if fname: + plt.savefig(fname, dpi=150) + print(f"Saved: {fname}") + plt.show() + + +def sample_predictions_onnx(onnx_path, dataset, k=24, seed=0): + """Show a grid of sample predictions (mix of correct and misclassified).""" + import onnxruntime as ort + rng = np.random.default_rng(seed) + sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) + + # Over-sample candidates then choose some wrong + some right + idxs = rng.choice(len(dataset), size=min(k * 2, len(dataset)), replace=False) + imgs, ys, preds = [], [], [] + + for i in idxs: + x, y = dataset[i] # x: [1,28,28] after transforms; y: int + x_np = x.unsqueeze(0).numpy().astype(np.float32) # [1,1,28,28] + logits = sess.run(["logits"], {"input": x_np})[0] # [1,10] + p = int(np.argmax(logits, axis=1)[0]) + imgs.append(x.squeeze(0).numpy()) # [28,28] + ys.append(int(y)) + preds.append(p) + + mis_idx = [i for i, (t, p) in enumerate(zip(ys, preds)) if t != p] + cor_idx = [i for i, (t, p) in enumerate(zip(ys, preds)) if t == p] + picked = (mis_idx[:k // 2] + cor_idx[:k - len(mis_idx[:k // 2])])[:k] + if not picked: # fallback + picked = list(range(min(k, len(imgs)))) + + # Plot grid + import math + cols = 8 + rows = math.ceil(len(picked) / cols) + plt.figure(figsize=(cols * 1.6, rows * 1.8)) + for j, idx in enumerate(picked): + plt.subplot(rows, cols, j + 1) + plt.imshow(imgs[idx], cmap="gray") + t, p = ys[idx], preds[idx] + title = f"T:{t} P:{p}" + color = "green" if t == p else "red" + plt.title(title, color=color, fontsize=9) + plt.axis("off") + plt.tight_layout() + out = os.path.join(ARTI_DIR, "samples_grid.png") + plt.savefig(out, dpi=150) + print(f"Saved: {out}") + plt.show() + +def main(): + # Optional: evaluate the best PyTorch checkpoint for reference + pt_ckpt = os.path.join(ARTI_DIR, "digitnet_best.pth") + if os.path.exists(pt_ckpt): + model = DigitNet() + model.load_state_dict(torch.load(pt_ckpt, map_location="cpu")) + pt_acc = evaluate_pytorch(model, val_loader) + print(f"PyTorch val acc: {pt_acc:.4f}") + else: + print("No PyTorch checkpoint found; skipping PT eval.") + + # Evaluate ONNX fp32 + if os.path.exists(ONNX_PATH): + acc, cm = confusion_matrix_onnx(ONNX_PATH, val_loader) + print(f"ONNX fp32 val acc: {acc:.4f}") + print("Confusion matrix (rows=true, cols=pred):\n", cm) + + # Plots: counts + normalized + plot_confusion_matrix(cm, normalize=False, + title="ONNX fp32 – Confusion (counts)", + fname=os.path.join(ARTI_DIR, "cm_fp32_counts.png")) + plot_confusion_matrix(cm, normalize=True, + title="ONNX fp32 – Confusion (row-normalized)", + fname=os.path.join(ARTI_DIR, "cm_fp32_norm.png")) + + # Sample predictions grid + try: + sample_predictions_onnx(ONNX_PATH, val_ds, k=24) + except Exception 
as e: + print("Sample grid skipped:", e) + else: + print("Missing ONNX model:", ONNX_PATH) + +if __name__ == "__main__": + main() +``` + +The script first loads the validation dataset using the same preprocessing pipeline as training, including forced grayscale conversion to ensure a single input channel. It then optionally evaluates the best PyTorch checkpoint (digitnet_best.pth) to establish a reference accuracy. + +Next, the exported ONNX model (sudoku_digitnet.onnx) is loaded using ONNX Runtime and evaluated in batches. Because the model was exported with a dynamic batch dimension, inference can be performed efficiently on larger batches, which is representative of how the model will be used later in the pipeline. + +The script expects two things from the earlier steps: +1. A validation dataset stored under data/val/0..9/… +2. A trained model exported in previous step and stored under artifacts/ + * artifacts/digitnet_best.pth (optional, PyTorch weights) + * artifacts/sudoku_digitnet.onnx (required, ONNX model) + +When you run the script, it first loads the validation dataset using the same preprocessing as training, including forcing grayscale so the input has a single channel. It then optionally evaluates the PyTorch checkpoint to provide a reference accuracy. After that, it runs batched inference with ONNX Runtime, computes an overall accuracy, and builds a confusion matrix (true class vs predicted class) that reveals which digits are being confused. + +In addition to printing accuracy metrics, the script generates two types of diagnostic outputs: +1. Confusion matrix visualizations, saved as: + * artifacts/cm_fp32_counts.png (raw counts) + * artifacts/cm_fp32_norm.png (row-normalized) +2. A grid of example predictions, saved as: + *artifacts/samples_grid.png + +These artifacts provide both quantitative and qualitative insight into model performance. + +In the sample grid, each tile shows one crop together with its True label (T:) and Predicted label (P:), with correct predictions highlighted in green and mistakes highlighted in red. This makes it easy to quickly verify that the classifier behaves sensibly and to spot remaining failure modes. + +## Running the script +Run the evaluation script from the project root: + +```console +python3 04_Test.py +``` + +In the example below, the PyTorch and ONNX accuracies match exactly, confirming that the export process preserved model behavior. + +```console +python3 04_Test.py +PyTorch val acc: 0.9928 +ONNX eval: 100%|███████████████████████████████████████████████████████████| 32/32 [00:01<00:00, 21.06it/s] +ONNX fp32 val acc: 0.9928 +Confusion matrix (rows=true, cols=pred): + [[12623 7 0 0 0 0 0 0 0 0] + [ 0 420 0 0 0 0 0 0 0 0] + [ 0 0 331 0 4 0 1 0 0 0] + [ 0 1 0 332 0 1 0 0 0 0] + [ 0 0 0 0 460 0 0 0 0 0] + [ 0 1 0 1 0 486 2 0 0 0] + [ 1 0 0 0 0 19 387 0 1 2] + [ 0 1 0 0 0 0 0 375 0 0] + [ 0 0 0 0 0 6 27 0 297 10] + [ 0 1 0 0 0 14 10 0 7 372]] +Saved: artifacts/cm_fp32_counts.png +``` + +![img1](figures/01.png) +The confusion matrix provides more insight than a single accuracy number. Each row corresponds to the true class, and each column corresponds to the predicted class. A strong diagonal indicates correct classification. In this output, blank cells (class 0) are almost always recognized correctly, while the remaining errors occur primarily between visually similar printed digits such as 6, 8, and 9. + +This behavior is expected and indicates that the model has learned meaningful digit features. 
The remaining confusions are rare and can be addressed later through targeted augmentation or higher-resolution crops if needed. + +## Summary +With inference validated and error modes understood, the digit recognizer is now ready to be embedded into the full Sudoku image-processing pipeline, where OpenCV will be used to detect the grid, rectify perspective, segment cells, and run batched ONNX inference to reconstruct and solve complete puzzles. diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/06_sudokuprocessor.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/06_sudokuprocessor.md new file mode 100644 index 0000000000..1f5e686a3b --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/06_sudokuprocessor.md @@ -0,0 +1,448 @@ +--- +# User change +title: "Sudoku Processor. From Image to Solution" +weight: 7 +layout: "learningpathall" + +--- + +## Objective ## + +In this section, we integrate all previous components into a complete Sudoku processing pipeline. Starting from a full Sudoku image, we detect and rectify the grid, split it into individual cells, recognize digits using the ONNX model, and finally solve the puzzle using a deterministic solver. By the end of this step, you will have an end-to-end system that takes a photograph of a Sudoku puzzle and produces a solved board, along with visual outputs for debugging and validation. + +## Context +So far, we have: +1. Generated a synthetic, well-labeled Sudoku digit dataset, +2. Trained a lightweight CNN (DigitNet) to recognize digits and blanks, +3. Exported the model to ONNX with dynamic batch support, +4. Validated inference correctness and analyzed errors using confusion matrices. + +At this point, the digit recognizer is reliable in isolation. The remaining challenge is connecting vision with reasoning: extracting the Sudoku grid from an image, mapping each cell to a digit, and applying a solver. This section bridges that gap. 
+ +## Overview of the pipeline +To implement the Sudoku processor, create the file (sudoku_processor.py) and paste the implementation below: + +```python +import cv2 as cv +import numpy as np +import onnxruntime as ort + +class SudokuProcessor: + def __init__( + self, + onnx_path: str, + input_size: int = 28, + warp_size: int = 450, + blank_class: int = 0, + blank_conf_threshold: float = 0.65, + providers=("CPUExecutionProvider",), + ): + """ + onnx_path: path to sudoku_digitnet.onnx + input_size: model input spatial size (28) + warp_size: size of rectified square board (e.g., 450 => each cell ~50px) + blank_class: class index used for blanks (0) + blank_conf_threshold: if model confidence < threshold, treat as blank (helps noisy cells) + """ + self.onnx_path = onnx_path + self.input_size = input_size + self.warp_size = warp_size + self.blank_class = blank_class + self.blank_conf_threshold = blank_conf_threshold + + self.sess = ort.InferenceSession(onnx_path, providers=list(providers)) + self.input_name = self.sess.get_inputs()[0].name # typically "input" + self.output_name = self.sess.get_outputs()[0].name # typically "logits" + + def process_image(self, bgr: np.ndarray, overlay: bool = True): + """ + Returns: + board (9x9 ints with 0 for blank), + solved_board (9x9 ints, or None if unsolved), + debug dict (warped, contours, etc.), + overlay_bgr (optional solution overlay) + """ + warped, H, quad = self.detect_and_warp_board(bgr) + cells = self.split_cells(warped) + board, conf = self.recognize_board(cells) + + solved = [row[:] for row in board] + ok = solve_sudoku(solved) + + overlay_img = None + if overlay and ok: + overlay_img = self.overlay_solution(bgr, H, board, solved) + + debug = { + "warped": warped, + "homography": H, + "quad": quad, + "confidence": conf, + } + return board, (solved if ok else None), debug, overlay_img + + # ----------------------------- + # Board detection / rectification + # ----------------------------- + def detect_and_warp_board(self, bgr: np.ndarray): + """ + Finds the largest Sudoku-like quadrilateral and warps it to a square. + Returns warped_board, homography, quad_points. + """ + gray = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY) + blur = cv.GaussianBlur(gray, (5, 5), 0) + + # Strong binary image helps contour finding (works well for printed grids) + thr = cv.adaptiveThreshold( + blur, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 31, 7 + ) + + # Remove small noise, connect lines a bit + kernel = cv.getStructuringElement(cv.MORPH_RECT, (3, 3)) + thr = cv.morphologyEx(thr, cv.MORPH_CLOSE, kernel, iterations=2) + + contours, _ = cv.findContours(thr, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE) + if not contours: + raise RuntimeError("No contours found. Try a clearer image or different thresholding.") + + # Pick the largest contour that approximates to 4 points + contours = sorted(contours, key=cv.contourArea, reverse=True) + quad = None + for c in contours[:20]: + peri = cv.arcLength(c, True) + approx = cv.approxPolyDP(c, 0.02 * peri, True) + if len(approx) == 4: + quad = approx.reshape(4, 2).astype(np.float32) + break + + if quad is None: + raise RuntimeError("Could not find a 4-corner Sudoku grid. 
Try a more fronto-parallel image.") + + quad = order_quad_points(quad) + + dst = np.array( + [[0, 0], [self.warp_size - 1, 0], [self.warp_size - 1, self.warp_size - 1], [0, self.warp_size - 1]], + dtype=np.float32, + ) + H = cv.getPerspectiveTransform(quad, dst) + warped = cv.warpPerspective(bgr, H, (self.warp_size, self.warp_size)) + + return warped, H, quad + + # ----------------------------- + # Cell splitting / preprocessing + # ----------------------------- + def split_cells(self, warped_bgr: np.ndarray): + """ + Splits a rectified square board into 81 cell images. + Returns list of (r, c, cell_bgr). + """ + cells = [] + step = self.warp_size // 9 + for r in range(9): + for c in range(9): + y0, y1 = r * step, (r + 1) * step + x0, x1 = c * step, (c + 1) * step + cell = warped_bgr[y0:y1, x0:x1].copy() + cells.append((r, c, cell)) + return cells + + def preprocess_cell(self, cell_bgr: np.ndarray): + """ + Produces a 28x28 float32 tensor in the same normalization as training: + grayscale -> [0,1] -> normalize to [-1,1] via (x-0.5)/0.5 + Also tries to suppress grid lines / borders by cropping margins. + """ + g = cv.cvtColor(cell_bgr, cv.COLOR_BGR2GRAY) + + # Crop a margin to remove grid lines/borders + h, w = g.shape + m = int(0.12 * min(h, w)) # ~12% margin + g = g[m:h - m, m:w - m] + + # Binarize & clean (helps isolate printed digits) + g_blur = cv.GaussianBlur(g, (3, 3), 0) + bw = cv.adaptiveThreshold(g_blur, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 21, 5) + + # Remove small specks + bw = cv.morphologyEx(bw, cv.MORPH_OPEN, np.ones((2, 2), np.uint8), iterations=1) + + # If almost empty => likely blank + if (bw > 0).sum() < 15: + # Return a near-empty input; classifier should produce blank + resized = cv.resize(g, (self.input_size, self.input_size), interpolation=cv.INTER_AREA) + else: + # Use bw mask to focus on digit; keep as grayscale for the model + resized = cv.resize(g, (self.input_size, self.input_size), interpolation=cv.INTER_AREA) + + x = resized.astype(np.float32) / 255.0 + x = (x - 0.5) / 0.5 # [-1,1] + x = x[None, None, :, :] # [1,1,H,W] + return x + + # ----------------------------- + # Inference + # ----------------------------- + def recognize_board(self, cells): + """ + Runs batched ONNX inference on 81 cells and returns: + board[9][9] with 0 for blank + conf[9][9] with max softmax probability + """ + xs = [] + coords = [] + for r, c, cell in cells: + coords.append((r, c)) + xs.append(self.preprocess_cell(cell)) + + X = np.concatenate(xs, axis=0).astype(np.float32) # [81,1,28,28] + logits = self.sess.run([self.output_name], {self.input_name: X})[0] # [81,10] + probs = softmax(logits, axis=1) + pred = probs.argmax(axis=1) + conf = probs.max(axis=1) + + board = [[0 for _ in range(9)] for _ in range(9)] + conf_grid = [[0.0 for _ in range(9)] for _ in range(9)] + for i, (r, c) in enumerate(coords): + p = int(pred[i]) + cf = float(conf[i]) + + # Optional safety: low-confidence => blank + if cf < self.blank_conf_threshold: + p = self.blank_class + + board[r][c] = p + conf_grid[r][c] = cf + + return board, conf_grid + + # ----------------------------- + # Overlay + # ----------------------------- + def overlay_solution(self, original_bgr, H, board, solved): + """ + Overlays ONLY the filled-in digits (where original board has 0). 
+ """ + invH = np.linalg.inv(H) + overlay = original_bgr.copy() + + step = self.warp_size // 9 + # Create a transparent layer in warped space then map back + layer = np.zeros((self.warp_size, self.warp_size, 3), dtype=np.uint8) + + for r in range(9): + for c in range(9): + if board[r][c] != 0: + continue + d = solved[r][c] + # text placement in warped coordinates + x = int(c * step + step * 0.32) + y = int(r * step + step * 0.72) + cv.putText(layer, str(d), (x, y), cv.FONT_HERSHEY_SIMPLEX, 1.2, (0, 200, 0), 2, cv.LINE_AA) + + # Warp overlay layer back to original image + h0, w0 = original_bgr.shape[:2] + back = cv.warpPerspective(layer, invH, (w0, h0)) + + # Blend + mask = (back.sum(axis=2) > 0).astype(np.uint8) * 255 + mask3 = cv.merge([mask, mask, mask]) + overlay = np.where(mask3 > 0, cv.addWeighted(overlay, 0.6, back, 0.4, 0), overlay) + return overlay + + +# ----------------------------- +# Solver (backtracking) +# ----------------------------- +def solve_sudoku(board): + pos = find_empty(board) + if pos is None: + return True + r, c = pos + for v in range(1, 10): + if valid(board, r, c, v): + board[r][c] = v + if solve_sudoku(board): + return True + board[r][c] = 0 + return False + + +def find_empty(board): + for r in range(9): + for c in range(9): + if board[r][c] == 0: + return (r, c) + return None + + +def valid(board, r, c, v): + # row + for j in range(9): + if board[r][j] == v: + return False + # col + for i in range(9): + if board[i][c] == v: + return False + # box + br, bc = 3 * (r // 3), 3 * (c // 3) + for i in range(br, br + 3): + for j in range(bc, bc + 3): + if board[i][j] == v: + return False + return True + + +# ----------------------------- +# Utilities +# ----------------------------- +def order_quad_points(pts): + """ + Orders 4 points into: top-left, top-right, bottom-right, bottom-left. + """ + pts = np.array(pts, dtype=np.float32) + s = pts.sum(axis=1) + diff = np.diff(pts, axis=1).reshape(-1) + + tl = pts[np.argmin(s)] + br = pts[np.argmax(s)] + tr = pts[np.argmin(diff)] + bl = pts[np.argmax(diff)] + + return np.array([tl, tr, br, bl], dtype=np.float32) + + +def softmax(x, axis=1): + x = x - np.max(x, axis=axis, keepdims=True) + e = np.exp(x) + return e / (np.sum(e, axis=axis, keepdims=True) + 1e-12) +``` + +The Sudoku processor follows a sequence of steps: +1. Grid detection – find the outer Sudoku grid in the input image. +2. Perspective rectification – warp the grid to a square, top-down view. +3. Cell extraction – split the rectified grid into 81 cell images. +4. Digit recognition – run batched ONNX inference to classify each cell. +5. Board reconstruction – assemble a 9×9 numeric board. +6. Solving – apply a backtracking Sudoku solver. +7. Visualization – overlay the solution and render clean board images. + +We encapsulate the entire pipeline in a reusable class called SudokuProcessor. This class loads the ONNX model once and exposes a single high-level method that processes an input image and returns both intermediate results and final outputs. + +Conceptually, the processor: +* Accepts a BGR image, +* Returns the recognized board, the solved board (if solvable), and optional visual overlays. + +This design keeps inference fast and makes the processor easy to integrate later into an Android application or embedded system. + +## Grid detection and rectification +The first task is to locate the Sudoku grid in the image. We convert the image to grayscale, apply adaptive thresholding, and use contour detection to find large rectangular shapes. 
The largest contour that approximates a quadrilateral is assumed to be the Sudoku grid. + +Once the four corners are identified, we compute a perspective transform and warp the grid into a square image. This rectified representation removes camera tilt and perspective distortion, allowing all subsequent steps to assume a fixed geometry. + +We order the four corners consistently (top-left → top-right → bottom-right → bottom-left) before computing the perspective transform. + +## Splitting the grid into cells +After rectification, the grid is divided evenly into a 9×9 array. Each cell is cropped based on its row and column index. At this stage, every cell corresponds to one Sudoku position and is ready for preprocessing and classification. + +Each cell undergoes light preprocessing before inference: +* Conversion to grayscale, +* Cropping of a small margin to suppress grid lines, +* Adaptive thresholding and morphological cleanup to isolate printed digits, +* Resizing to the model’s input size (28×28), +* Normalization to match the training distribution. + +We crop a margin to suppress grid lines, because grid strokes can dominate the digit pixels and cause systematic misclassification. Cells with very little foreground content are treated as blank candidates, reducing false digit detections in empty cells. + +## Batched ONNX inference +All 81 cell tensors are stacked into a single batch and passed to ONNX Runtime in one call. Because the model was exported with a dynamic batch dimension, this batched inference is efficient and mirrors how the model will be used in production. + +The output logits are converted to probabilities, and the most likely class is selected for each cell. Optionally, a confidence threshold can be applied so that low-confidence predictions are treated as blanks. + +The result is a 9×9 board where: +* 0 represents a blank cell, +* 1–9 represent recognized digits. + +## Solving the Sudoku +With the recognized board constructed, we apply a classic backtracking Sudoku solver. This solver deterministically fills empty cells while respecting Sudoku constraints (row, column, and 3×3 block rules). + +If the solver succeeds, we obtain a complete solution. If it fails, the failure usually indicates one or more recognition errors, which can be diagnosed using the intermediate visual outputs. + +## Visualization and outputs +The processor saves several artifacts to help debugging and demonstration: +- `artifacts/warped.png` – rectified top-down view of the Sudoku grid. +- `artifacts/overlay_solution.png` – solution digits overlaid onto the original image (if solved). +- (Optional) `artifacts/recognized_board.png`, `artifacts/solved_board.png`, `artifacts/boards_side_by_side.png` – clean board renderings if you enabled those helpers. + +The driver script below saves warped.png and overlay_solution.png by default. + +## Running the processor +A small driver script (05_RunSudokuProcessor.py) demonstrates how to use the SudokuProcessor: + +```python +import os +import cv2 as cv + +from sudoku_processor import SudokuProcessor + +def print_board(board, title="Board"): + print("\n" + title) + for r in range(9): + row = "" + for c in range(9): + v = board[r][c] + row += (". " if v == 0 else f"{v} ") + if c % 3 == 2 and c != 8: + row += "| " + print(row.strip()) + if r % 3 == 2 and r != 8: + print("-" * 21) + + +def main(): + # Use any image path you like: + # - a real photo + # - a synthetic grid, e.g. 
data/grids/val/000001_cam.png + img_path = "data/grids/val/000001_cam.png" + onnx_path = os.path.join("artifacts", "sudoku_digitnet.onnx") + + bgr = cv.imread(img_path) + if bgr is None: + raise RuntimeError(f"Could not read image: {img_path}") + + proc = SudokuProcessor(onnx_path=onnx_path, warp_size=450, blank_conf_threshold=0.65) + + board, solved, dbg, overlay = proc.process_image(bgr, overlay=True) + + print_board(board, "Recognized board") + if solved is None: + print("\nSolver failed (board might contain recognition errors).") + else: + print_board(solved, "Solved board") + + # Save debug outputs + cv.imwrite("artifacts/warped.png", dbg["warped"]) + if overlay is not None: + cv.imwrite("artifacts/overlay_solution.png", overlay) + print("\nSaved: artifacts/overlay_solution.png") + print("Saved: artifacts/warped.png") + +if __name__ == "__main__": + main() +``` + +You simply provide the path to a Sudoku image and the ONNX model, and the script saves all intermediate and final results to the artifacts/ directory. + +Representational result is shown below: + +![img2](figures/02.png) + +## Summary +By completing this section, you have built a full vision-to-solution Sudoku system: +1. A trained and validated ONNX digit recognizer, +2. A robust OpenCV-based image processing pipeline, +3. A deterministic solver, +4. Clear visual diagnostics at every stage. + +In the next step of the learning path, we will focus on optimization and deployment. diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/07_optimisation.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/07_optimisation.md new file mode 100644 index 0000000000..a8e38c6ca7 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/07_optimisation.md @@ -0,0 +1,424 @@ +--- +title: "Model Enhancements and Optimizations" +weight: 8 +layout: "learningpathall" +--- + +## Objective +In this section, we improve the Sudoku system from a working prototype into something that is faster, smaller, and more robust on Arm64-class hardware. We start by measuring a baseline, then apply ONNX Runtime optimizations and quantization, and finally address the most common real bottleneck: image preprocessing. At each step we re-check accuracy and solve rate so performance gains don’t come at the cost of correctness. + +## Establish a baseline +Before applying any optimizations, it is essential to understand where time is actually being spent in the Sudoku pipeline. Without this baseline, it is impossible to tell whether an optimization is effective or whether it simply shifts the bottleneck elsewhere. + +In the current system, the total latency of processing a single Sudoku image is composed of four main stages: +* Grid detection and warping – locating the outer Sudoku grid and rectifying it using a perspective transform. This step relies entirely on OpenCV and depends on image resolution, lighting, and grid clarity. +* Cell preprocessing – converting each of the 81 cells into a normalized 28×28 grayscale input for the neural network. This includes cropping margins, thresholding, and morphological operations. In practice, this stage is often the dominant cost. +* ONNX inference – running the digit recognizer on all 81 cells as a single batch. Thanks to dynamic batch support, this step is typically fast compared to preprocessing. +* Solving – applying a backtracking Sudoku solver to the recognized board. This step is usually negligible in runtime, unless recognition errors lead to difficult or contradictory boards. 
+ +To quantify these contributions, we will add simple timing measurements around each stage of the pipeline using a high-resolution clock (time.perf_counter()). For each processed image, we will print a breakdown: +* warp_ms – time spent on grid detection and perspective rectification +* preprocess_ms – total time spent preprocessing all 81 cells +* onnx_ms – time spent running batched ONNX inference +* solve_ms – time spent solving the Sudoku +* split_ms – time spent splitting the warped grid into 81 cells +* total_ms – end-to-end processing time + +## Performance measurements +Open the sudoku_processor.py and add the following import + +```python +import time +``` + +Then, modify the process_image as follows +```python +def process_image(self, bgr: np.ndarray, overlay: bool = True): + """ + Returns: + board (9x9 ints with 0 for blank), + solved_board (9x9 ints, or None if unsolved), + debug dict (warped, homography, confidence, timing), + overlay_bgr (optional solution overlay) + """ + timing = {} + + t_total0 = time.perf_counter() + + # --- Grid detection + warp --- + t0 = time.perf_counter() + warped, H, quad = self.detect_and_warp_board(bgr) + timing["warp_ms"] = (time.perf_counter() - t0) * 1000.0 + + # --- Cell splitting --- + t0 = time.perf_counter() + cells = self.split_cells(warped) + timing["split_ms"] = (time.perf_counter() - t0) * 1000.0 + + # --- Preprocessing (81 cells) --- + t0 = time.perf_counter() + xs = [] + coords = [] + for r, c, cell in cells: + coords.append((r, c)) + xs.append(self.preprocess_cell(cell)) + X = np.concatenate(xs, axis=0).astype(np.float32) # [81,1,28,28] + timing["preprocess_ms"] = (time.perf_counter() - t0) * 1000.0 + + # --- ONNX inference --- + t0 = time.perf_counter() + logits = self.sess.run([self.output_name], {self.input_name: X})[0] + timing["onnx_ms"] = (time.perf_counter() - t0) * 1000.0 + + # --- Postprocess predictions --- + probs = softmax(logits, axis=1) + pred = probs.argmax(axis=1) + conf = probs.max(axis=1) + + board = [[0 for _ in range(9)] for _ in range(9)] + conf_grid = [[0.0 for _ in range(9)] for _ in range(9)] + for i, (r, c) in enumerate(coords): + p = int(pred[i]) + cf = float(conf[i]) + if cf < self.blank_conf_threshold: + p = self.blank_class + board[r][c] = p + conf_grid[r][c] = cf + + # --- Solve --- + t0 = time.perf_counter() + solved = [row[:] for row in board] + ok = solve_sudoku(solved) + timing["solve_ms"] = (time.perf_counter() - t0) * 1000.0 + + # --- Overlay (optional) --- + overlay_img = None + if overlay and ok: + t0 = time.perf_counter() + overlay_img = self.overlay_solution(bgr, H, board, solved) + timing["overlay_ms"] = (time.perf_counter() - t0) * 1000.0 + else: + timing["overlay_ms"] = 0.0 + + timing["total_ms"] = (time.perf_counter() - t_total0) * 1000.0 + + debug = { + "warped": warped, + "homography": H, + "quad": quad, + "confidence": conf_grid, + "timing": timing, + } + + return board, (solved if ok else None), debug, overlay_img +``` + +Finally, print the timings in the 05_RunSudokuProcessor.py: +```python +def main(): + # Use any image path you like: + # - a real photo + # - a synthetic grid, e.g. 
data/grids/val/000001_cam.png + img_path = "data/grids/val/000002_cam.png" + onnx_path = os.path.join("artifacts", "sudoku_digitnet.onnx") + + bgr = cv.imread(img_path) + if bgr is None: + raise RuntimeError(f"Could not read image: {img_path}") + + proc = SudokuProcessor(onnx_path=onnx_path, warp_size=450, blank_conf_threshold=0.65) + + board, solved, dbg, overlay = proc.process_image(bgr, overlay=True) + + print_board(board, "Recognized board") + if solved is None: + print("\nSolver failed (board might contain recognition errors).") + else: + print_board(solved, "Solved board") + + # Save debug outputs + cv.imwrite("artifacts/warped.png", dbg["warped"]) + if overlay is not None: + cv.imwrite("artifacts/overlay_solution.png", overlay) + print("\nSaved: artifacts/overlay_solution.png") + print("Saved: artifacts/warped.png") + + tim = dbg["timing"] + print( + f"warp={tim['warp_ms']:.1f} ms | " + f"preprocess={tim['preprocess_ms']:.1f} ms | " + f"onnx={tim['onnx_ms']:.1f} ms | " + f"solve={tim['solve_ms']:.1f} ms | " + f"total={tim['total_ms']:.1f} ms" + ) + +if __name__ == "__main__": + main() +``` + +The sample output will look as follows: +```output +python3 05_RunSudokuProcessor.py + +Recognized board +. . . | 7 . . | 6 . . +. . 4 | . . . | 1 . 9 +. . . | 1 5 . | . . . +--------------------- +. . . | . 1 . | . . . +. . . | . . . | . . . +3 . . | . . . | . 6 . +--------------------- +7 . . | . . . | . . . +. . 9 | . . . | . . . +. . . | . . . | . . . + +Solved board +1 2 3 | 7 4 9 | 6 5 8 +5 6 4 | 2 3 8 | 1 7 9 +8 9 7 | 1 5 6 | 2 3 4 +--------------------- +2 4 5 | 6 1 3 | 8 9 7 +9 1 6 | 4 8 7 | 3 2 5 +3 7 8 | 5 9 2 | 4 6 1 +--------------------- +7 3 1 | 8 2 5 | 9 4 6 +4 5 9 | 3 6 1 | 7 8 2 +6 8 2 | 9 7 4 | 5 1 3 + +Saved: artifacts/overlay_solution.png +Saved: artifacts/warped.png +warp=11.9 ms | preprocess=3.3 ms | onnx=1.9 ms | solve=3.1 ms | total=48.2 ms +``` + +## Folder benchmark +The single-image measurements introduced earlier are useful for understanding the rough structure of the pipeline and for verifying that ONNX inference is not the main computational bottleneck. In our case, batched ONNX inference typically takes less than 2 ms, while grid detection, warping, and preprocessing dominate the runtime. However, individual measurements can be noisy due to caching effects, operating system scheduling, and Python overhead. + +To obtain more reliable performance numbers, we extend the evaluation to multiple images and compute aggregated statistics. This allows us to track not only average performance, but also variability and tail latency, which are particularly important for interactive applications. + +To do this, we add two helper functions to 05_RunSudokuProcessor.py, and make sure you have import glob and import numpy as np at the top of the runner script. 
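+
+For reference, the import block at the top of `05_RunSudokuProcessor.py` would then look roughly like this (`os`, `cv2`, and `SudokuProcessor` were already imported in the earlier version; `glob` and `numpy` are the new additions):
+
+```python
+import os
+import glob            # new: collects the benchmark image paths
+
+import cv2 as cv
+import numpy as np     # new: aggregates the timing statistics
+
+from sudoku_processor import SudokuProcessor
+```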
+ +The first function, summarize, computes basic statistics from a list of timing measurements: +* mean – average runtime +* median – robust central tendency +* p90 / p95 – tail latency (90th and 95th percentiles), which indicate how bad the slow cases are + +```python +def summarize(values): + values = np.asarray(values, dtype=np.float64) + return { + "mean": float(values.mean()), + "median": float(np.median(values)), + "p90": float(np.percentile(values, 90)), + "p95": float(np.percentile(values, 95)), + } +``` + +The second function, benchmark_folder, runs the full Sudoku pipeline on a collection of images and aggregates timing results across multiple runs: + +```python +def benchmark_folder(proc, folder_glob, limit=100, warmup=10, overlay=False): + paths = sorted(glob.glob(folder_glob)) + if not paths: + raise RuntimeError(f"No images matched: {folder_glob}") + paths = paths[:limit] + + # Warmup + for p in paths[:min(warmup, len(paths))]: + bgr = cv.imread(p) + if bgr is None: + continue + proc.process_image(bgr, overlay=overlay) + + # Benchmark + agg = {k: [] for k in ["warp_ms", "preprocess_ms", "onnx_ms", "solve_ms", "total_ms"]} + solved_cnt = 0 + total_cnt = 0 + + for p in paths: + bgr = cv.imread(p) + if bgr is None: + continue + + board, solved, dbg, _ = proc.process_image(bgr, overlay=overlay) + tim = dbg["timing"] + + for k in agg: + agg[k].append(tim[k]) + + total_cnt += 1 + if solved is not None: + solved_cnt += 1 + + print(f"\nSolved {solved_cnt}/{total_cnt} ({(solved_cnt/total_cnt*100.0 if total_cnt else 0):.1f}%)") + + print("\nTiming summary (ms):") + for k in ["warp_ms", "preprocess_ms", "onnx_ms", "solve_ms", "total_ms"]: + s = summarize(agg[k]) + print(f"{k:14s} mean={s['mean']:.2f} median={s['median']:.2f} p90={s['p90']:.2f} p95={s['p95']:.2f}") +``` + +Finally, we invoke the benchmark in the main() function: + +```python +def main(): + onnx_path = os.path.join("artifacts", "sudoku_digitnet.onnx") + + proc = SudokuProcessor(onnx_path=onnx_path, warp_size=450, blank_conf_threshold=0.65) + + benchmark_folder(proc, "data/grids/val/*_cam.png", limit=30, warmup=10, overlay=False) + +if __name__ == "__main__": + main() +``` + +This evaluates the processor on a representative subset of camera-like validation grids, prints aggregated timing statistics, and reports the overall solve rate. + +Aggregated benchmarks provide a much more accurate picture than single measurements, especially when individual stages take only a few milliseconds. By reporting median and tail latencies, you can see whether occasional slow cases exist and whether an optimization truly improves user-perceived performance. Percentiles are particularly useful when a few slow cases exist (e.g., harder solves), because they reveal tail latency. These results form a solid quantitative baseline that you can reuse to evaluate every optimization that follows. + +Here is the sample output of the updated script: +```output +python3 05_RunSudokuProcessor.py + +Solved 30/30 (100.0%) + +Timing summary (ms): +warp_ms mean=10.25 median=10.27 p90=10.57 p95=10.59 +preprocess_ms mean=3.01 median=2.98 p90=3.16 p95=3.21 +onnx_ms mean=1.27 median=1.24 p90=1.30 p95=1.45 +solve_ms mean=74.76 median=2.02 p90=48.51 p95=74.82 +total_ms mean=89.41 median=16.97 p90=62.95 p95=89.43 +``` + +Notice that solve_ms (and therefore total_ms) has a much larger mean than median. This indicates a small number of outliers where the solver takes significantly longer. 
In practice, this occurs when one or more digits are misrecognized, forcing the backtracking solver to explore many branches before finding a solution (or failing). For interactive applications, median and p95 latency are more informative than the mean, as they better reflect typical user experience. + +## ONNX Runtime session optimizations +Now that you can measure onnx_ms and total_ms, the first low-effort improvement is to enable ONNX Runtime’s built-in graph optimizations and tune CPU threading. These changes do not modify the model, but can reduce inference overhead and improve throughput. + +In sudoku_processor.py, update the ONNX Runtime session initialization in __init__ to use SessionOptions: +```python +so = ort.SessionOptions() +so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL + +self.sess = ort.InferenceSession(onnx_path, sess_options=so, providers=list(providers)) +``` + +Re-run 05_RunSudokuProcessor.py and compare onnx_ms and total_ms to the baseline. + +```output +python3 05_RunSudokuProcessor.py + +Solved 30/30 (100.0%) + +Timing summary (ms): +warp_ms mean=10.43 median=10.36 p90=10.89 p95=10.96 +preprocess_ms mean=3.13 median=3.11 p90=3.34 p95=3.42 +onnx_ms mean=1.28 median=1.26 p90=1.37 p95=1.47 +solve_ms mean=78.61 median=2.01 p90=50.15 p95=77.87 +total_ms mean=93.58 median=17.06 p90=65.10 p95=92.55 +``` + +This result is expected for such a small model: ONNX inference is already efficient, and the dominant costs lie in image preprocessing and occasional solver backtracking. This highlights why system-level profiling is essential before focusing on model-level optimizations. + +## Quantize the model (FP32 -> INT8) +Quantization is one of the most impactful optimizations for Arm64 and mobile deployments because it reduces both model size and compute cost. For CNNs, the most compatible approach is static INT8 quantization in QDQ format. This uses a small calibration set to estimate activation ranges and typically works well across runtimes. 
+ +Create a small script 06_QuantizeModel.py: + +```python +import os, glob +import numpy as np +import cv2 as cv + +from onnxruntime.quantization import ( + quantize_static, CalibrationDataReader, QuantFormat, QuantType +) + +ARTI_DIR = "artifacts" +FP32_PATH = os.path.join(ARTI_DIR, "sudoku_digitnet.onnx") +INT8_PATH = os.path.join(ARTI_DIR, "sudoku_digitnet.int8.onnx") + +# ---- Calibration data reader ---- +class SudokuCalibReader(CalibrationDataReader): + def __init__(self, folder_glob="data/train/0/*.png", limit=500, input_name="input", input_size=28): + self.input_name = input_name + self.input_size = input_size + + paths = sorted(glob.glob(folder_glob))[:limit] + self._iter = iter(paths) + + def get_next(self): + try: + p = next(self._iter) + except StopIteration: + return None + + g = cv.imread(p, cv.IMREAD_GRAYSCALE) + if g is None: + return self.get_next() + + g = cv.resize(g, (self.input_size, self.input_size), interpolation=cv.INTER_AREA) + x = g.astype(np.float32) / 255.0 + x = (x - 0.5) / 0.5 + x = x[None, None, :, :] # [1,1,28,28] + return {self.input_name: x} + +# ---- Run quantization ---- +reader = SudokuCalibReader(folder_glob="data/train/*/*.png", limit=1000) + +print("Quantizing (QDQ static INT8)...") +quantize_static( + model_input=FP32_PATH, + model_output=INT8_PATH, + calibration_data_reader=reader, + quant_format=QuantFormat.QDQ, # key: keep Conv as Conv with Q/DQ wrappers + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + per_channel=True # usually helps conv accuracy +) + +print("Saved:", INT8_PATH) +``` + +Run python 06_QuantizeModel.py + +Then update the runner script to point to the quantized model: + +```python +onnx_path = os.path.join("artifacts", "sudoku_digitnet.int8.onnx") +``` + +Re-run the processor and compare: +* onnx_ms (should improve or remain similar) +* total_ms +* solve success (should remain stable) + +Also compare file sizes: +```console +ls -lh artifacts/sudoku_digitnet.onnx artifacts/sudoku_digitnet.int8.onnx +``` +Even when inference time changes only modestly, size reduction is typically significant and matters for Android packaging. + +In this pipeline, quantization primarily reduces model size and improves deployability, while runtime speedups may be modest because inference is already a small fraction of the total latency. + +## Preprocessing-focused optimizations (highest impact) +The measurements above show that ONNX inference accounts for only a small fraction of the total runtime. In practice, the largest performance gains come from optimizing image preprocessing. + +The most effective improvements include: +- Converting the rectified board to grayscale **once**, instead of converting each cell independently. +- Adding an early “blank cell” check to skip expensive thresholding and morphology for empty cells. +- Using simpler thresholding (e.g., Otsu) on clean images, and reserving adaptive thresholding for difficult lighting conditions. +- Reducing or conditionally disabling morphological operations when cells already appear clean. + +These changes typically reduce `preprocess_ms` more than any model-level optimization, and therefore have the greatest impact on end-to-end latency. + +## Summary +In this section, we transformed the Sudoku solver from a functional prototype into a system with measurable, well-understood performance characteristics. By instrumenting the pipeline with fine-grained timing, we identified where computation is actually spent and established a quantitative baseline. 
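+
+Before wrapping up, here is one possible shape for the preprocessing-focused ideas listed above. Treat it strictly as a sketch: the function name, the std-based blank cutoff, and the returned blank hints are illustrative choices rather than part of the pipeline built earlier, and any change along these lines should be re-checked against the folder benchmark for accuracy and solve rate.
+
+```python
+import cv2 as cv
+import numpy as np
+
+def preprocess_board_fast(warped_bgr, input_size=28, margin_frac=0.12, blank_std=12.0):
+    """Leaner preprocessing sketch: grayscale once, cheap blank check, no per-cell
+    adaptive thresholding or morphology. blank_std is an illustrative cutoff."""
+    gray = cv.cvtColor(warped_bgr, cv.COLOR_BGR2GRAY)       # convert the whole board once
+    step = gray.shape[0] // 9
+    xs, blank_hints = [], []
+    for r in range(9):
+        for c in range(9):
+            cell = gray[r * step:(r + 1) * step, c * step:(c + 1) * step]
+            m = int(margin_frac * step)                      # crop away grid lines
+            inner = cell[m:step - m, m:step - m]
+            blank_hints.append(bool(inner.std() < blank_std))  # uniform cell => likely blank
+            x = cv.resize(inner, (input_size, input_size), interpolation=cv.INTER_AREA)
+            x = (x.astype(np.float32) / 255.0 - 0.5) / 0.5   # same normalization as training
+            xs.append(x[None, None, :, :])
+    return np.concatenate(xs, axis=0), blank_hints           # [81,1,28,28] batch + blank hints
+```
+
+The blank hints can be combined with the existing confidence threshold in `recognize_board` to force low-content cells to the blank class without running any thresholding at all.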
+ +We showed that: +- Batched ONNX inference is already efficient (≈1–2 ms per board). +- Image preprocessing dominates runtime and offers the largest optimization potential. +- Solver backtracking introduces rare but significant tail-latency outliers. +- ONNX Runtime optimizations and INT8 quantization improve deployability, even when raw inference speed gains are modest. + +Most importantly, we demonstrated a systematic optimization workflow: **measure first, optimize second, and always re-validate correctness**. With performance, robustness, and accuracy validated, the Sudoku pipeline is now ready for its final step—deployment as a fully on-device Android application. \ No newline at end of file diff --git a/content/learning-paths/mobile-graphics-and-gaming/onnx/08_android.md b/content/learning-paths/mobile-graphics-and-gaming/onnx/08_android.md new file mode 100644 index 0000000000..b87e820384 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/onnx/08_android.md @@ -0,0 +1,1059 @@ +--- +# User change +title: "Android Deployment. From Model to App" + +weight: 9 + +layout: "learningpathall" +--- + +## Objective ## +In this section, we transition from a desktop prototype to a fully on-device Android application. The goal is to demonstrate how the optimized Sudoku pipeline—image preprocessing, ONNX inference, and deterministic solving—can be packaged and executed entirely on a mobile device, without relying on any cloud services. + +Rather than starting with a live camera feed, we begin with a fixed input bitmap that was generated earlier in the learning path. This approach allows us to focus on correctness, performance, and integration details before introducing additional complexity such as camera permissions, real-time capture, and varying lighting conditions. By keeping the input controlled, we can verify that the Android implementation faithfully reproduces the behavior observed in Python. + +Over the course of this section, we will: +1. Create a new Android project and add the required dependencies. +2. Bundle the trained ONNX model and a sample Sudoku image with the application. +3. Implement a minimal user interface that loads the image and triggers the solver. +4. Re-implement the Sudoku processing pipeline on Android, including preprocessing, batched ONNX inference, and solving. +5. Display the solved result as an image, confirming that the entire pipeline runs locally on the device. + +By the end of this section, you will have a working Android app that takes a Sudoku image, runs neural network inference and solving on-device, and displays the solution. This completes the learning path by showing how a trained and optimized ONNX model can be deployed in a real mobile application, closing the loop from data generation and training to practical, end-user deployment. + +## Project creation +We start by creating a new Android project using Android Studio. This project will host the Sudoku solver application and serve as the foundation for integrating ONNX Runtime and OpenCV. + +1. Create a new project: +* Open Android Studio and click New Project. +* In the Templates screen, select Phone and Tablet, then choose Empty Views Activity. +![img3](figures/03.png) + +This template creates a minimal Android application without additional UI components, which is ideal for a focused, step-by-step integration. + +* Click Next to proceed to the project configuration screen. + +2. Configure the project. 
In the configuration screen, fill in the fields as follows: +* Name: SudokuSolverOnnx. This is the application name that will appear in Android Studio and on the device. +* Package name: com.arm.sudokusolveronnx. This package name clearly reflects the purpose of the app and its use of ONNX on Arm platforms. +* Save location. Choose a convenient directory on your system (for example, your repositories folder). +* Language: Kotlin. Kotlin is the recommended language for modern Android development and integrates cleanly with ONNX Runtime APIs. +* Minimum SDK: API 24 (Android 7.0 – Nougat). This provides wide device coverage while remaining compatible with ONNX Runtime and OpenCV. +* Build configuration language: Kotlin DSL (build.gradle.kts). We use the Kotlin DSL for Gradle, which is now the recommended option. + +![img4](figures/04.png) + +* After confirming these settings, click Finish. Android Studio will create the project and generate a basic MainActivity along with the necessary Gradle files. + +## View +We now define the user interface of the Android application. The goal of this view is to remain intentionally simple while clearly exposing the end-to-end Sudoku workflow. The interface will consist of: +* A button row at the top that allows the user to load a Sudoku image and trigger the solver. +* A status text area used to display short messages (for example, whether an image has been loaded or the puzzle has been solved). +* An input image view that displays the selected Sudoku bitmap. +* An output image view that displays the solved result. + +This layout is sufficient to validate that the ONNX model, preprocessing pipeline, and solver are all working correctly on Android before adding more advanced features such as camera input or animations. + +To define the view, open the file res/layout/activity_main.xml and replace its contents with the following layout definition: +```xml + + + + + + + + + +