Merge pull request #710 from roboflow/feature/workflows_profiler
Workflows profiler
PawelPeczek-Roboflow authored Oct 4, 2024
2 parents 74b0eb4 + 0139543 commit e7071a0
Showing 54 changed files with 2,029 additions and 130 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -169,4 +169,7 @@ docs/workflows/gallery/*
!tests/workflows/integration_tests/execution/assets/*.jpg
!tests/workflows/integration_tests/execution/assets/rock_paper_scissors/*.jpg
!tests/workflows/unit_tests/core_steps/models/third_party/assets/*.png
!tests/workflows/integration_tests/execution/assets/*.png
!tests/workflows/integration_tests/execution/assets/*.png

inference_profiling
tests/inference_sdk/unit_tests/http/inference_profiling
2 changes: 1 addition & 1 deletion development/stream_interface/workflows_demo.py
@@ -45,7 +45,7 @@ def main() -> None:
],
}
pipeline = InferencePipeline.init_with_workflow(
video_reference=["rtsp://localhost:8554/live.stream"],
video_reference=["rtsp://localhost:8554/live0.stream"],
workflow_specification=workflow_specification,
watchdog=watchdog,
on_prediction=workflows_sink,
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu
@@ -69,11 +69,12 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu.dev
@@ -69,11 +69,12 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT watchmedo auto-restart --directory=/app/inference --pattern=*.py --recursive -- uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.parallel
@@ -74,6 +74,7 @@ ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT redis-server --io-threads 3 --save --port $REDIS_PORT & \
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu.slim
@@ -57,8 +57,9 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.gpu
@@ -73,12 +73,13 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.dev
@@ -78,6 +78,7 @@ ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.parallel
@@ -63,6 +63,7 @@ ENV REDIS_HOST=localhost
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT python3 entrypoint.py
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.gpu.slim
@@ -52,9 +52,10 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.jetson.4.5.0
@@ -67,11 +67,12 @@ ENV HOST=0.0.0.0
ENV PORT=9001
ENV OPENBLAS_CORETYPE=ARMV8
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.jetson.4.6.1
@@ -82,11 +82,12 @@ ENV HOST=0.0.0.0
ENV PORT=9001
ENV OPENBLAS_CORETYPE=ARMV8
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1
@@ -78,11 +78,12 @@ ENV PORT=9001
ENV OPENBLAS_CORETYPE=ARMV8
ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.lambda.slim
@@ -65,6 +65,7 @@ ENV ALLOW_NON_HTTPS_URL_INPUT=False
ENV ALLOW_URL_INPUT_WITHOUT_FQDN=False
ENV ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=False
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True

WORKDIR ${LAMBDA_TASK_ROOT}

2 changes: 2 additions & 0 deletions docker/dockerfiles/Dockerfile.onnx.trt
@@ -52,10 +52,12 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV PYTHONPATH=/app/:${PYTHONPATH}
ENV PATH=/opt/miniconda/bin:$PATH
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
23 changes: 23 additions & 0 deletions docs/inference_helpers/inference_sdk.md
@@ -536,6 +536,24 @@ Please note that either `specification` is provided with specification of workfl
both `workspace_name` and `workflow_id` are given to use workflow predefined in Roboflow app. `workspace_name`
can be found in Roboflow APP URL once browser shows the main panel of workspace.

!!! warning "Server-side caching of Workflow definitions"

In `inference v0.22.0` we've added server-side caching of Workflow definitions registered on the Roboflow platform, which is
**enabled by default**. When you use the `run_workflow(...)` method with `workspace_name` and `workflow_id`, the
server will cache the definition for 15 minutes. If you change the definition in the Workflows UI and re-run the
method, you may not see the change. To force processing without the cache, pass `use_cache=False` as a parameter of
the `run_workflow(...)` method, as shown in the sketch below.
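
A minimal sketch of bypassing the cache, assuming an `InferenceHTTPClient` pointed at your own `inference` server (the server URL, API key and workflow identifiers below are placeholders):

```python
from inference_sdk import InferenceHTTPClient

# Placeholder server URL and credentials - adjust to your deployment.
client = InferenceHTTPClient(
    api_url="http://127.0.0.1:9001",
    api_key="<YOUR-API-KEY>",
)

# use_cache=False skips the 15-minute server-side cache of the Workflow definition,
# so edits made in the Workflows UI are reflected immediately.
result = client.run_workflow(
    workspace_name="<your-workspace-name>",
    workflow_id="<your-workflow-id>",
    images={"image": "https://your-image-url"},
    use_cache=False,
)
```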


!!! tip "Workflows profiling"

Since `inference v0.22.0`, you may request a profiler trace of your Workflow execution from the server by passing
the `enable_profiling=True` parameter to the `run_workflow(...)` method. If the server configuration allows traces
to be exposed, you will find a JSON file with the trace in the directory specified by the `profiling_directory`
parameter of `InferenceConfiguration` - by default, the `inference_profiling` directory in your current working
directory. The traces can be loaded and rendered directly in Google Chrome - navigate to `chrome://tracing` in your
browser and hit the "load" button. A minimal request sketch follows below.
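
The sketch below requests a trace, assuming the server was started with `ENABLE_WORKFLOWS_PROFILING=True`; the client setup mirrors the hypothetical one above:

```python
from inference_sdk import InferenceHTTPClient

client = InferenceHTTPClient(
    api_url="http://127.0.0.1:9001",  # placeholder server URL
    api_key="<YOUR-API-KEY>",
)

# enable_profiling=True asks the server to attach the profiler trace to the response;
# the SDK saves it as a JSON file (by default under ./inference_profiling).
result = client.run_workflow(
    workspace_name="<your-workspace-name>",
    workflow_id="<your-workflow-id>",
    images={"image": "https://your-image-url"},
    enable_profiling=True,
)
# Open chrome://tracing in Google Chrome and load the generated JSON file to inspect the run.
```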


## Details about client configuration

@@ -700,6 +718,11 @@ to prevent errors)
when internet connection is a bottleneck and large images are submitted despite small
model input size).

### Configuration of Workflows execution

- `profiling_directory`: specifies the location where Workflows profiler traces are saved. By default, it is the
`./inference_profiling` directory.
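
A short sketch of overriding that location via `InferenceConfiguration` (the directory path is only an example):

```python
from inference_sdk import InferenceConfiguration, InferenceHTTPClient

# Store profiler traces in a custom directory instead of ./inference_profiling.
configuration = InferenceConfiguration(profiling_directory="./my_profiler_traces")
client = InferenceHTTPClient(
    api_url="http://127.0.0.1:9001",  # placeholder server URL
    api_key="<YOUR-API-KEY>",
)
client.configure(configuration)
```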

## FAQs

## Why does the Inference client have two modes (`v0` and `v1`)?
17 changes: 11 additions & 6 deletions docs/server_configuration/environmental_variables.md
@@ -4,9 +4,14 @@

Below is a list of some environmental values that require more in-depth explanation.

Environmental variable | Default | Description
------------------------------------------ | ------------------------------------------------------------------------ | -----------
ONNXRUNTIME_EXECUTION_PROVIDERS | "[CUDAExecutionProvider,OpenVINOExecutionProvider,CPUExecutionProvider]" | List of execution providers in priority order, warning message will be displayed if provider is not supported on user platform
SAM2_MAX_EMBEDDING_CACHE_SIZE | 100 | The number of sam2 embeddings that will be held in memory. The embeddings will be held in gpu memory. Each embedding takes 16777216 bytes.
SAM2_MAX_LOGITS_CACHE_SIZE | 1000 | The number of sam2 logits that will be held in memory. The the logits will be in cpu memory. Each logit takes 262144 bytes.
DISABLE_SAM2_LOGITS_CACHE | False | If set to True, disables the caching of SAM2 logits. This can be useful for debugging or in scenarios where memory usage needs to be minimized, but may result in slower performance for repeated similar requests.
Environmental variable | Description | Default
------------------------------------------ |--------------------------------------------------------------------------| -----------
`ONNXRUNTIME_EXECUTION_PROVIDERS` | List of execution providers in priority order, warning message will be displayed if provider is not supported on user platform | See [here](https://github.com/roboflow/inference/blob/main/inference/core/env.py#L262)
`SAM2_MAX_EMBEDDING_CACHE_SIZE` | The number of SAM2 embeddings that will be held in memory. The embeddings are held in GPU memory. Each embedding takes 16777216 bytes. | 100
`SAM2_MAX_LOGITS_CACHE_SIZE` | The number of SAM2 logits that will be held in memory. The logits are held in CPU memory. Each logit takes 262144 bytes. | 1000
`DISABLE_SAM2_LOGITS_CACHE` | If set to True, disables the caching of SAM2 logits. This can be useful for debugging or in scenarios where memory usage needs to be minimized, but may result in slower performance for repeated similar requests. | False
`ENABLE_WORKFLOWS_PROFILING` | If set to True, the `inference` server is allowed to return Workflows profiler traces to the client; when running the Python package with `InferencePipeline`, it enables profiling. | False
`WORKFLOWS_PROFILER_BUFFER_SIZE` | Size of the profiler buffer (number of consecutive Workflows Execution Engine `run(...)` invocations kept in the trace buffer). | 64
`ENABLE_STREAM_API` | Flag to enable Stream Management API in `inference` server - see [more](/workflows/video_processing/overview/). | False
`RUNS_ON_JETSON` | Boolean flag to tell if `inference` runs on Jetson device - set to `True` in all docker builds for Jetson architecture. | False
`WORKFLOWS_DEFINITION_CACHE_EXPIRY` | Number of seconds to cache Workflows definitions as a result of `get_workflow_specification(...)` function call | `15 * 60` - 15 minutes
8 changes: 8 additions & 0 deletions docs/using_inference/inference_pipeline.md
@@ -209,6 +209,14 @@ pipeline = InferencePipeline.init_with_workflow(
)
```

!!! tip "Workflows profiling"

Since `inference v0.22.0`, you may profile your Workflow execution inside `InferencePipeline` by
exporting the environmental variable `ENABLE_WORKFLOWS_PROFILING=True`. Additionally, you can tune the
number of frames kept in the profiler buffer via the `WORKFLOWS_PROFILER_BUFFER_SIZE` environmental variable.
`init_with_workflow(...)` also gained a new `profiling_directory` parameter, which can be adjusted to
dictate where the trace is saved. A sketch of putting these together is shown below.
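
The sketch below assumes a Workflow registered on the Roboflow platform and a Roboflow API key available in the environment; the video source and sink are placeholders:

```python
import os

# Must be set before importing inference, which reads its environment at import time.
os.environ["ENABLE_WORKFLOWS_PROFILING"] = "True"
os.environ["WORKFLOWS_PROFILER_BUFFER_SIZE"] = "128"

from inference import InferencePipeline


def my_sink(predictions, video_frame) -> None:
    # Placeholder sink - replace with your own handling of Workflow outputs.
    pass


pipeline = InferencePipeline.init_with_workflow(
    video_reference="./your-video.mp4",        # placeholder video source
    workspace_name="<your-workspace-name>",    # placeholder workflow identifiers
    workflow_id="<your-workflow-id>",
    on_prediction=my_sink,
    profiling_directory="./inference_profiling",  # where the profiler trace is saved
)
pipeline.start()
pipeline.join()
```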

## Sinks

Sinks define what an Inference Pipeline should do with each prediction. A sink is a function with signature:
12 changes: 8 additions & 4 deletions docs/workflows/modes_of_running.md
@@ -81,11 +81,11 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
workspace_name="<your-workspace-name>",
workflow_id="<your-workflow-id>",
images={
"image": "https://your-image-url"
"image": ["https://your-image-url", "https://your-other-image-url"]
},
parameters={
"parameter": "some-value"
}
},
)
```

@@ -97,7 +97,9 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
- method parameter named `images` is supposed to be filled with dictionary that contains names and values
for all Workflow inputs declared as `WorkflowImage`. Names must match your Workflow definition,
as value you can pass either `np.array`, `PIL.Image`, URL to your image, local path to your image
or image in `base64` string. It is optional if Workflow does not define images as inputs.
or image in `base64` string. It is optional if Workflow does not define images as inputs.
- **Batch input for images is supported - simply pass a list of images under the given input name.**

- method parameter named `parameters` is supposed to be filled with dictionary that contains names and values
for all Workflow inputs of type `WorkflowParameter`. It's optional and must be filled according to Workflow
@@ -123,7 +125,7 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
workspace_name="<your-workspace-name>",
workflow_id="<your-workflow-id>",
images={
"image": "https://your-image-url"
"image": ["https://your-image-url", "https://your-other-image-url"]
},
parameters={
"parameter": "some-value"
@@ -140,6 +142,8 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
for all Workflow inputs declared as `WorkflowImage`. Names must match your Workflow definition,
as value you can pass either `np.array`, `PIL.Image`, URL to your image, local path to your image
or image in `base64` string. It is optional if Workflow does not define images as inputs.

- **Batch input for images is supported - simply pass a list of images under the given input name.**

- method parameter named `parameters` is supposed to be filled with dictionary that contains names and values
for all Workflow inputs of type `WorkflowParameter`. It's optional and must be filled according to Workflow
23 changes: 23 additions & 0 deletions inference/core/entities/requests/workflows.py
@@ -18,6 +18,20 @@ class WorkflowInferenceRequest(BaseModel):
default=None,
description="List of field that shall be excluded from the response (among those defined in workflow specification)",
)
enable_profiling: bool = Field(
default=False,
description="Flag to request Workflow run profiling. Enables Workflow profiler only when server settings "
"allow profiling traces to be exported to clients. Only applies for Workflows definitions saved "
"on Roboflow platform.",
)


class PredefinedWorkflowInferenceRequest(WorkflowInferenceRequest):
use_cache: bool = Field(
default=True,
description="Controls usage of cache for workflow definitions. Set this to False when you frequently modify "
"definition saved in Roboflow app and want to fetch the newest version for the request.",
)


class WorkflowSpecificationInferenceRequest(WorkflowInferenceRequest):
@@ -46,5 +60,14 @@ class DescribeInterfaceRequest(BaseModel):
)


class PredefinedWorkflowDescribeInterfaceRequest(DescribeInterfaceRequest):
use_cache: bool = Field(
default=True,
description="Controls usage of cache for workflow definitions. Set this to False when you frequently modify "
"definition saved in Roboflow app and want to fetch the newest version for the request. "
"Only applies for Workflows definitions saved on Roboflow platform.",
)


class WorkflowSpecificationDescribeInterfaceRequest(DescribeInterfaceRequest):
specification: dict
4 changes: 4 additions & 0 deletions inference/core/entities/responses/workflows.py
@@ -16,6 +16,10 @@ class WorkflowInferenceResponse(BaseModel):
outputs: List[Dict[str, Any]] = Field(
description="Dictionary with keys defined in workflow output and serialised values"
)
profiler_trace: Optional[List[dict]] = Field(
description="Profiler events",
default=None,
)


class WorkflowValidationStatus(BaseModel):
6 changes: 6 additions & 0 deletions inference/core/env.py
@@ -430,3 +430,9 @@
ENABLE_STREAM_API = str2bool(os.getenv("ENABLE_STREAM_API", "False"))

RUNS_ON_JETSON = str2bool(os.getenv("RUNS_ON_JETSON", "False"))

ENABLE_WORKFLOWS_PROFILING = str2bool(os.getenv("ENABLE_WORKFLOWS_PROFILING", "False"))
WORKFLOWS_PROFILER_BUFFER_SIZE = int(os.getenv("WORKFLOWS_PROFILER_BUFFER_SIZE", "64"))
WORKFLOWS_DEFINITION_CACHE_EXPIRY = int(
os.getenv("WORKFLOWS_DEFINITION_CACHE_EXPIRY", 15 * 60)
)