Merge pull request #710 from roboflow/feature/workflows_profiler
Workflows profiler
PawelPeczek-Roboflow authored Oct 4, 2024
2 parents 74b0eb4 + 0139543 commit e7071a0
Showing 54 changed files with 2,029 additions and 130 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -169,4 +169,7 @@ docs/workflows/gallery/*
!tests/workflows/integration_tests/execution/assets/*.jpg
!tests/workflows/integration_tests/execution/assets/rock_paper_scissors/*.jpg
!tests/workflows/unit_tests/core_steps/models/third_party/assets/*.png
!tests/workflows/integration_tests/execution/assets/*.png
!tests/workflows/integration_tests/execution/assets/*.png

inference_profiling
tests/inference_sdk/unit_tests/http/inference_profiling
2 changes: 1 addition & 1 deletion development/stream_interface/workflows_demo.py
@@ -45,7 +45,7 @@ def main() -> None:
],
}
pipeline = InferencePipeline.init_with_workflow(
video_reference=["rtsp://localhost:8554/live.stream"],
video_reference=["rtsp://localhost:8554/live0.stream"],
workflow_specification=workflow_specification,
watchdog=watchdog,
on_prediction=workflows_sink,
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu
@@ -69,11 +69,12 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu.dev
@@ -69,11 +69,12 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT watchmedo auto-restart --directory=/app/inference --pattern=*.py --recursive -- uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.parallel
@@ -74,6 +74,7 @@ ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT redis-server --io-threads 3 --save --port $REDIS_PORT & \
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu.slim
@@ -57,8 +57,9 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.gpu
@@ -73,12 +73,13 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.dev
@@ -78,6 +78,7 @@ ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.parallel
@@ -63,6 +63,7 @@ ENV REDIS_HOST=localhost
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT python3 entrypoint.py
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.gpu.slim
@@ -52,9 +52,10 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.jetson.4.5.0
@@ -67,11 +67,12 @@ ENV HOST=0.0.0.0
ENV PORT=9001
ENV OPENBLAS_CORETYPE=ARMV8
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.jetson.4.6.1
@@ -82,11 +82,12 @@ ENV HOST=0.0.0.0
ENV PORT=9001
ENV OPENBLAS_CORETYPE=ARMV8
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 2 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1
@@ -78,11 +78,12 @@ ENV PORT=9001
ENV OPENBLAS_CORETYPE=ARMV8
ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=2
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.lambda.slim
@@ -65,6 +65,7 @@ ENV ALLOW_NON_HTTPS_URL_INPUT=False
ENV ALLOW_URL_INPUT_WITHOUT_FQDN=False
ENV ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=False
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True

WORKDIR ${LAMBDA_TASK_ROOT}

2 changes: 2 additions & 0 deletions docker/dockerfiles/Dockerfile.onnx.trt
@@ -52,10 +52,12 @@ ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=4
ENV API_LOGGING_ENABLED=True
ENV PYTHONPATH=/app/:${PYTHONPATH}
ENV PATH=/opt/miniconda/bin:$PATH
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
23 changes: 23 additions & 0 deletions docs/inference_helpers/inference_sdk.md
@@ -536,6 +536,24 @@ Please note that either `specification` is provided with specification of workfl
both `workspace_name` and `workflow_id` are given to use workflow predefined in Roboflow app. `workspace_name`
can be found in Roboflow APP URL once browser shows the main panel of workspace.

!!! warning "Server-side caching of Workflow definitions"

In `inference v0.22.0` we've added server-side caching of Workflow definitions registered on the Roboflow platform, which is
**enabled by default**. When you use the `run_workflow(...)` method with `workspace_name` and `workflow_id`, the
server will cache the definition for 15 minutes. If you change the definition in the Workflows UI and re-run the
method, you may not see the change. To force processing without the cache, pass `use_cache=False` as a parameter of
the `run_workflow(...)` method, as shown in the sketch below.
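
A minimal sketch of bypassing the cache, assuming an `InferenceHTTPClient` pointed at your own `inference` server (the server URL, API key and workflow identifiers below are placeholders):

```python
from inference_sdk import InferenceHTTPClient

# Placeholder server URL and credentials - adjust to your deployment.
client = InferenceHTTPClient(
    api_url="http://127.0.0.1:9001",
    api_key="<YOUR-API-KEY>",
)

# use_cache=False skips the 15-minute server-side cache of the Workflow definition,
# so edits made in the Workflows UI are reflected immediately.
result = client.run_workflow(
    workspace_name="<your-workspace-name>",
    workflow_id="<your-workflow-id>",
    images={"image": "https://your-image-url"},
    use_cache=False,
)
```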


!!! tip "Workflows profiling"

Since `inference v0.22.0`, you may request a profiler trace of your Workflow execution from the server by passing
the `enable_profiling=True` parameter to the `run_workflow(...)` method. If the server configuration allows traces
to be exposed, you will find a JSON file with the trace in the directory specified by the `profiling_directory`
parameter of `InferenceConfiguration` - by default, the `inference_profiling` directory in your current working
directory. The traces can be loaded and rendered directly in Google Chrome - navigate to `chrome://tracing` in your
browser and hit the "load" button. A minimal request sketch follows below.
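
The sketch below requests a trace, assuming the server was started with `ENABLE_WORKFLOWS_PROFILING=True`; the client setup mirrors the hypothetical one above:

```python
from inference_sdk import InferenceHTTPClient

client = InferenceHTTPClient(
    api_url="http://127.0.0.1:9001",  # placeholder server URL
    api_key="<YOUR-API-KEY>",
)

# enable_profiling=True asks the server to attach the profiler trace to the response;
# the SDK saves it as a JSON file (by default under ./inference_profiling).
result = client.run_workflow(
    workspace_name="<your-workspace-name>",
    workflow_id="<your-workflow-id>",
    images={"image": "https://your-image-url"},
    enable_profiling=True,
)
# Open chrome://tracing in Google Chrome and load the generated JSON file to inspect the run.
```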


## Details about client configuration

@@ -700,6 +718,11 @@ to prevent errors)
when internet connection is a bottleneck and large images are submitted despite small
model input size).

### Configuration of Workflows execution

- `profiling_directory`: specifies the location where Workflows profiler traces are saved. By default, it is the
`./inference_profiling` directory.
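
A short sketch of overriding that location via `InferenceConfiguration` (the directory path is only an example):

```python
from inference_sdk import InferenceConfiguration, InferenceHTTPClient

# Store profiler traces in a custom directory instead of ./inference_profiling.
configuration = InferenceConfiguration(profiling_directory="./my_profiler_traces")
client = InferenceHTTPClient(
    api_url="http://127.0.0.1:9001",  # placeholder server URL
    api_key="<YOUR-API-KEY>",
)
client.configure(configuration)
```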

## FAQs

## Why does the Inference client have two modes (`v0` and `v1`)?
17 changes: 11 additions & 6 deletions docs/server_configuration/environmental_variables.md
@@ -4,9 +4,14 @@

Below is a list of some environmental values that require more in-depth explanation.

Environmental variable | Default | Description
------------------------------------------ | ------------------------------------------------------------------------ | -----------
ONNXRUNTIME_EXECUTION_PROVIDERS | "[CUDAExecutionProvider,OpenVINOExecutionProvider,CPUExecutionProvider]" | List of execution providers in priority order, warning message will be displayed if provider is not supported on user platform
SAM2_MAX_EMBEDDING_CACHE_SIZE | 100 | The number of sam2 embeddings that will be held in memory. The embeddings will be held in gpu memory. Each embedding takes 16777216 bytes.
SAM2_MAX_LOGITS_CACHE_SIZE | 1000 | The number of sam2 logits that will be held in memory. The the logits will be in cpu memory. Each logit takes 262144 bytes.
DISABLE_SAM2_LOGITS_CACHE | False | If set to True, disables the caching of SAM2 logits. This can be useful for debugging or in scenarios where memory usage needs to be minimized, but may result in slower performance for repeated similar requests.
Environmental variable | Description | Default
------------------------------------------ |--------------------------------------------------------------------------| -----------
`ONNXRUNTIME_EXECUTION_PROVIDERS` | List of execution providers in priority order, warning message will be displayed if provider is not supported on user platform | See [here](https://github.com/roboflow/inference/blob/main/inference/core/env.py#L262)
`SAM2_MAX_EMBEDDING_CACHE_SIZE` | The number of SAM2 embeddings that will be held in memory. The embeddings are held in GPU memory. Each embedding takes 16777216 bytes. | 100
`SAM2_MAX_LOGITS_CACHE_SIZE` | The number of SAM2 logits that will be held in memory. The logits are held in CPU memory. Each logit takes 262144 bytes. | 1000
`DISABLE_SAM2_LOGITS_CACHE` | If set to True, disables the caching of SAM2 logits. This can be useful for debugging or in scenarios where memory usage needs to be minimized, but may result in slower performance for repeated similar requests. | False
`ENABLE_WORKFLOWS_PROFILING` | If set to True, the `inference` server is allowed to return Workflows profiler traces to the client; when running the Python package with `InferencePipeline`, it enables profiling. | False
`WORKFLOWS_PROFILER_BUFFER_SIZE` | Size of the profiler buffer (number of consecutive Workflows Execution Engine `run(...)` invocations kept in the trace buffer). | 64
`ENABLE_STREAM_API` | Flag to enable Stream Management API in `inference` server - see [more](/workflows/video_processing/overview/). | False
`RUNS_ON_JETSON` | Boolean flag to tell if `inference` runs on Jetson device - set to `True` in all docker builds for Jetson architecture. | False
`WORKFLOWS_DEFINITION_CACHE_EXPIRY` | Number of seconds to cache Workflows definitions as a result of `get_workflow_specification(...)` function call | `15 * 60` - 15 minutes
8 changes: 8 additions & 0 deletions docs/using_inference/inference_pipeline.md
@@ -209,6 +209,14 @@ pipeline = InferencePipeline.init_with_workflow(
)
```

!!! tip "Workflows profiling"

Since `inference v0.22.0`, you may profile your Workflow execution inside `InferencePipeline` by
exporting the environmental variable `ENABLE_WORKFLOWS_PROFILING=True`. Additionally, you can tune the
number of frames kept in the profiler buffer via the `WORKFLOWS_PROFILER_BUFFER_SIZE` environmental variable.
`init_with_workflow(...)` also gained a new `profiling_directory` parameter, which can be adjusted to
dictate where the trace is saved. A sketch of putting these together is shown below.
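
The sketch below assumes a Workflow registered on the Roboflow platform and a Roboflow API key available in the environment; the video source and sink are placeholders:

```python
import os

# Must be set before importing inference, which reads its environment at import time.
os.environ["ENABLE_WORKFLOWS_PROFILING"] = "True"
os.environ["WORKFLOWS_PROFILER_BUFFER_SIZE"] = "128"

from inference import InferencePipeline


def my_sink(predictions, video_frame) -> None:
    # Placeholder sink - replace with your own handling of Workflow outputs.
    pass


pipeline = InferencePipeline.init_with_workflow(
    video_reference="./your-video.mp4",        # placeholder video source
    workspace_name="<your-workspace-name>",    # placeholder workflow identifiers
    workflow_id="<your-workflow-id>",
    on_prediction=my_sink,
    profiling_directory="./inference_profiling",  # where the profiler trace is saved
)
pipeline.start()
pipeline.join()
```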

## Sinks

Sinks define what an Inference Pipeline should do with each prediction. A sink is a function with signature:
12 changes: 8 additions & 4 deletions docs/workflows/modes_of_running.md
@@ -81,11 +81,11 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
workspace_name="<your-workspace-name>",
workflow_id="<your-workflow-id>",
images={
"image": "https://your-image-url"
"image": ["https://your-image-url", "https://your-other-image-url"]
},
parameters={
"parameter": "some-value"
}
},
)
```

@@ -97,7 +97,9 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
- method parameter named `images` is supposed to be filled with dictionary that contains names and values
for all Workflow inputs declared as `WorkflowImage`. Names must match your Workflow definition,
as value you can pass either `np.array`, `PIL.Image`, URL to your image, local path to your image
or image in `base64` string. It is optional if Workflow does not define images as inputs.
or image in `base64` string. It is optional if Workflow does not define images as inputs.
- **Batch input for images is supported - simply pass a list of images under the given input name.**

- method parameter named `parameters` is supposed to be filled with dictionary that contains names and values
for all Workflow inputs of type `WorkflowParameter`. It's optional and must be filled according to Workflow
@@ -123,7 +125,7 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
workspace_name="<your-workspace-name>",
workflow_id="<your-workflow-id>",
images={
"image": "https://your-image-url"
"image": ["https://your-image-url", "https://your-other-image-url"]
},
parameters={
"parameter": "some-value"
@@ -140,6 +142,8 @@ leverage our Inference SDK in Python, or even use cURL. Explore the examples bel
for all Workflow inputs declared as `WorkflowImage`. Names must match your Workflow definition,
as value you can pass either `np.array`, `PIL.Image`, URL to your image, local path to your image
or image in `base64` string. It is optional if Workflow does not define images as inputs.

- **Batch input for images is supported - simply pass a list of images under the given input name.**

- method parameter named `parameters` is supposed to be filled with dictionary that contains names and values
for all Workflow inputs of type `WorkflowParameter`. It's optional and must be filled according to Workflow
23 changes: 23 additions & 0 deletions inference/core/entities/requests/workflows.py
@@ -18,6 +18,20 @@ class WorkflowInferenceRequest(BaseModel):
default=None,
description="List of field that shall be excluded from the response (among those defined in workflow specification)",
)
enable_profiling: bool = Field(
default=False,
description="Flag to request Workflow run profiling. Enables Workflow profiler only when server settings "
"allow profiling traces to be exported to clients. Only applies for Workflows definitions saved "
"on Roboflow platform.",
)


class PredefinedWorkflowInferenceRequest(WorkflowInferenceRequest):
use_cache: bool = Field(
default=True,
description="Controls usage of cache for workflow definitions. Set this to False when you frequently modify "
"definition saved in Roboflow app and want to fetch the newest version for the request.",
)


class WorkflowSpecificationInferenceRequest(WorkflowInferenceRequest):
@@ -46,5 +60,14 @@ class DescribeInterfaceRequest(BaseModel):
)


class PredefinedWorkflowDescribeInterfaceRequest(DescribeInterfaceRequest):
use_cache: bool = Field(
default=True,
description="Controls usage of cache for workflow definitions. Set this to False when you frequently modify "
"definition saved in Roboflow app and want to fetch the newest version for the request. "
"Only applies for Workflows definitions saved on Roboflow platform.",
)


class WorkflowSpecificationDescribeInterfaceRequest(DescribeInterfaceRequest):
specification: dict
4 changes: 4 additions & 0 deletions inference/core/entities/responses/workflows.py
@@ -16,6 +16,10 @@ class WorkflowInferenceResponse(BaseModel):
outputs: List[Dict[str, Any]] = Field(
description="Dictionary with keys defined in workflow output and serialised values"
)
profiler_trace: Optional[List[dict]] = Field(
description="Profiler events",
default=None,
)


class WorkflowValidationStatus(BaseModel):
6 changes: 6 additions & 0 deletions inference/core/env.py
@@ -430,3 +430,9 @@
ENABLE_STREAM_API = str2bool(os.getenv("ENABLE_STREAM_API", "False"))

RUNS_ON_JETSON = str2bool(os.getenv("RUNS_ON_JETSON", "False"))

ENABLE_WORKFLOWS_PROFILING = str2bool(os.getenv("ENABLE_WORKFLOWS_PROFILING", "False"))
WORKFLOWS_PROFILER_BUFFER_SIZE = int(os.getenv("WORKFLOWS_PROFILER_BUFFER_SIZE", "64"))
WORKFLOWS_DEFINITION_CACHE_EXPIRY = int(
os.getenv("WORKFLOWS_DEFINITION_CACHE_EXPIRY", 15 * 60)
)