Skip to content

Commit

Permalink
Merge branch 'main' into 3234-bring-in-change-from-main
Browse files Browse the repository at this point in the history
  • Loading branch information
aronchick committed Jan 25, 2024
2 parents bea22ad + 849dab5 commit 91ba4ae
Show file tree
Hide file tree
Showing 8 changed files with 341 additions and 0 deletions.
28 changes: 28 additions & 0 deletions ops/metrics/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Usage
**Start containers:**
```shell
docker-compose up
```
**Export the collection endpoint for Bacalhau**
```shell
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
```
**Start Bacalhau**
```shell
bacalhau serve --node-type=compute,requester
```
**Open Browser**
- Grafana: http://localhost:3000
- Username: `admin`
- Password: `admin`
- Jaeger: http://localhost:16686

**Clean up**
- Remove the volumes associated with the containers to reset state, for example with `docker-compose down -v`.

**Saving Changes to a Grafana Dashboard**
- Export the dashboard from Grafana as JSON.
- Save it to the file `./grafana/provisioning/dashboards/dashboard.json`.

# Best Practices for Telemetry Collections
[OpenTelemetry In Bacalhau](../../docs/docs/dev/open_telemetry_in_bacalhau.md)
52 changes: 52 additions & 0 deletions ops/metrics/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
# Local metrics/tracing stack for Bacalhau development:
# Prometheus (metrics storage), Grafana (dashboards), an OpenTelemetry
# collector (receives telemetry from Bacalhau) and Jaeger (trace UI).
#
# All port mappings are written as quoted strings so that no YAML parser can
# reinterpret an "xx:yy" pair as a base-60 number.
version: '3.5'

services:
  prometheus:
    image: prom/prometheus:latest
    volumes:
      # Scrape configuration (prometheus.yml) comes from the local directory.
      - ./prometheus/:/etc/prometheus/
      # Named volume so the time-series data survives container restarts.
      - prometheus-storage:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    ports:
      - "9090:9090"
    restart: always

  grafana:
    image: grafana/grafana
    depends_on:
      - prometheus
    volumes:
      # Datasource provisioning
      - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources
      # Dashboard provisioning
      - ./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards
    ports:
      - "3000:3000"
    restart: always

  opentelemetry-collector:
    image: otel/opentelemetry-collector:latest
    # Start the collector with the custom config mounted below.
    command:
      - "--config=/etc/otel-collector-config.yaml"
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      # OTLP over HTTP, published on the loopback interface only.
      - "127.0.0.1:4318:4318"
      # OpenTelemetry protocol.
      # NOTE(review): otel-collector-config.yaml only configures a receiver on
      # 4318 - confirm whether this mapping is still needed.
      - "55681:55681"
    depends_on:
      - prometheus

  jaeger:
    container_name: jaeger
    image: jaegertracing/all-in-one:latest
    ports:
      - "6831:6831/udp"
      - "5778:5778"
      # NOTE(review): the collector exports traces to jaeger:4317 over the
      # compose network - confirm that 4316 (not 4317) is intended here.
      - "4316:4316"
      # Jaeger web UI (http://localhost:16686).
      - "16686:16686"
      - "14268:14268"

volumes:
  prometheus-storage: {}
187 changes: 187 additions & 0 deletions ops/metrics/grafana/provisioning/dashboards/dashboard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "P6EBD7EB59B5FF381"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P6EBD7EB59B5FF381"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "bacalhau_jobs_received_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Jobs Received",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P6EBD7EB59B5FF381"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 8
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P6EBD7EB59B5FF381"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "bacalhau_jobs_completed_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Jobs Completed",
"type": "stat"
}
],
"refresh": "",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Bacalhau Metrics",
"uid": "cbe6c668-d74b-4a27-be8b-431c19b2d4ca",
"version": 1,
"weekStart": ""
}
9 changes: 9 additions & 0 deletions ops/metrics/grafana/provisioning/dashboards/dashboards.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Grafana dashboard provisioning: load dashboards from files on disk.
apiVersion: 1

providers:
  # Provider name; has to be unique.
  - name: "default"
    # Organization ID (optional, 1 is the default).
    orgId: 1
    # Grafana folder the dashboards are saved in.
    folder: ""
    type: file
    options:
      # Path inside the container that holds the dashboard files.
      path: /etc/grafana/provisioning/dashboards
7 changes: 7 additions & 0 deletions ops/metrics/grafana/provisioning/datasources/datasources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Grafana data source provisioning: registers the Prometheus service from
# docker-compose.yaml as a data source.
apiVersion: 1

datasources:
  - name: Prometheus OTEL
    type: prometheus
    # Pinned to the data source uid that the panels in
    # dashboards/dashboard.json reference, so the dashboard keeps resolving
    # its data source even if the name above is changed.
    uid: P6EBD7EB59B5FF381
    access: proxy
    # Compose service name and port of the Prometheus container.
    url: http://prometheus:9090
Empty file.
53 changes: 53 additions & 0 deletions ops/metrics/otel-collector-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
# Receivers: accept telemetry sent by the Bacalhau OTel SDK over OTLP/HTTP.
receivers:
  otlp:
    protocols:
      http:
        endpoint: "0.0.0.0:4318"

# Processors: batch the data and label it with 'otel' as the service
# collector.
processors:
  # No options are set for the batch processor.
  batch:
  memory_limiter:
    check_interval: 5s
    limit_mib: 4000
    spike_limit_mib: 500
  resource:
    attributes:
      - key: service.collector
        value: otel
        action: insert
  attributes/metrics:
    actions:
      # Delete every attribute whose key matches this regular expression.
      - pattern: net\.sock.+
        action: delete

exporters:
  # Metrics are exported to Prometheus; prometheus.yml scrapes this endpoint.
  prometheus:
    endpoint: "0.0.0.0:9095"
    # The Grafana dashboard queries metrics named bacalhau_*.
    namespace: "bacalhau"
  # Uncomment for debugging; prints all metrics to stdout.
  # logging:
  #   loglevel: debug
  # Traces go to the Jaeger instance.
  otlp/jaeger:
    endpoint: "jaeger:4317"
    tls:
      insecure: true
      insecure_skip_verify: true

service:
  pipelines:
    metrics:
      receivers:
        - otlp
      processors:
        - memory_limiter
        - resource
        - attributes/metrics
        - batch
      exporters:
        - prometheus
      # Debugging alternative:
      # exporters: [prometheus, logging]
    traces:
      receivers:
        - otlp
      processors:
        - memory_limiter
        - resource
        - attributes/metrics
        - batch
      exporters:
        - otlp/jaeger
      # Debugging alternative:
      # exporters: [logging, otlp/jaeger]

5 changes: 5 additions & 0 deletions ops/metrics/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
# Scrape the Prometheus exporter endpoint of the OpenTelemetry collector
# (compose service "opentelemetry-collector", port 9095).
scrape_configs:
  - job_name: "otel-collector"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "opentelemetry-collector:9095"

0 comments on commit 91ba4ae

Please sign in to comment.