-
Notifications
You must be signed in to change notification settings - Fork 90
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add remaining metrics and update dashboard cfg
- Loading branch information
frrist
committed
Jan 25, 2024
1 parent
c062554
commit aad6d1b
Showing
10 changed files
with
1,311 additions
and
28 deletions.
There are no files selected for viewing
1,224 changes: 1,211 additions & 13 deletions
1,224
ops/metrics/grafana/provisioning/dashboards/dashboard.json
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
scrape_configs: | ||
- job_name: 'otel-collector' | ||
scrape_interval: 5s | ||
scrape_interval: 1s | ||
static_configs: | ||
- targets: ['opentelemetry-collector:9095'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,37 @@ | ||
package compute | ||
|
||
import ( | ||
"github.com/samber/lo" | ||
"go.opentelemetry.io/otel" | ||
"go.opentelemetry.io/otel/metric" | ||
) | ||
|
||
// Metrics for monitoring compute nodes: | ||
var ( | ||
meter = otel.GetMeterProvider().Meter("compute") | ||
jobsReceived, _ = meter.Int64Counter( | ||
meter = otel.GetMeterProvider().Meter("compute") | ||
jobsReceived = lo.Must(meter.Int64Counter( | ||
"jobs_received", | ||
metric.WithDescription("Number of jobs received by the compute node"), | ||
) | ||
)) | ||
|
||
jobsAccepted, _ = meter.Int64Counter( | ||
jobsAccepted = lo.Must(meter.Int64Counter( | ||
"jobs_accepted", | ||
metric.WithDescription("Number of jobs bid on and accepted by the compute node"), | ||
) | ||
)) | ||
|
||
jobsCompleted, _ = meter.Int64Counter( | ||
jobsCompleted = lo.Must(meter.Int64Counter( | ||
"jobs_completed", | ||
metric.WithDescription("Number of jobs completed by the compute node."), | ||
) | ||
)) | ||
|
||
jobsFailed, _ = meter.Int64Counter( | ||
jobsFailed = lo.Must(meter.Int64Counter( | ||
"jobs_failed", | ||
metric.WithDescription("Number of jobs failed by the compute node."), | ||
) | ||
)) | ||
|
||
jobDurationMilliseconds = lo.Must(meter.Int64Histogram( | ||
"job_duration_milliseconds", | ||
metric.WithDescription("Duration of a job on the compute node in milliseconds."), | ||
metric.WithUnit("ms"), | ||
)) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package metrics | ||
|
||
import ( | ||
"github.com/samber/lo" | ||
"go.opentelemetry.io/otel" | ||
|
||
"github.com/bacalhau-project/bacalhau/pkg/telemetry" | ||
) | ||
|
||
var ( | ||
nodeMeter = otel.GetMeterProvider().Meter("bacalhau-node") | ||
) | ||
|
||
var ( | ||
NodeInfo = lo.Must(telemetry.NewCounter( | ||
nodeMeter, | ||
"bacalhau_node_info", | ||
"A static metric with labels describing the bacalhau node", | ||
)) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package telemetry | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
"go.opentelemetry.io/otel/attribute" | ||
"go.opentelemetry.io/otel/metric" | ||
) | ||
|
||
// Timer measures the duration of an event. | ||
type Timer struct { | ||
startTime time.Time | ||
durationRecorder metric.Int64Histogram | ||
} | ||
|
||
func NewTimer(durationRecorder metric.Int64Histogram) *Timer { | ||
return &Timer{ | ||
durationRecorder: durationRecorder, | ||
} | ||
} | ||
|
||
// Start begins the timer by recording the current time. | ||
func (t *Timer) Start() { | ||
t.startTime = time.Now() | ||
} | ||
|
||
// Stop ends the timer and records the duration since Start was called. | ||
// `attrs` are optional attributes that can be added to the duration metric for additional context. | ||
func (t *Timer) Stop(ctx context.Context, attrs ...attribute.KeyValue) { | ||
if t.startTime.IsZero() { | ||
// Handle the case where Stop is called without Start being called. | ||
return | ||
} | ||
|
||
// Calculate the duration and record it using the OpenTelemetry histogram. | ||
duration := time.Since(t.startTime).Milliseconds() | ||
t.durationRecorder.Record(ctx, duration, metric.WithAttributes(attrs...)) | ||
t.startTime = time.Time{} // Reset the start time for future use. | ||
} |