Sentinel wraps Go functions to automatically expose Prometheus metrics with reliability handlers. For each observed function, the library recovers panic occurrences as errors, applies configurable retry handling, and tracks metrics for successes, errors, panics, retries, timeouts, and execution durations. Sentinel is designed to be minimal, robust, and to integrate immediately with existing applications.
The default configuration automatically exports the following Prometheus metrics:
| Metric | Type | Description | Effective When |
|---|---|---|---|
| sentinel_in_flight | Gauge | Number of tasks currently running | - |
| sentinel_successes_total | Counter | Total successful tasks | - |
| sentinel_failures_total | Counter | Total failed tasks | - |
| sentinel_errors_total | Counter | Total errors across all attempts | - |
| sentinel_panics_total | Counter | Total panic occurrences | - |
| sentinel_timeouts_total | Counter | Total timeout errors | - |
| sentinel_durations_seconds | Histogram | Task execution durations in buckets | durationBuckets is set |
| sentinel_pending_total | Gauge | Number of tasks currently pending | MaxConcurrency > 0 |
| sentinel_retries_total | Counter | Total retry attempts for tasks | MaxRetries > 0 |
Note: `-` indicates the metric is always exported, regardless of configuration.
The library requires Go version >= 1.23:

```
go get github.com/mcwalrus/go-sentinel
```

Configure an observer and observe a task:

```go
package main

import (
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Execute task
    err := observer.Run(func() error {
        log.Println("Processing task...")
        return nil
    })

    // Handle error
    if err != nil {
        log.Printf("Task failed: %v\n", err)
    }
}
```

The observer records errors via metrics while still returning them to the caller:
```go
package main

import (
    "errors"
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Task fails
    err := observer.Run(func() error {
        return errors.New("task failed")
    })

    // Handle error
    if err != nil {
        log.Printf("Task failed: %v\n", err)
    }
}
```

The observer applies context timeouts based on ObserverConfig:
```go
package main

import (
    "context"
    "errors"
    "fmt"
    "time"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Set task timeout
    observer.UseConfig(sentinel.ObserverConfig{
        Timeout: 10 * time.Second,
    })

    // Task respects context timeout
    err := observer.RunFunc(func(ctx context.Context) error {
        <-ctx.Done()
        return ctx.Err()
    })
    if !errors.Is(err, context.DeadlineExceeded) {
        panic(fmt.Sprintf("expected timeout error, got: %v", err))
    }
}
```

Timeout errors are recorded by both the timeouts_total and errors_total counters.
Panic occurrences are recovered and returned as errors by the observer:
```go
package main

import (
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Task panics
    err := observer.Run(func() error {
        panic("stations!:0")
    })

    // Handle error
    if err != nil {
        log.Printf("Task failed: %v\n", err)
    }

    // Recover panic value
    if r, ok := sentinel.IsPanicError(err); ok {
        log.Printf("panic value: %v\n", r)
    }
}
```

Panics are always recorded with the panics_total and errors_total counters.
By default, the observer recovers panics and converts them to errors. You can disable this behavior to let panics propagate normally:
```go
package main

import (
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Disable panic recovery
    observer.DisablePanicRecovery(true)

    // Panic propagates
    err := observer.Run(func() error {
        panic("some failure")
    })

    // Unreachable code
    log.Printf("err was: %v\n", err)
}
```

Set histogram buckets on the observer to export the durations_seconds metric:
```go
package main

import (
    "context"
    "log"
    "math/rand"
    "time"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer with duration buckets
    observer := sentinel.NewObserver(
        []float64{0.100, 0.250, 0.400, 0.500, 1.000}, // in seconds
    )

    // Run tasks that sleep for 50-1000ms before returning
    for i := 0; i < 100; i++ {
        _ = observer.RunFunc(func(ctx context.Context) error {
            sleep := time.Duration(rand.Intn(950)+50) * time.Millisecond
            log.Printf("Sleeping for %v...\n", sleep)
            time.Sleep(sleep)
            return nil
        })
    }
}
```

The durations_seconds histogram is only exported when duration buckets are configured.
Configure retries with wait strategies for resilient task execution:
```go
package main

import (
    "errors"
    "log"
    "time"

    sentinel "github.com/mcwalrus/go-sentinel"
    "github.com/mcwalrus/go-sentinel/retry"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Set retry behavior
    observer.UseConfig(sentinel.ObserverConfig{
        MaxRetries: 3,
        RetryStrategy: retry.WithJitter(
            time.Second,
            retry.Exponential(100*time.Millisecond),
        ),
    })

    // Fail every attempt
    err := observer.Run(func() error {
        return errors.New("task failed")
    })

    // Unwrap joined errors
    errUnwrap, ok := err.(interface{ Unwrap() []error })
    if !ok {
        panic("error does not support Unwrap() []error")
    }

    // Handle each error
    errs := errUnwrap.Unwrap()
    for i, err := range errs {
        log.Printf("Task failed: %d: %v", i, err)
    }
}
```

With MaxRetries set to 3, a task may be called up to four times in total.
Use sentinel.RetryCount(ctx) to read the current retry attempt count within an observed function.
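For illustration, a minimal sketch of reading the attempt count (assuming RetryCount returns the current attempt as an int) could look like:

```go
package main

import (
    "context"
    "errors"
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer with retries enabled
    observer := sentinel.NewObserver(nil)
    observer.UseConfig(sentinel.ObserverConfig{
        MaxRetries: 2,
    })

    // Log the attempt count on each call; RetryCount is assumed
    // here to return the current retry attempt as an int.
    _ = observer.RunFunc(func(ctx context.Context) error {
        log.Printf("attempt: %d\n", sentinel.RetryCount(ctx))
        return errors.New("always fails")
    })
}
```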
Use the following template to integrate sentinel with a Prometheus metrics endpoint:
```go
package main

import (
    "log"
    "net/http"
    "time"

    sentinel "github.com/mcwalrus/go-sentinel"
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// doFunc is a placeholder task to observe
func doFunc() error {
    return nil
}

func main() {
    // New observer
    observer := sentinel.NewObserver(nil,
        sentinel.WithNamespace("myapp"),
        sentinel.WithSubsystem("workers"),
    )

    // Register observer
    registry := prometheus.NewRegistry()
    observer.MustRegister(registry)

    // Expose metrics endpoint
    http.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
    go func() {
        err := http.ListenAndServe(":8080", nil)
        if err != nil {
            log.Fatal(err)
        }
    }()

    // Your application code
    for range time.NewTicker(3 * time.Second).C {
        err := observer.Run(doFunc)
        if err != nil {
            log.Printf("error occurred: %v\n", err)
        }
    }
}
```

Prometheus metrics are exposed with names prefixed myapp_workers_... at localhost:8080/metrics.
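For example, a scrape of the endpoint could include series along the lines of the following (illustrative names and values only, derived from the metrics table above with the configured namespace and subsystem):

```
myapp_workers_in_flight 0
myapp_workers_successes_total 12
myapp_workers_failures_total 3
myapp_workers_errors_total 3
```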
Configure a circuit breaker to stop retries when particular errors occur:
```go
package main

import (
    "errors"
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
)

var ErrCustom = errors.New("unrecoverable error")

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Configure circuit breaker
    observer.UseConfig(sentinel.ObserverConfig{
        MaxRetries: 5,
        RetryBreaker: func(err error) bool {
            return errors.Is(err, ErrCustom)
        },
    })

    // Task runs only once on the custom error
    var count int
    err := observer.Run(func() error {
        count++
        return ErrCustom
    })
    if err != nil && count == 1 {
        log.Printf("Task stopped early: %v\n", err)
    }
}
```

Use a control handler to prevent task execution or retries, which is useful for graceful shutdown:
```go
package main

import (
    "log"

    sentinel "github.com/mcwalrus/go-sentinel"
    "github.com/mcwalrus/go-sentinel/circuit"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Configure control
    done := make(chan struct{})
    observer.UseConfig(sentinel.ObserverConfig{
        Control: circuit.WhenClosed(done),
    })

    // Close the control to reject new requests
    close(done)

    // Expect an early termination error
    var count int
    err := observer.Run(func() error {
        count++
        return nil
    })
    if err != nil && count == 0 {
        log.Printf("error: %T\n", err)
    }
}
```

Manage the observer to control the number of tasks that can execute concurrently:
```go
package main

import (
    "fmt"
    "sync"
    "time"

    sentinel "github.com/mcwalrus/go-sentinel"
)

func main() {
    // New observer
    observer := sentinel.NewObserver(nil)

    // Set concurrency limit
    observer.UseConfig(sentinel.ObserverConfig{
        MaxConcurrency: 5,
    })

    // Run concurrent routines
    var wg sync.WaitGroup
    for i := 0; i < 20; i++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            _ = observer.Run(func() error {
                fmt.Printf("Task %d executing...\n", id)
                time.Sleep(100 * time.Millisecond)
                return nil
            })
        }(i)
    }

    // Wait for task completions
    wg.Wait()
}
```

The sentinel_pending_total metric tracks the number of tasks waiting for a concurrency slot.
VecObserver enables creating multiple observers that share the same underlying metrics but are differentiated by Prometheus labels:
```go
package main

import (
    "time"

    sentinel "github.com/mcwalrus/go-sentinel"
    "github.com/prometheus/client_golang/prometheus"
)

func main() {
    // Create VecObserver with label names
    vecObserver := sentinel.NewVecObserver(
        []float64{0.1, 0.5, 1, 2, 5},
        []string{"service", "pipeline"},
    )

    // Register VecObserver metrics just once
    registry := prometheus.NewRegistry()
    vecObserver.MustRegister(registry)

    // Create observers with different labels
    mObserver, _ := vecObserver.WithLabels("api", "main")
    bgObserver, _ := vecObserver.WithLabels("api", "background")

    // Set observer configurations
    mObserver.UseConfig(sentinel.ObserverConfig{
        Timeout:    60 * time.Second,
        MaxRetries: 2,
    })
    bgObserver.UseConfig(sentinel.ObserverConfig{
        Timeout:    120 * time.Second,
        MaxRetries: 4,
    })

    // Use observers
    _ = mObserver.Run(func() error {
        return nil
    })
    _ = bgObserver.Run(func() error {
        return nil
    })
}
```

Using a VecObserver instead of creating multiple Observers is the recommended best practice for most cases.
Please report any issues or feature requests to the GitHub repository.
I am particularly keen to hear feedback on how the library is presented, alongside any issues.
Please reach out to me directly for issues which require urgent fixes.
This module is maintained by Max Collier under the MIT License.