Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to configure Mimir alertmanager #447

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
43 changes: 43 additions & 0 deletions integration/mimir/alertmanager_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package mimir

import (
"os"
"testing"

"github.com/grafana/grizzly/pkg/grizzly"
"github.com/grafana/grizzly/pkg/mimir"
"github.com/grafana/grizzly/pkg/mimir/client"
"github.com/grafana/grizzly/pkg/testutil"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)

func TestAlertmanager(t *testing.T) {
alertmanagerTestFilePath := "testdata/alertmanager/test-alertmanager.yml"
provider := mimir.NewProvider(&testutil.TestContext().Mimir)
client := client.NewHTTPClient(&testutil.TestContext().Mimir)
handler := mimir.NewAlertmanagerHandler(provider, client)

t.Run("create prometheus alertmanager config", func(t *testing.T) {

Check failure on line 21 in integration/mimir/alertmanager_test.go

View workflow job for this annotation

GitHub Actions / lint

unnecessary leading newline (whitespace)

file, err := os.ReadFile(alertmanagerTestFilePath)
require.NoError(t, err)

var resource grizzly.Resource
require.NoError(t, yaml.Unmarshal(file, &resource.Body))
require.NoError(t, handler.Add(resource))

t.Run("get remote alertmanager config", func(t *testing.T) {
file, err := os.ReadFile(alertmanagerTestFilePath)
require.NoError(t, err)

var resource grizzly.Resource
require.NoError(t, yaml.Unmarshal(file, &resource.Body))

remoteResource, err := handler.GetRemote(grizzly.Resource{})
require.NoError(t, err)

require.Equal(t, resource, *remoteResource)
})
})
}
5 changes: 3 additions & 2 deletions integration/mimir/rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ import (
)

func TestRules(t *testing.T) {
dirName := "testdata/rules"
provider := mimir.NewProvider(&testutil.TestContext().Mimir)
handler := provider.GetHandlers()[0]

t.Run("create rule group", func(t *testing.T) {
dirs, err := os.ReadDir("testdata")
dirs, err := os.ReadDir(dirName)
require.NoError(t, err)

for _, dir := range dirs {
file, err := os.ReadFile(filepath.Join("testdata", dir.Name()))
file, err := os.ReadFile(filepath.Join(dirName, dir.Name()))
require.NoError(t, err)

var resource grizzly.Resource
Expand Down
21 changes: 21 additions & 0 deletions integration/mimir/testdata/alertmanager/test-alertmanager.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
apiVersion: grizzly.grafana.com/v1alpha1
kind: PrometheusAlertmanagerConfig
metadata:
name: global
spec:
template_files:
default_template: |
{{ define "__alertmanager" }}AlertManager{{ end }}
{{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver | urlquery }}{{ end }}
alertmanager_config:
global:
smtp_smarthost: 'localhost:25'
smtp_from: 'youraddress@example.org'
templates:
- 'default_template'
route:
receiver: example-email
receivers:
- name: example-email
email_configs:
- to: 'youraddress@example.org'
123 changes: 123 additions & 0 deletions pkg/mimir/alertmanager-handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package mimir

import (
"fmt"

"github.com/grafana/grizzly/pkg/mimir/client"
"github.com/grafana/grizzly/pkg/mimir/models"
"gopkg.in/yaml.v3"

"github.com/grafana/grizzly/pkg/grizzly"
)

const (
// GlobalAlertmangerConfigName is the only resource name accepted by
// AlertmanagerHandler.Validate and the single UID reported by ListRemote.
// NOTE(review): "Alertmanger" is misspelled; renaming would change the
// exported API, so it is left as-is here.
GlobalAlertmangerConfigName = "global"
// AlertmanagerConfigFile is the file name returned by
// AlertmanagerHandler.ResourceFilePath.
AlertmanagerConfigFile = "prometheusAlertmanagerConfig.yaml"
)

// AlertmanagerHandler is a Grizzly Handler for Alertmanager Configuration.
// It embeds grizzly.BaseHandler and performs all remote reads and writes
// through the client.Mimir interface.
type AlertmanagerHandler struct {
grizzly.BaseHandler
// clientTool is the Mimir API client used to fetch and push the config.
clientTool client.Mimir
}

// NewAlertmanagerHandler returns a new Grizzly Handler for Alertmanager
// Configuration. The handler is registered under the kind
// "PrometheusAlertmanagerConfig" and uses clientTool for all Mimir API calls.
func NewAlertmanagerHandler(provider *Provider, clientTool client.Mimir) *AlertmanagerHandler {
return &AlertmanagerHandler{
// NOTE(review): the meaning of the trailing `false` is defined by
// grizzly.NewBaseHandler, which is not visible here — confirm.
BaseHandler: grizzly.NewBaseHandler(provider, "PrometheusAlertmanagerConfig", false),
clientTool: clientTool,
}
}

// ResourceFilePath returns the location on disk where a resource should be
// updated. The result is always AlertmanagerConfigFile; both the resource and
// the filetype argument are ignored.
func (h *AlertmanagerHandler) ResourceFilePath(resource grizzly.Resource, filetype string) string {
return AlertmanagerConfigFile
}

// Validate checks that the resource is named GlobalAlertmangerConfigName.
// It returns nil when the name matches and a descriptive error otherwise.
func (h *AlertmanagerHandler) Validate(resource grizzly.Resource) error {
	name := resource.Name()
	if name == GlobalAlertmangerConfigName {
		return nil
	}
	return fmt.Errorf("name of prometheus alertmanager config must be '%s', got '%s'", GlobalAlertmangerConfigName, name)
}

// GetSpecUID is not supported for PrometheusAlertmanagerConfig resources: it
// always returns an empty UID together with a "not implemented" error.
func (h *AlertmanagerHandler) GetSpecUID(resource grizzly.Resource) (string, error) {
return "", fmt.Errorf("GetSpecUID not implemented for PrometheusAlertmanagerConfig")
}

// GetByUID retrieves the remote alertmanager config as a Resource.
// The uid argument is ignored; the call delegates to
// getRemoteAlertmanagerConfig regardless of its value.
func (h *AlertmanagerHandler) GetByUID(uid string) (*grizzly.Resource, error) {
return h.getRemoteAlertmanagerConfig()
}

// GetRemote retrieves an alertmanager config as a Resource.
// The resource argument is not inspected; the call delegates to
// getRemoteAlertmanagerConfig.
func (h *AlertmanagerHandler) GetRemote(resource grizzly.Resource) (*grizzly.Resource, error) {
return h.getRemoteAlertmanagerConfig()
}

// ListRemote returns the list of UIDs of all remote resources.
// No remote call is made: the result is always the single entry
// GlobalAlertmangerConfigName with a nil error.
func (h *AlertmanagerHandler) ListRemote() ([]string, error) {
return []string{GlobalAlertmangerConfigName}, nil
}

// Add pushes an alertmanager config to Mimir via the API.
// It delegates to writeAlertmanagerConfig, the same path Update uses.
func (h *AlertmanagerHandler) Add(resource grizzly.Resource) error {
return h.writeAlertmanagerConfig(resource)
}

// Update pushes an alertmanager config to Mimir via the API.
// The existing resource is ignored; only the new resource is written, using
// the same writeAlertmanagerConfig path as Add.
func (h *AlertmanagerHandler) Update(existing, resource grizzly.Resource) error {
return h.writeAlertmanagerConfig(resource)
}

// getRemoteAlertmanagerConfig fetches the alertmanager config through the
// Mimir client and wraps it in a grizzly.Resource named
// GlobalAlertmangerConfigName. Any error from the client, the YAML
// conversion, or resource construction is returned unchanged.
func (h *AlertmanagerHandler) getRemoteAlertmanagerConfig() (*grizzly.Resource, error) {
	remote, err := h.clientTool.GetAlertmanagerConfig()
	if err != nil {
		return nil, err
	}

	// Round-trip through YAML to turn the typed config into the generic
	// map that grizzly.NewResource takes as its spec.
	raw, err := yaml.Marshal(remote)
	if err != nil {
		return nil, err
	}

	var spec map[string]interface{}
	if err := yaml.Unmarshal(raw, &spec); err != nil {
		return nil, err
	}

	result, err := grizzly.NewResource(h.APIVersion(), h.Kind(), GlobalAlertmangerConfigName, spec)
	if err != nil {
		return nil, err
	}
	return &result, nil
}

// writeAlertmanagerConfig converts the resource spec into a
// models.PrometheusAlertmanagerConfig and pushes it through the Mimir client.
//
// Two spec keys are read:
//   - "alertmanager_config": a string value is sent as-is (this is the shape
//     getRemoteAlertmanagerConfig produces, so a fetched resource can be
//     written back unchanged); any other value is marshalled to YAML text.
//   - "template_files": must be a mapping of template name to string content.
//
// Missing or null keys leave the corresponding field at its empty default.
// A malformed "template_files" value yields an error instead of a panic.
func (h *AlertmanagerHandler) writeAlertmanagerConfig(resource grizzly.Resource) error {
	spec := resource.Spec()
	newConfig := models.PrometheusAlertmanagerConfig{
		TemplateFiles:      map[string]string{},
		AlertmanagerConfig: "",
	}

	if alertmanagerConfigIn, found := spec["alertmanager_config"]; found {
		if text, isString := alertmanagerConfigIn.(string); isString {
			// Already YAML text; marshalling it again would wrap the whole
			// document in a single YAML string scalar.
			newConfig.AlertmanagerConfig = text
		} else {
			alertmanagerConfig, err := yaml.Marshal(alertmanagerConfigIn)
			if err != nil {
				return fmt.Errorf("marshalling alertmanager_config: %w", err)
			}
			newConfig.AlertmanagerConfig = string(alertmanagerConfig)
		}
	}

	if templateFilesIn, found := spec["template_files"]; found && templateFilesIn != nil {
		rawTemplates, ok := templateFilesIn.(map[string]interface{})
		if !ok {
			return fmt.Errorf("template_files must be a mapping of template name to content, got %T", templateFilesIn)
		}
		for key, in := range rawTemplates {
			content, ok := in.(string)
			if !ok {
				return fmt.Errorf("template_files[%q] must be a string, got %T", key, in)
			}
			newConfig.TemplateFiles[key] = content
		}
	}

	return h.clientTool.CreateAlertmangerConfig(newConfig)
}
40 changes: 37 additions & 3 deletions pkg/mimir/client/http_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

// Endpoint format strings. In the visible call sites the first %s is filled
// with the configured Mimir address (c.config.Address).
// NOTE(review): the second %s of loadRulesEndpoint is presumably the rule
// namespace — its call site is not visible here, confirm.
var loadRulesEndpoint = "%s/prometheus/config/v1/rules/%s"
var listRulesEndpoint = "%s/prometheus/api/v1/rules"
// alertmanagerAPIPath is used for both reading (GET) and writing (POST) the
// alertmanager configuration.
var alertmanagerAPIPath = "%s/api/v1/alerts"

type ListGroupResponse struct {
Status string `yaml:"status"`
Expand All @@ -41,6 +42,37 @@ func NewHTTPClient(config *config.MimirConfig) Mimir {
return &Client{config: config}
}

// CreateAlertmangerConfig serialises the given alertmanager configuration to
// YAML and POSTs it to the alertmanager endpoint of the configured Mimir
// address. It returns the marshalling or request error, if any.
func (c *Client) CreateAlertmangerConfig(resource models.PrometheusAlertmanagerConfig) error {
	payload, err := yaml.Marshal(&resource)
	if err != nil {
		return err
	}

	endpoint := fmt.Sprintf(alertmanagerAPIPath, c.config.Address)
	if _, err := c.doRequest(http.MethodPost, endpoint, payload); err != nil {
		return err
	}
	return nil
}

// GetAlertmanagerConfig fetches the alertmanager configuration from the
// configured Mimir address and decodes the YAML response body.
//
// A request error is returned unchanged. A decoding failure is returned with
// the underlying YAML error wrapped, so callers can see why the response was
// rejected.
// NOTE(review): doRequest, as changed in this diff, appears to return an
// empty body with a nil error on HTTP 404; in that case the result is an
// empty config rather than an error — confirm that is the intended contract.
func (c *Client) GetAlertmanagerConfig() (*models.PrometheusAlertmanagerConfig, error) {
	url := fmt.Sprintf(alertmanagerAPIPath, c.config.Address)
	res, err := c.doRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	config := models.PrometheusAlertmanagerConfig{}
	if err := yaml.Unmarshal(res, &config); err != nil {
		return nil, fmt.Errorf("unable to unmarshal response: %w", err)
	}

	return &config, nil
}

func (c *Client) ListRules() (map[string][]models.PrometheusRuleGroup, error) {
url := fmt.Sprintf(listRulesEndpoint, c.config.Address)
res, err := c.doRequest(http.MethodGet, url, nil)
Expand Down Expand Up @@ -103,11 +135,13 @@ func (c *Client) doRequest(method string, url string, body []byte) ([]byte, erro

res, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("request to load rules failed: %s", err)
return nil, fmt.Errorf("request failed: %s", err)
}

if res.StatusCode >= 300 {
return nil, fmt.Errorf("error loading rules: %d", res.StatusCode)
if res.StatusCode >= 300 && res.StatusCode != http.StatusNotFound {
return nil, fmt.Errorf("error: %d", res.StatusCode)
} else if res.StatusCode == http.StatusNotFound {
return []byte(nil), nil
}

b, err := io.ReadAll(res.Body)
Expand Down
2 changes: 2 additions & 0 deletions pkg/mimir/client/mimir.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ import (
type Mimir interface {
ListRules() (map[string][]models.PrometheusRuleGroup, error)
CreateRules(resource models.PrometheusRuleGrouping) error
GetAlertmanagerConfig() (*models.PrometheusAlertmanagerConfig, error)
CreateAlertmangerConfig(resource models.PrometheusAlertmanagerConfig) error
}
5 changes: 5 additions & 0 deletions pkg/mimir/models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ type PrometheusRuleGrouping struct {
Namespace string `yaml:"namespace"`
Groups []PrometheusRuleGroup `yaml:"groups"`
}

// PrometheusAlertmanagerConfig is the YAML payload exchanged with the
// alertmanager configuration endpoint.
type PrometheusAlertmanagerConfig struct {
// TemplateFiles maps a template name to its content.
TemplateFiles map[string]string `yaml:"template_files"`
// AlertmanagerConfig holds the alertmanager configuration as a single
// string (the handler fills it with marshalled YAML text).
AlertmanagerConfig string `yaml:"alertmanager_config"`
}
1 change: 1 addition & 0 deletions pkg/mimir/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ func (p *Provider) APIVersion() string {
// GetHandlers returns the handlers this provider offers, all sharing the
// provider's Mimir client. The rule handler stays first: the rules
// integration test selects it with GetHandlers()[0].
func (p *Provider) GetHandlers() []grizzly.Handler {
return []grizzly.Handler{
NewRuleHandler(p, p.clientTool),
NewAlertmanagerHandler(p, p.clientTool),
}
}
10 changes: 5 additions & 5 deletions pkg/mimir/rules-handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (h *RuleHandler) GetByUID(uid string) (*grizzly.Resource, error) {
return h.getRemoteRuleGroup(uid)
}

// GetRemote retrieves a datasource as a Resource
// GetRemote retrieves a rule group as a Resource
func (h *RuleHandler) GetRemote(resource grizzly.Resource) (*grizzly.Resource, error) {
uid := fmt.Sprintf("%s.%s", resource.GetMetadata("namespace"), resource.Name())
return h.getRemoteRuleGroup(uid)
Expand All @@ -70,17 +70,17 @@ func (h *RuleHandler) ListRemote() ([]string, error) {
return h.getRemoteRuleGroupList()
}

// Add pushes a datasource to Grafana via the API
// Add pushes a rule group to Mimir via the API
func (h *RuleHandler) Add(resource grizzly.Resource) error {
return h.writeRuleGroup(resource)
}

// Update pushes a datasource to Grafana via the API
// Update pushes a rule group to Mimir via the API
func (h *RuleHandler) Update(existing, resource grizzly.Resource) error {
return h.writeRuleGroup(resource)
}

// getRemoteRuleGroup retrieves a datasource object from Grafana
// getRemoteRuleGroup retrieves a rule group object from Mimir
func (h *RuleHandler) getRemoteRuleGroup(uid string) (*grizzly.Resource, error) {
parts := strings.SplitN(uid, ".", 2)
namespace := parts[0]
Expand Down Expand Up @@ -111,7 +111,7 @@ func (h *RuleHandler) getRemoteRuleGroup(uid string) (*grizzly.Resource, error)
return nil, grizzly.ErrNotFound
}

// getRemoteRuleGroupList retrieves a datasource object from Grafana
// getRemoteRuleGroupList retrieves a rule group object from Mimir
func (h *RuleHandler) getRemoteRuleGroupList() ([]string, error) {
groupings, err := h.clientTool.ListRules()
if err != nil {
Expand Down
8 changes: 4 additions & 4 deletions test-docker-compose/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: '3.4'
services:
grafana:
image: &image grafana/grafana:10.2.0
image: &grafanaImage grafana/grafana:10.2.0
restart: on-failure
ports:
- "3001:3001"
Expand All @@ -22,7 +22,7 @@ services:
# It's important to use nginx to serve the subpath, otherwise, it doesn't really test the real-world scenario
# Grafana will redirect all subpaths to the root URL if that's the correct path, while an nginx will fail if the subpath is not correct
grafana_subpath:
image: *image
image: *grafanaImage
restart: on-failure
ports:
- "3002:3002"
Expand Down Expand Up @@ -53,7 +53,7 @@ services:
- ./provisioning/mimir/htpasswd:/etc/nginx/.htpasswd:ro

grafana_basic_auth:
image: *image
image: *grafanaImage
ports:
- "3004:3004"
environment:
Expand All @@ -69,7 +69,7 @@ services:

mimir:
image: grafana/mimir:2.12.0
command: [ "-config.file=/etc/mimir.yaml" ]
command: [ "-config.file=/etc/mimir.yaml", "-target=all,alertmanager" ]
hostname: mimir
volumes:
- ./provisioning/mimir/mimir.yaml:/etc/mimir.yaml
5 changes: 5 additions & 0 deletions test-docker-compose/provisioning/mimir/mimir.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,8 @@ ruler_storage:
ruler:
enable_api: true
poll_interval: 1s

alertmanager_storage:
backend: filesystem
local:
path: /tmp/mimir/alerts
Loading