-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore(aggregated_metrics): Add metrics to aggreated metrics #14986
Changes from 3 commits
edb32d8
1eb1735
46c03b4
6e820c6
5ed2f3b
4f37e52
4babda5
b7b207e
90dc9d7
bf8baeb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ import ( | |
"bufio" | ||
"bytes" | ||
"context" | ||
"errors" | ||
"fmt" | ||
"io" | ||
"net/http" | ||
|
@@ -16,6 +17,7 @@ import ( | |
"github.com/go-kit/log/level" | ||
"github.com/golang/snappy" | ||
"github.com/opentracing/opentracing-go" | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/common/config" | ||
"github.com/prometheus/common/model" | ||
"github.com/prometheus/prometheus/model/labels" | ||
|
@@ -71,6 +73,8 @@ type Push struct { | |
backoff *backoff.Config | ||
|
||
entries entries | ||
|
||
metrics *Metrics | ||
} | ||
|
||
type entry struct { | ||
|
@@ -108,6 +112,7 @@ func NewPush( | |
useTLS bool, | ||
backoffCfg *backoff.Config, | ||
logger log.Logger, | ||
registrer prometheus.Registerer, | ||
) (*Push, error) { | ||
client, err := config.NewClientFromConfig(cfg, "pattern-ingester-push", config.WithHTTP2Disabled()) | ||
if err != nil { | ||
|
@@ -142,6 +147,7 @@ func NewPush( | |
entries: entries{ | ||
entries: make([]entry, 0), | ||
}, | ||
metrics: NewMetrics(registrer, "pattern_ingester"), | ||
} | ||
|
||
go p.run(pushPeriod) | ||
|
@@ -222,6 +228,10 @@ func (p *Push) buildPayload(ctx context.Context) ([]byte, error) { | |
|
||
payload = snappy.Encode(nil, payload) | ||
|
||
p.metrics.streamsPerPush.WithLabelValues(p.tenantID).Observe(float64(len(streams))) | ||
p.metrics.entriesPerPush.WithLabelValues(p.tenantID).Observe(float64(len(entries))) | ||
p.metrics.servicesTracked.WithLabelValues(p.tenantID).Set(float64(len(entriesByStream))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the difference between number of streams and the length of the entries by stream map? won't those always be the same? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed, was supposed to count services tracked. |
||
|
||
sp.LogKV( | ||
"event", "build aggregated metrics payload", | ||
"num_service", len(entriesByStream), | ||
|
@@ -287,6 +297,31 @@ func (p *Push) run(pushPeriod time.Duration) { | |
} | ||
} | ||
|
||
func (p *Push) sendPayload(ctx context.Context, payload []byte) (int, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I couldn't find any references to this new function, is it being used? |
||
|
||
status, err := p.send(ctx, payload) | ||
if err != nil { | ||
errorType := "unknown" | ||
if status == 429 { | ||
errorType = "rate_limited" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could we move these to constants so their easier to find, and test against? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, moved this to constants |
||
} else if status/100 == 5 { | ||
errorType = "server_error" | ||
} else if status/100 != 2 { | ||
errorType = "client_error" | ||
} | ||
p.metrics.pushErrors.WithLabelValues(p.tenantID, errorType).Inc() | ||
} else { | ||
p.metrics.pushSuccesses.WithLabelValues(p.tenantID).Inc() | ||
} | ||
if err != nil { | ||
return 0, err | ||
} | ||
|
||
p.metrics.payloadSize.WithLabelValues(p.tenantID).Observe(float64(len(payload))) | ||
|
||
return status, err | ||
} | ||
|
||
// send makes one attempt to send the payload to Loki | ||
func (p *Push) send(ctx context.Context, payload []byte) (int, error) { | ||
var ( | ||
|
@@ -320,6 +355,9 @@ func (p *Push) send(ctx context.Context, payload []byte) (int, error) { | |
|
||
resp, err = p.httpClient.Do(req) | ||
if err != nil { | ||
if errors.Is(ctx.Err(), context.DeadlineExceeded) { | ||
p.metrics.writeTimeout.WithLabelValues(p.tenantID).Inc() | ||
} | ||
return -1, fmt.Errorf("failed to push payload: %w", err) | ||
} | ||
status := resp.StatusCode | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should the namespace be
loki
here? we already have the subsystem aspattern_ingester
, so I think this would make the metric havepattern_ingester
in it twice?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, missed this one. Fixed.