Skip to content

Commit

Permalink
Add remote write v2 HA tracker and relabel
Browse files Browse the repository at this point in the history
Signed-off-by: SungJin1212 <[email protected]>
  • Loading branch information
SungJin1212 committed Nov 6, 2024
1 parent ba60d7a commit bf1e65f
Show file tree
Hide file tree
Showing 5 changed files with 397 additions and 49 deletions.
4 changes: 2 additions & 2 deletions pkg/cortexpbv2/compatv2.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ import (
)

// ToWriteRequestV2 converts matched slices of Labels, Samples, and Histograms into a WriteRequest proto.
func ToWriteRequestV2(lbls []labels.Labels, samples []Sample, histograms []Histogram, metadata []Metadata, source WriteRequest_SourceEnum, additionalSymbols ...string) *WriteRequest {
func ToWriteRequestV2(lbls []labels.Labels, samples []Sample, histograms []Histogram, metadata []Metadata, source WriteRequest_SourceEnum, help ...string) *WriteRequest {
st := writev2.NewSymbolTable()
labelRefs := make([][]uint32, 0, len(lbls))
for _, lbl := range lbls {
labelRefs = append(labelRefs, st.SymbolizeLabels(lbl, nil))
}

for _, s := range additionalSymbols {
for _, s := range help {
st.Symbolize(s)
}

Expand Down
84 changes: 75 additions & 9 deletions pkg/distributor/distributor.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
writev2 "github.com/prometheus/prometheus/prompb/io/prometheus/write/v2"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/storage"
"github.com/weaveworks/common/httpgrpc"
Expand Down Expand Up @@ -515,6 +516,17 @@ func shardByAllLabels(userID string, labels []cortexpb.LabelAdapter) uint32 {
return h
}

// removeLabelV2 deletes the first label whose name equals labelName from
// labels, if one is present. The slice is modified in place through the
// pointer; if no label matches, labels is left untouched.
func removeLabelV2(labelName string, labels *labels.Labels) {
	lbls := *labels
	for idx := range lbls {
		if lbls[idx].Name != labelName {
			continue
		}
		// Shift the tail left over the matched entry, reusing the backing array.
		*labels = append(lbls[:idx], lbls[idx+1:]...)
		return
	}
}

// Remove the label labelname from a slice of LabelPairs if it exists.
func removeLabel(labelName string, labels *[]cortexpb.LabelAdapter) {
for i := 0; i < len(*labels); i++ {
Expand Down Expand Up @@ -619,11 +631,9 @@ func (d *Distributor) validateSeries(ts cortexpb.PreallocTimeseries, userID stri
nil
}

func (d *Distributor) prepareSeriesKeysV2(ctx context.Context, req *cortexpbv2.WriteRequest, userID string, limits *validation.Limits) ([]uint32, []cortexpbv2.TimeSeries, int64, int64, int64, int64, error, error) {
func (d *Distributor) prepareSeriesKeysV2(ctx context.Context, req *cortexpbv2.WriteRequest, userID string, limits *validation.Limits, b labels.ScratchBuilder, removeReplica bool) ([]uint32, []cortexpbv2.TimeSeries, int64, int64, int64, int64, error, error) {
pSpan, _ := opentracing.StartSpanFromContext(ctx, "prepareSeriesKeysV2")
defer pSpan.Finish()

b := labels.NewScratchBuilder(0)
// For each timeseries or samples, we compute a hash to distribute across ingesters;
// check each sample/metadata and discard if outside limits.
validatedTimeseries := make([]cortexpbv2.TimeSeries, 0, len(req.Timeseries))
Expand All @@ -643,6 +653,7 @@ func (d *Distributor) prepareSeriesKeysV2(ctx context.Context, req *cortexpbv2.W
}
}()

st := writev2.NewSymbolTable()
// For each timeseries, compute a hash to distribute across ingesters;
// check each sample and discard if outside limits.
skipLabelNameValidation := d.cfg.SkipLabelNameValidation || req.GetSkipLabelNameValidation()
Expand All @@ -656,12 +667,37 @@ func (d *Distributor) prepareSeriesKeysV2(ctx context.Context, req *cortexpbv2.W
}

lbs := ts.ToLabels(&b, req.Symbols)
las := cortexpb.FromLabelsToLabelAdapters(lbs)

// TODO(Sungjin1212): Implement relabel
// TODO(Sunghin1212): Implement ha tracker
if mrc := limits.MetricRelabelConfigs; len(mrc) > 0 {
l, _ := relabel.Process(lbs, mrc...)
if len(l) == 0 {
// all labels are gone, samples will be discarded
d.validateMetrics.DiscardedSamples.WithLabelValues(
validation.DroppedByRelabelConfiguration,
userID,
).Add(float64(len(ts.Samples) + len(ts.Histograms)))

// all labels are gone, exemplars will be discarded
d.validateMetrics.DiscardedExemplars.WithLabelValues(
validation.DroppedByRelabelConfiguration,
userID,
).Add(float64(len(ts.Exemplars)))
continue
}
lbs = l
}

// If we found both the cluster and replica labels, we only want to include the cluster label when
// storing series in Cortex. If we kept the replica label we would end up with another series for the same
// series we're trying to dedupe when HA tracking moves over to a different replica.
if removeReplica {
removeLabelV2(limits.HAReplicaLabel, &lbs)
}

if len(las) == 0 {
for _, labelName := range limits.DropLabels {
removeLabelV2(labelName, &lbs)
}
if len(lbs) == 0 {
d.validateMetrics.DiscardedSamples.WithLabelValues(
validation.DroppedByUserConfigurationOverride,
userID,
Expand All @@ -674,6 +710,10 @@ func (d *Distributor) prepareSeriesKeysV2(ctx context.Context, req *cortexpbv2.W
continue
}

// update label refs
ts.LabelsRefs = st.SymbolizeLabels(lbs, nil)
las := cortexpb.FromLabelsToLabelAdapters(lbs)

// We rely on sorted labels in different places:
// 1) When computing token for labels, and sharding by all labels. Here different order of labels returns
// different tokens, which is bad.
Expand Down Expand Up @@ -714,6 +754,7 @@ func (d *Distributor) prepareSeriesKeysV2(ctx context.Context, req *cortexpbv2.W
validatedHistogramSamples += len(ts.Histograms)
validatedExemplars += len(ts.Exemplars)
}

return seriesKeys, validatedTimeseries, int64(validatedMetadata), int64(validatedFloatSamples), int64(validatedHistogramSamples), int64(validatedExemplars), firstPartialErr, nil
}

Expand Down Expand Up @@ -917,12 +958,37 @@ func (d *Distributor) PushV2(ctx context.Context, req *cortexpbv2.WriteRequest)
}
}

b := labels.NewScratchBuilder(0)
removeReplica := false
// Cache user limit with overrides so we spend less CPU doing locking. See issue #4904
limits := d.limits.GetOverridesForUser(userID)

// TODO(Sungjin1212): Add ha tracker
if limits.AcceptHASamples && len(req.Timeseries) > 0 {
cluster, replica := findHALabels(limits.HAReplicaLabel, limits.HAClusterLabel, cortexpb.FromLabelsToLabelAdapters(req.Timeseries[0].ToLabels(&b, req.Symbols)))
removeReplica, err = d.checkSample(ctx, userID, cluster, replica, limits)
if err != nil {
// TODO(Sungjin1212): reuse timeseries slice

if errors.Is(err, ha.ReplicasNotMatchError{}) {
// These samples have been deduped.
d.dedupedSamples.WithLabelValues(userID, cluster).Add(float64(numFloatSamples + numHistogramSamples))
return nil, httpgrpc.Errorf(http.StatusAccepted, err.Error())
}

if errors.Is(err, ha.TooManyReplicaGroupsError{}) {
d.validateMetrics.DiscardedSamples.WithLabelValues(validation.TooManyHAClusters, userID).Add(float64(numFloatSamples + numHistogramSamples))
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
}

return nil, err
}
// If there wasn't an error but removeReplica is false that means we didn't find both HA labels.
if !removeReplica { // False, Nil
d.nonHASamples.WithLabelValues(userID).Add(float64(numFloatSamples + numHistogramSamples))
}
}

seriesKeys, validatedTimeseries, validatedMetadatas, validatedFloatSamples, validatedHistogramSamples, validatedExemplars, firstPartialErr, err := d.prepareSeriesKeysV2(ctx, req, userID, limits)
seriesKeys, validatedTimeseries, validatedMetadatas, validatedFloatSamples, validatedHistogramSamples, validatedExemplars, firstPartialErr, err := d.prepareSeriesKeysV2(ctx, req, userID, limits, b, removeReplica)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit bf1e65f

Please sign in to comment.