Merge branch 'main' into fix-action-names-in-main

grafana · Feb 13, 2024 · c8ad24a · c8ad24a
2 parents 663aeac + b2e4cc3
commit c8ad24a
Show file tree

Hide file tree

Showing 239 changed files with 12,042 additions and 7,618 deletions.
diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet
@@ -610,23 +610,6 @@ local build_image_tag = '0.33.0';
         'cd -',
       ]) { depends_on: ['clone'], when: onPRs },
       make('test', container=false) { depends_on: ['clone-target-branch', 'check-generated-files'] },
-      run('test-target-branch', commands=['cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test']) { depends_on: ['clone-target-branch'], when: onPRs },
-      make('compare-coverage', container=false, args=[
-        'old=../loki-target-branch/test_results.txt',
-        'new=test_results.txt',
-        'packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki',
-        '> diff.txt',
-      ]) { depends_on: ['test', 'test-target-branch'], when: onPRs },
-      run('report-coverage', commands=[
-        "total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}')",
-        'if [ $total_diff = 0 ]; then exit 0; fi',
-        "pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}')",
-        "body=$(jq -Rs '{body: . }' diff.txt)",
-        'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments -d "$body" > /dev/null',
-      ], env={
-        USER: 'grafanabot',
-        TOKEN: { from_secret: github_secret.name },
-      }) { depends_on: ['compare-coverage'], when: onPRs },
       make('lint', container=false) { depends_on: ['check-generated-files'] },
       make('check-mod', container=false) { depends_on: ['test', 'lint'] },
       {

diff --git a/.drone/drone.yml b/.drone/drone.yml
@@ -212,47 +212,6 @@ steps:
   environment: {}
   image: grafana/loki-build-image:0.33.0
   name: test
-- commands:
-  - cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test
-  depends_on:
-  - clone-target-branch
-  environment: {}
-  image: grafana/loki-build-image:0.33.0
-  name: test-target-branch
-  when:
-    event:
-    - pull_request
-- commands:
-  - make BUILD_IN_CONTAINER=false compare-coverage old=../loki-target-branch/test_results.txt
-    new=test_results.txt packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki
-    > diff.txt
-  depends_on:
-  - test
-  - test-target-branch
-  environment: {}
-  image: grafana/loki-build-image:0.33.0
-  name: compare-coverage
-  when:
-    event:
-    - pull_request
-- commands:
-  - total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}')
-  - if [ $total_diff = 0 ]; then exit 0; fi
-  - pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}')
-  - 'body=$(jq -Rs ''{body: . }'' diff.txt)'
-  - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments
-    -d "$body" > /dev/null'
-  depends_on:
-  - compare-coverage
-  environment:
-    TOKEN:
-      from_secret: github_token
-    USER: grafanabot
-  image: grafana/loki-build-image:0.33.0
-  name: report-coverage
-  when:
-    event:
-    - pull_request
 - commands:
   - make BUILD_IN_CONTAINER=false lint
   depends_on:
@@ -2113,6 +2072,6 @@ kind: secret
 name: gpg_private_key
 ---
 kind: signature
-hmac: 457592d17208477ceb480f81dbdb88f7b95a5ad015c88d9d6fed06c2422a52f9
+hmac: 51861919f0ba5370a152bdb9267828c742f2042819fb01388c6d23bf44e3cbb7
 
 ...
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -14,6 +14,11 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
+      - name: golangci-lint
+        uses: golangci/golangci-lint-action@08e2f20817b15149a52b5b3ebe7de50aff2ba8c5
+        with:
+          version: v1.55.1
+          only-new-issues: true
       - run: make lint
       - run: make check-doc
       - run: make check-mod

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -53,6 +53,7 @@
 * [11679](https://github.com/grafana/loki/pull/11679) **dannykopping** Cache: extending #11535 to align custom ingester query split with cache keys for correct caching of results.
 * [11143](https://github.com/grafana/loki/pull/11143) **sandeepsukhani** otel: Add support for per tenant configuration for mapping otlp data to loki format
 * [11499](https://github.com/grafana/loki/pull/11284) **jmichalek132** Config: Adds `frontend.log-query-request-headers` to enable logging of request headers in query logs.
+* [11817](https://github.com/grafana/loki/pull/11817) **ashwanthgoli** Ruler: Add support for filtering results of `/prometheus/api/v1/rules` endpoint by rule_name, rule_group, file and type.
 
 ##### Fixes
 * [11074](https://github.com/grafana/loki/pull/11074) **hainenber** Fix panic in lambda-promtail due to mishandling of empty DROP_LABELS env var.
@@ -65,6 +66,7 @@
 * [11657](https://github.com/grafana/loki/pull/11657) **ashwanthgoli** Log results cache: compose empty response based on the request being served to avoid returning incorrect limit or direction.
 * [11587](https://github.com/grafana/loki/pull/11587) **trevorwhitney** Fix semantics of label parsing logic of metrics and logs queries. Both only parse the first label if multiple extractions into the same label are requested.
 * [11776](https://github.com/grafana/loki/pull/11776) **ashwanthgoli** Background Cache: Fixes a bug that is causing the background queue size to be incremented twice for each enqueued item.
+* [11921](https://github.com/grafana/loki/pull/11921) **paul1r** Parsing: String array elements were not being parsed correctly in JSON processing.
 
 ##### Changes
 

diff --git a/clients/cmd/fluent-bit/Dockerfile b/clients/cmd/fluent-bit/Dockerfile
@@ -1,4 +1,4 @@
-FROM golang:1.21.3-bullseye AS builder
+FROM golang:1.22.0-bullseye AS builder
 
 COPY . /src
 

diff --git a/clients/pkg/promtail/targets/cloudflare/target.go b/clients/pkg/promtail/targets/cloudflare/target.go
@@ -8,13 +8,13 @@ import (
 	"sync"
 	"time"
 
-	"github.com/buger/jsonparser"
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
 	"github.com/grafana/cloudflare-go"
 	"github.com/grafana/dskit/backoff"
 	"github.com/grafana/dskit/concurrency"
 	"github.com/grafana/dskit/multierror"
+	"github.com/grafana/jsonparser"
 	"github.com/prometheus/common/model"
 	"go.uber.org/atomic"
 

diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md
@@ -2327,27 +2327,26 @@ bloom_shipper:
     [max_tasks_enqueued_per_tenant: <int> | default = 10000]
 
   blocks_cache:
-    # Whether embedded cache is enabled.
-    # CLI flag: -blocks-cache.enabled
+    # Cache for bloom blocks. Whether embedded cache is enabled.
+    # CLI flag: -bloom.blocks-cache.enabled
     [enabled: <boolean> | default = false]
 
-    # Maximum memory size of the cache in MB.
-    # CLI flag: -blocks-cache.max-size-mb
+    # Cache for bloom blocks. Maximum memory size of the cache in MB.
+    # CLI flag: -bloom.blocks-cache.max-size-mb
     [max_size_mb: <int> | default = 100]
 
-    # Maximum number of entries in the cache.
-    # CLI flag: -blocks-cache.max-size-items
+    # Cache for bloom blocks. Maximum number of entries in the cache.
+    # CLI flag: -bloom.blocks-cache.max-size-items
     [max_size_items: <int> | default = 0]
 
-    # The time to live for items in the cache before they get purged.
-    # CLI flag: -blocks-cache.ttl
-    [ttl: <duration> | default = 0s]
+    # Cache for bloom blocks. The time to live for items in the cache before
+    # they get purged.
+    # CLI flag: -bloom.blocks-cache.ttl
+    [ttl: <duration> | default = 24h]
 
-    # During this period the process waits until the directory becomes not used
-    # and only after this it will be deleted. If the timeout is reached, the
-    # directory is force deleted.
-    # CLI flag: -blocks-cache.remove-directory-graceful-period
-    [remove_directory_graceful_period: <duration> | default = 5m]
+  # The cache block configures the cache backend.
+  # The CLI flags prefix for this block configuration is: bloom.metas-cache
+  [metas_cache: <cache_config>]
 ```
 
 ### chunk_store_config
@@ -2642,14 +2641,27 @@ ring:
 # CLI flag: -bloom-compactor.enabled
 [enabled: <boolean> | default = false]
 
-# Directory where files can be downloaded for compaction.
-# CLI flag: -bloom-compactor.working-directory
-[working_directory: <string> | default = ""]
-
 # Interval at which to re-run the compaction operation.
 # CLI flag: -bloom-compactor.compaction-interval
 [compaction_interval: <duration> | default = 10m]
 
+# How many index periods (days) to wait before compacting a table. This can be
+# used to lower cost by not re-writing data to object storage too frequently
+# since recent data changes more often.
+# CLI flag: -bloom-compactor.min-table-compaction-period
+[min_table_compaction_period: <int> | default = 1]
+
+# The maximum age, in index periods (days), of tables to compact. This can be
+# used to lower cost by not trying to compact older data which doesn't change.
+# This can be optimized by aligning it with the maximum
+# `reject_old_samples_max_age` setting of any tenant.
+# CLI flag: -bloom-compactor.max-table-compaction-period
+[max_table_compaction_period: <int> | default = 7]
+
+# Number of workers to run in parallel for compaction.
+# CLI flag: -bloom-compactor.worker-parallelism
+[worker_parallelism: <int> | default = 1]
+
 # Minimum backoff time between retries.
 # CLI flag: -bloom-compactor.compaction-retries-min-backoff
 [compaction_retries_min_backoff: <duration> | default = 10s]
@@ -3129,6 +3141,12 @@ shard_streams:
 # CLI flag: -bloom-gateway.cache-key-interval
 [bloom_gateway_cache_key_interval: <duration> | default = 15m]
 
+# The maximum bloom block size. A value of 0 sets an unlimited size. Default is
+# 200MB. The actual block size might exceed this limit since blooms will be
+# added to blocks until the block exceeds the maximum block size.
+# CLI flag: -bloom-compactor.max-block-size
+[bloom_compactor_max_block_size: <int> | default = 200MB]
+
 # Allow user to send structured metadata in push payload.
 # CLI flag: -validation.allow-structured-metadata
 [allow_structured_metadata: <boolean> | default = false]
@@ -3143,14 +3161,22 @@ shard_streams:
 
 # OTLP log ingestion configurations
 otlp_config:
+  # Configuration for resource attributes to store them as index labels or
+  # Structured Metadata or drop them altogether
   resource_attributes:
-    [ignore_defaults: <boolean>]
+    # Configure whether to ignore the default list of resource attributes to be
+    # stored as index labels and only use the given resource attributes config
+    [ignore_defaults: <boolean> | default = false]
 
-    [attributes: <list of AttributesConfigs>]
+    [attributes_config: <list of attributes_configs>]
 
-  [scope_attributes: <list of AttributesConfigs>]
+  # Configuration for scope attributes to store them as Structured Metadata or
+  # drop them altogether
+  [scope_attributes: <list of attributes_configs>]
 
-  [log_attributes: <list of AttributesConfigs>]
+  # Configuration for log attributes to store them as Structured Metadata or
+  # drop them altogether
+  [log_attributes: <list of attributes_configs>]
 ```
 
 ### frontend_worker
@@ -4346,6 +4372,7 @@ The TLS configuration.
 The cache block configures the cache backend. The supported CLI flags `<prefix>` used to reference this configuration block are:
 
 - `bloom-gateway-client.cache`
+- `bloom.metas-cache`
 - `frontend`
 - `frontend.index-stats-results-cache`
 - `frontend.label-results-cache`
@@ -4577,7 +4604,7 @@ chunks:
   [tags: <map of string to string>]
 
 # How many shards will be created. Only used if schema is v10 or greater.
-[row_shards: <int>]
+[row_shards: <int> | default = 16]
 ```
 
 ### aws_storage_config
@@ -5292,6 +5319,24 @@ Named store from this example can be used by setting object_store to store-1 in
 [cos: <map of string to cos_storage_config>]
 ```
 
+### attributes_config
+
+Define actions for matching OpenTelemetry (OTEL) attributes.
+
+```yaml
+# Configures action to take on matching attributes. It allows one of
+# [structured_metadata, drop] for all attribute types. It additionally allows
+# index_label action for resource attributes
+[action: <string> | default = ""]
+
+# List of attributes to configure how to store them or drop them altogether
+[attributes: <list of strings>]
+
+# Regex to choose attributes to configure how to store them or drop them
+# altogether
+[regex: <Regexp>]
+```
+
 ## Runtime Configuration file
 
 Loki has a concept of a "runtime config" file, which is simply a file that is reloaded while Loki is running. It is used by some Loki components to allow operators to change some aspects of the Loki configuration without restarting it. The file is specified by using the `-runtime-config.file=<filename>` flag, and the reload period (which defaults to 10 seconds) can be changed with the `-runtime-config.reload-period=<duration>` flag. Previously this mechanism was only used by limits overrides, and the flags were called `-limits.per-user-override-config=<filename>` and `-limits.per-user-override-period=10s` respectively. These are still used if `-runtime-config.file=<filename>` is not specified.
@@ -5345,7 +5390,8 @@ place in the `limits_config` section:
 configure a runtime configuration file:
 
     ```
-    runtime_config: overrides.yaml
+    runtime_config:
+      file: overrides.yaml
     ```
 
     In the `overrides.yaml` file, add `unordered_writes` for each tenant

diff --git a/docs/sources/configure/index.template b/docs/sources/configure/index.template
@@ -152,7 +152,8 @@ place in the `limits_config` section:
 configure a runtime configuration file:
 
     ```
-    runtime_config: overrides.yaml
+    runtime_config:
+      file: overrides.yaml
     ```
 
     In the `overrides.yaml` file, add `unordered_writes` for each tenant

diff --git a/docs/sources/get-started/labels/structured-metadata.md b/docs/sources/get-started/labels/structured-metadata.md
@@ -5,25 +5,19 @@ description: Describes how to enable structure metadata for logs and how to quer
 ---
 # What is structured metadata
 
-{{% admonition type="warning" %}}
-Structured metadata is an experimental feature and is subject to change in future releases of Grafana Loki.
-{{% /admonition %}}
-
 {{% admonition type="warning" %}}
 Structured metadata was added to chunk format V4 which is used if the schema version is greater than or equal to `13`. (See [Schema Config]({{< relref "../../storage#schema-config" >}}) for more details about schema versions.)
 {{% /admonition %}}
 
-One of the powerful features of Loki is parsing logs at query time to extract metadata and build labels out of it.
-However, the parsing of logs at query time comes with a cost which can be significantly high for, as an example,
-large JSON blobs or a poorly written query using complex regex patterns.
+Selecting proper, low-cardinality labels is critical to operating and querying Loki effectively. Some metadata, especially infrastructure-related metadata, can be difficult to embed in log lines, and is too high cardinality to store effectively as indexed labels, because doing so would reduce the performance of the index.
 
-In addition, the data extracted from logs at query time is usually high cardinality, which can’t be stored
-in the index as it would increase the cardinality too much, and therefore reduce the performance of the index.
-
-Structured metadata is a way to attach metadata to logs without indexing them. Examples of useful metadata are
-trace IDs, user IDs, and any other label that is often used in queries but has high cardinality and is expensive
+Structured metadata is a way to attach metadata to logs without indexing it or including it in the log line content itself. Examples of useful metadata are
+Kubernetes pod names, process IDs, or any other label that is often used in queries but has high cardinality and is expensive
 to extract at query time.
 
+Structured metadata can also be used to query commonly needed metadata from log lines without needing to apply a parser at query time. Large JSON blobs or a poorly written query using complex regex patterns, for example, come with a high performance cost. Examples of useful metadata include trace IDs or user IDs.
+
+
 ## Attaching structured metadata to log lines
 
 You have the option to attach structured metadata to log lines in the push payload along with each log line and the timestamp.
@@ -34,25 +28,37 @@ See the [Promtail: Structured metadata stage]({{< relref "../../send-data/promta
 
 With Loki version 1.2.0, support for structured metadata has been added to the Logstash output plugin. For more information, see [logstash]({{< relref "../../send-data/logstash/_index.md" >}}).
 
+{{% admonition type="warning" %}}
+There are defaults for how much structured metadata can be attached per log line.
+```
+# Maximum size accepted for structured metadata per log line.
+# CLI flag: -limits.max-structured-metadata-size
+[max_structured_metadata_size: <int> | default = 64KB]
+
+# Maximum number of structured metadata entries per log line.
+# CLI flag: -limits.max-structured-metadata-entries-count
+[max_structured_metadata_entries_count: <int> | default = 128]
+```
+{{% /admonition %}}
+
 ## Querying structured metadata
 
 Structured metadata is extracted automatically for each returned log line and added to the labels returned for the query.
 You can use labels of structured metadata to filter log lines using a [label filter expression]({{< relref "../../query/log_queries#label-filter-expression" >}}).
 
-For example, if you have a label `trace_id` attached to some of your log lines as structured metadata, you can filter log lines using:
+For example, if you have a label `pod` attached to some of your log lines as structured metadata, you can filter log lines using:
 
 ```logql
-{job="example"} | trace_id="0242ac120002"
+{job="example"} | pod="myservice-abc1234-56789"
 ```
 
 Of course, you can filter by multiple labels of structured metadata at the same time:
 
 ```logql
-{job="example"} | trace_id="0242ac120002" | user_id="superUser123"
+{job="example"} | pod="myservice-abc1234-56789" | trace_id="0242ac120002"
 ```
 
-Note that since structured metadata is extracted automatically to the results labels, some metric queries might return
-an error like `maximum of series (50000) reached for a single query`. You can use the [Keep]({{< relref "../../query/log_queries#keep-labels-expression" >}}) and [Drop]({{< relref "../../query/log_queries#drop-labels-expression" >}}) stages to filter out labels that you don't need.
+Note that since structured metadata is extracted automatically to the results labels, some metric queries might return an error like `maximum of series (50000) reached for a single query`. You can use the [Keep]({{< relref "../../query/log_queries#keep-labels-expression" >}}) and [Drop]({{< relref "../../query/log_queries#drop-labels-expression" >}}) stages to filter out labels that you don't need.
 For example:
 
 ```logql

diff --git a/docs/sources/operations/query-fairness/_index.md b/docs/sources/operations/query-fairness/_index.md
@@ -115,7 +115,7 @@ you would usually want to avoid this scenario and control yourself where the hea
 
 When using Grafana as the Loki user interface, you can, for example, create multiple data sources
 with the same tenant, but with a different additional HTTP header
-`X-Loki-Scope-Actor` and restrict which Grafana user can use which data source.
+`X-Loki-Actor-Path` and restrict which Grafana user can use which data source.
 
 Alternatively, if you have a proxy for authentication in front of Loki, you can
 pass the (hashed) user from the authentication as downstream header to Loki.