From d5012d989d01a017fd5983c5d4da5fc92f5f72a1 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Fri, 18 Oct 2024 11:00:59 +0200 Subject: [PATCH 01/15] ref(metric-meta): Removes all metric meta code from Relay --- CHANGELOG.md | 12 +- relay-cogs/src/lib.rs | 4 - relay-config/src/config.rs | 38 +--- relay-config/src/redis.rs | 63 ------- relay-metrics/src/lib.rs | 4 - relay-metrics/src/meta/aggregator.rs | 152 ---------------- relay-metrics/src/meta/mod.rs | 11 -- relay-metrics/src/meta/protocol.rs | 165 ----------------- relay-metrics/src/meta/redis.rs | 170 ------------------ relay-metrics/src/statsd.rs | 13 -- relay-redis/src/noop.rs | 2 - relay-redis/src/real.rs | 2 - relay-server/src/endpoints/common.rs | 12 +- relay-server/src/envelope.rs | 13 +- relay-server/src/service.rs | 7 +- relay-server/src/services/processor.rs | 140 +-------------- relay-server/src/services/processor/event.rs | 1 - relay-server/src/services/projects/cache.rs | 40 +---- .../src/services/projects/project/mod.rs | 114 +----------- relay-server/src/services/stats.rs | 2 - relay-server/src/statsd.rs | 9 - relay-server/src/utils/rate_limits.rs | 2 - relay-server/src/utils/sizes.rs | 2 - tests/integration/test_metric_meta.py | 99 ---------- tests/integration/test_metrics.py | 50 +----- tests/integration/test_redis.py | 64 ------- 26 files changed, 33 insertions(+), 1158 deletions(-) delete mode 100644 relay-metrics/src/meta/aggregator.rs delete mode 100644 relay-metrics/src/meta/mod.rs delete mode 100644 relay-metrics/src/meta/protocol.rs delete mode 100644 relay-metrics/src/meta/redis.rs delete mode 100644 tests/integration/test_metric_meta.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d600429163..c75570ec2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,14 @@ # Changelog +## Unreleased + +**Breaking Changes**: + +- Removes support for metric meta envelope items. ([#4152](https://github.com/getsentry/relay/pull/4152)) + ## 24.10.0 -**Breaking Changes:** +**Breaking Changes**: - Only allow processing enabled in managed mode. ([#4087](https://github.com/getsentry/relay/pull/4087)) @@ -12,7 +18,7 @@ - Use the duration reported by the profiler instead of the transaction. ([#4058](https://github.com/getsentry/relay/pull/4058)) - Incorrect pattern matches involving adjacent any and wildcard matchers. ([#4072](https://github.com/getsentry/relay/pull/4072)) -**Features:** +**Features**: - Add a config option to add default tags to all Relay Sentry events. ([#3944](https://github.com/getsentry/relay/pull/3944)) - Automatically derive `client.address` and `user.geo` for standalone spans. ([#4047](https://github.com/getsentry/relay/pull/4047)) @@ -23,7 +29,7 @@ - Add support for creating User from LoginId in Unreal Crash Context. ([#4093](https://github.com/getsentry/relay/pull/4093)) - Add multi-write Redis client. ([#4064](https://github.com/getsentry/relay/pull/4064)) -**Internal:** +**Internal**: - Remove unused `cogs.enabled` configuration option. ([#4060](https://github.com/getsentry/relay/pull/4060)) - Add the dynamic sampling rate to standalone spans as a measurement so that it can be stored, queried, and used for extrapolation. ([#4063](https://github.com/getsentry/relay/pull/4063)) diff --git a/relay-cogs/src/lib.rs b/relay-cogs/src/lib.rs index 7c5a08fbfe..68401d11eb 100644 --- a/relay-cogs/src/lib.rs +++ b/relay-cogs/src/lib.rs @@ -132,9 +132,6 @@ pub enum AppFeature { /// This app feature is for continuous profiling. Profiles, - /// Metric metadata. 
- MetricMeta, - /// Metrics in the transactions namespace. MetricsTransactions, /// Metrics in the spans namespace. @@ -168,7 +165,6 @@ impl AppFeature { Self::ClientReports => "client_reports", Self::CheckIns => "check_ins", Self::Replays => "replays", - Self::MetricMeta => "metric_meta", Self::MetricsTransactions => "metrics_transactions", Self::MetricsSpans => "metrics_spans", Self::MetricsProfiles => "metrics_profiles", diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 54a6b35b60..2a98bc63a3 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -559,17 +559,9 @@ impl Default for Metrics { } /// Controls processing of Sentry metrics and metric metadata. -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Default)] #[serde(default)] struct SentryMetrics { - /// Code locations expiry in seconds. - /// - /// Defaults to 15 days. - pub meta_locations_expiry: u64, - /// Maximum amount of code locations to store per metric. - /// - /// Defaults to 5. - pub meta_locations_max: usize, /// Whether metric stats are collected and emitted. /// /// Metric stats are always collected and emitted when processing @@ -583,16 +575,6 @@ struct SentryMetrics { pub metric_stats_enabled: bool, } -impl Default for SentryMetrics { - fn default() -> Self { - Self { - meta_locations_expiry: 15 * 24 * 60 * 60, - meta_locations_max: 5, - metric_stats_enabled: false, - } - } -} - /// Controls various limits #[derive(Serialize, Deserialize, Debug)] #[serde(default)] @@ -633,8 +615,6 @@ struct Limits { max_statsd_size: ByteSize, /// The maximum payload size for metric buckets. max_metric_buckets_size: ByteSize, - /// The maximum payload size for metric metadata. - max_metric_meta_size: ByteSize, /// The maximum payload size for a compressed replay. max_replay_compressed_size: ByteSize, /// The maximum payload size for an uncompressed replay. @@ -686,7 +666,6 @@ impl Default for Limits { max_span_size: ByteSize::mebibytes(1), max_statsd_size: ByteSize::mebibytes(1), max_metric_buckets_size: ByteSize::mebibytes(1), - max_metric_meta_size: ByteSize::mebibytes(1), max_replay_compressed_size: ByteSize::mebibytes(10), max_replay_uncompressed_size: ByteSize::mebibytes(100), max_replay_message_size: ByteSize::mebibytes(15), @@ -2071,16 +2050,6 @@ impl Config { self.values.metrics.sample_rate } - /// Returns the maximum amount of code locations per metric. - pub fn metrics_meta_locations_max(&self) -> usize { - self.values.sentry_metrics.meta_locations_max - } - - /// Returns the expiry for code locations. - pub fn metrics_meta_locations_expiry(&self) -> Duration { - Duration::from_secs(self.values.sentry_metrics.meta_locations_expiry) - } - /// Returns the interval for periodic metrics emitted from Relay. /// /// `None` if periodic metrics are disabled. @@ -2296,11 +2265,6 @@ impl Config { self.values.limits.max_metric_buckets_size.as_bytes() } - /// Returns the maximum payload size of metric metadata in bytes. - pub fn max_metric_meta_size(&self) -> usize { - self.values.limits.max_metric_meta_size.as_bytes() - } - /// Whether metric stats are collected and emitted. /// /// Metric stats are always collected and emitted when processing diff --git a/relay-config/src/redis.rs b/relay-config/src/redis.rs index 90d29617c9..f1749bdf83 100644 --- a/relay-config/src/redis.rs +++ b/relay-config/src/redis.rs @@ -178,8 +178,6 @@ pub enum RedisConfigs { cardinality: Box, /// Configuration for the `quotas` pool. 
quotas: Box, - /// Configuration for the `misc` pool. - misc: Box, }, } @@ -221,8 +219,6 @@ pub enum RedisPoolConfigs<'a> { cardinality: RedisConfigRef<'a>, /// Configuration for the `quotas` pool. quotas: RedisConfigRef<'a>, - /// Configuration for the `misc` pool. - misc: RedisConfigRef<'a>, }, } @@ -292,18 +288,15 @@ pub(super) fn create_redis_pools(configs: &RedisConfigs, cpu_concurrency: u32) - project_configs, cardinality, quotas, - misc, } => { let project_configs = create_redis_pool(project_configs, project_configs_default_connections); let cardinality = create_redis_pool(cardinality, cpu_concurrency); let quotas = create_redis_pool(quotas, cpu_concurrency); - let misc = create_redis_pool(misc, cpu_concurrency); RedisPoolConfigs::Individual { project_configs, cardinality, quotas, - misc, } } } @@ -378,16 +371,6 @@ quotas: - "redis://127.0.0.2:6379" max_connections: 17 connection_timeout: 5 -misc: - configs: - - cluster_nodes: - - "redis://127.0.0.1:6379" - - "redis://127.0.0.2:6379" - max_connections: 42 - connection_timeout: 5 - - server: "redis://127.0.0.1:6379" - max_connections: 84 - connection_timeout: 10 "#; let configs: RedisConfigs = serde_yaml::from_str(yaml) @@ -417,29 +400,6 @@ misc: ..Default::default() }, }), - misc: Box::new(RedisConfig::MultiWrite { - configs: vec![ - RedisConfig::Cluster { - cluster_nodes: vec![ - "redis://127.0.0.1:6379".to_owned(), - "redis://127.0.0.2:6379".to_owned(), - ], - options: PartialRedisConfigOptions { - max_connections: Some(42), - connection_timeout: 5, - ..Default::default() - }, - }, - RedisConfig::Single(SingleRedisConfig::Detailed { - server: "redis://127.0.0.1:6379".to_owned(), - options: PartialRedisConfigOptions { - max_connections: Some(84), - connection_timeout: 10, - ..Default::default() - }, - }), - ], - }), }; assert_eq!(configs, expected); @@ -733,17 +693,6 @@ read_timeout: 10 }), ], }), - misc: Box::new(RedisConfig::Cluster { - cluster_nodes: vec![ - "redis://127.0.0.1:6379".to_owned(), - "redis://127.0.0.2:6379".to_owned(), - ], - options: PartialRedisConfigOptions { - max_connections: Some(84), - connection_timeout: 10, - ..Default::default() - }, - }), }; assert_json_snapshot!(configs, @r###" @@ -789,18 +738,6 @@ read_timeout: 10 "write_timeout": 3 } ] - }, - "misc": { - "cluster_nodes": [ - "redis://127.0.0.1:6379", - "redis://127.0.0.2:6379" - ], - "max_connections": 84, - "connection_timeout": 10, - "max_lifetime": 300, - "idle_timeout": 60, - "read_timeout": 3, - "write_timeout": 3 } } "###); diff --git a/relay-metrics/src/lib.rs b/relay-metrics/src/lib.rs index 396236c2c4..4897553525 100644 --- a/relay-metrics/src/lib.rs +++ b/relay-metrics/src/lib.rs @@ -70,7 +70,6 @@ pub mod aggregator; pub mod cogs; -pub mod meta; mod bucket; mod finite; @@ -80,8 +79,5 @@ mod view; pub use bucket::*; pub use finite::*; -#[cfg(feature = "redis")] -pub use meta::RedisMetricMetaStore; -pub use meta::{MetaAggregator, MetricMeta}; pub use protocol::*; pub use view::*; diff --git a/relay-metrics/src/meta/aggregator.rs b/relay-metrics/src/meta/aggregator.rs deleted file mode 100644 index db7c051efb..0000000000 --- a/relay-metrics/src/meta/aggregator.rs +++ /dev/null @@ -1,152 +0,0 @@ -use std::{ - collections::{HashMap, HashSet}, - hash::Hash, -}; - -use relay_base_schema::project::ProjectKey; - -use super::{Item, Location, MetricMeta, StartOfDayUnixTimestamp}; -use crate::{statsd::MetricCounters, MetricResourceIdentifier}; - -/// A metrics meta aggregator. 
-/// -/// Aggregates metric metadata based on their scope (project, mri, timestamp) and -/// only keeps the most relevant entries. -/// -/// Currently we track the first N amount of unique metric meta elements we get. -/// -/// This should represent the actual adoption rate of different code versions. -/// -/// This aggregator is purely in memeory and will lose its state on restart, -/// which may cause multiple different items being emitted after restarts. -/// For this we have de-deuplication in the storage and the volume overall -/// of this happening is small enough to just add it to the storage worst case. -#[derive(Debug)] -pub struct MetaAggregator { - /// All tracked code locations. - locations: hashbrown::HashMap>, - - /// Maximum tracked locations. - max_locations: usize, -} - -impl MetaAggregator { - /// Creates a new metrics meta aggregator. - pub fn new(max_locations: usize) -> Self { - Self { - locations: hashbrown::HashMap::new(), - max_locations, - } - } - - /// Adds a new meta item to the aggregator. - /// - /// Returns a new [`MetricMeta`] element when the element should be stored - /// or sent upstream for storage. - /// - /// Returns `None` when the meta item was already seen or is not considered relevant. - pub fn add(&mut self, project_key: ProjectKey, meta: MetricMeta) -> Option { - let mut send_upstream = HashMap::new(); - - let mut total = 0; - for (mri, items) in meta.mapping { - let scope = Scope { - timestamp: meta.timestamp, - project_key, - mri, - }; - - total += items.len(); - if let Some(items) = self.add_scoped(&scope, items) { - send_upstream.insert(scope.mri, items); - } - } - - relay_statsd::metric!(counter(MetricCounters::MetaAggregatorItems) += total as i64); - - if send_upstream.is_empty() { - return None; - } - - relay_statsd::metric!(counter(MetricCounters::MetaAggregatorUpdate) += 1); - Some(MetricMeta { - timestamp: meta.timestamp, - mapping: send_upstream, - }) - } - - /// Retrieves all currently relevant metric meta for a project. - pub fn get_all_relevant(&self, project_key: ProjectKey) -> impl Iterator { - let locations = self - .locations - .iter() - .filter(|(scope, _)| scope.project_key == project_key); - - let mut result = HashMap::new(); - - for (scope, locations) in locations { - result - .entry(scope.timestamp) - .or_insert_with(|| MetricMeta { - timestamp: scope.timestamp, - mapping: HashMap::new(), - }) - .mapping - .entry(scope.mri.clone()) // This clone sucks - .or_insert_with(Vec::new) - .extend(locations.iter().cloned().map(Item::Location)); - } - - result.into_values() - } - - /// Remove all contained state related to a project. - pub fn clear(&mut self, project_key: ProjectKey) { - self.locations - .retain(|scope, _| scope.project_key != project_key); - } - - fn add_scoped(&mut self, scope: &Scope, items: Vec) -> Option> { - // Entry ref needs hashbrown, we would have to clone the scope without or do a separate lookup. - let locations = self.locations.entry_ref(scope).or_default(); - let mut send_upstream = Vec::new(); - - for item in items { - match item { - Item::Location(location) => { - if locations.len() > self.max_locations { - break; - } - - if !locations.contains(&location) { - locations.insert(location.clone()); - send_upstream.push(Item::Location(location)); - } - } - Item::Unknown => {} - } - } - - (!send_upstream.is_empty()).then_some(send_upstream) - } -} - -/// The metadata scope. -/// -/// We scope metadata by project, mri and day, -/// represented as a unix timestamp at the beginning of the day. 
-/// -/// The technical scope (e.g. redis key) also includes the organization id, but this -/// can be inferred from the project. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -struct Scope { - pub timestamp: StartOfDayUnixTimestamp, - pub project_key: ProjectKey, - pub mri: MetricResourceIdentifier<'static>, -} - -impl From<&Scope> for Scope { - fn from(value: &Scope) -> Self { - value.clone() - } -} diff --git a/relay-metrics/src/meta/mod.rs b/relay-metrics/src/meta/mod.rs deleted file mode 100644 index 5504e0dc5a..0000000000 --- a/relay-metrics/src/meta/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Functionality for aggregating and storing of metrics metadata. - -mod aggregator; -mod protocol; -#[cfg(feature = "redis")] -mod redis; - -pub use self::aggregator::*; -pub use self::protocol::*; -#[cfg(feature = "redis")] -pub use self::redis::*; diff --git a/relay-metrics/src/meta/protocol.rs b/relay-metrics/src/meta/protocol.rs deleted file mode 100644 index 7a1223cf8e..0000000000 --- a/relay-metrics/src/meta/protocol.rs +++ /dev/null @@ -1,165 +0,0 @@ -use std::collections::HashMap; - -use chrono::{DateTime, Utc}; -use relay_common::time::UnixTimestamp; -use serde::{Deserialize, Serialize}; - -use crate::MetricResourceIdentifier; - -/// A metric metadata item. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct MetricMeta { - /// Timestamp scope for the contained metadata. - /// - /// Metric metadata is collected in daily intervals, so this may be truncated - /// to the start of the day (UTC) already. - pub timestamp: StartOfDayUnixTimestamp, - - /// The contained metadata mapped by MRI. - pub mapping: HashMap, Vec>, -} - -/// A metadata item. -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(tag = "type", rename_all = "lowercase")] -pub enum Item { - /// A location metadata pointing to the code location where the metric originates from. - Location(Location), - /// Unknown item. - #[serde(other)] - Unknown, -} - -/// A code location. -#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)] -pub struct Location { - /// The relative file path. - #[serde(skip_serializing_if = "Option::is_none")] - pub filename: Option, - /// The absolute file path. - #[serde(skip_serializing_if = "Option::is_none")] - pub abs_path: Option, - /// The containing module name or path. - #[serde(skip_serializing_if = "Option::is_none")] - pub module: Option, - /// The containing function name. - #[serde(skip_serializing_if = "Option::is_none")] - pub function: Option, - /// The line number. - #[serde(skip_serializing_if = "Option::is_none")] - pub lineno: Option, - /// Source code leading up to `lineno`. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub pre_context: Vec>, - /// Source code of the current line (`lineno`). - #[serde(skip_serializing_if = "Option::is_none")] - pub context_line: Option, - /// Source code of the lines after `lineno`. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub post_context: Vec>, -} - -/// A Unix timestamp that is truncated to the start of the day. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct StartOfDayUnixTimestamp(UnixTimestamp); - -impl StartOfDayUnixTimestamp { - /// Creates a new `StartOfDayUnixTimestamp` from a timestamp by truncating it. - /// - /// May return none when passed an invalid date, but in practice this never fails - /// since the [`UnixTimestamp`] is already sufficiently validated. 
- pub fn new(ts: UnixTimestamp) -> Option { - let dt: DateTime = DateTime::from_timestamp(ts.as_secs().try_into().ok()?, 0)?; - let beginning_of_day = dt.date_naive().and_hms_opt(0, 0, 0)?.and_utc(); - Some(Self(UnixTimestamp::from_datetime(beginning_of_day)?)) - } - - /// Returns the underlying unix timestamp, truncated to the start of the day. - pub fn as_timestamp(&self) -> UnixTimestamp { - self.0 - } -} - -impl std::ops::Deref for StartOfDayUnixTimestamp { - type Target = UnixTimestamp; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Serialize for StartOfDayUnixTimestamp { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.0.serialize(serializer) - } -} - -impl<'de> Deserialize<'de> for StartOfDayUnixTimestamp { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let ts = UnixTimestamp::deserialize(deserializer)?; - StartOfDayUnixTimestamp::new(ts) - .ok_or_else(|| serde::de::Error::custom("invalid timestamp")) - } -} - -#[cfg(test)] -mod tests { - use insta::assert_json_snapshot; - - use super::*; - - #[test] - fn test_deserialize_null_context() { - let json = r#"{ - "timestamp": 1715904000, - "mapping": { - "d:memory.allocations@allocations": [{ - "abs_path": "/rails/config/initializers/active_job.rb", - "function": "block in
", - "lineno": 5, - "filename": "config/initializers/active_job.rb", - "pre_context": [null, " allocations = event.allocations\n", " allocations=#{allocations}\"\n"], - "context_line": " Sentry::Metrics.distribution('memory.allocations')\n", - "post_context": ["end\n",null,null], - "type":"location" - }] - } - }"#; - - let r: MetricMeta = serde_json::from_str(json).unwrap(); - - assert_json_snapshot!(r, @r###" - { - "timestamp": 1715904000, - "mapping": { - "d:custom/memory.allocations@allocations": [ - { - "type": "location", - "filename": "config/initializers/active_job.rb", - "abs_path": "/rails/config/initializers/active_job.rb", - "function": "block in
", - "lineno": 5, - "pre_context": [ - null, - " allocations = event.allocations\n", - " allocations=#{allocations}\"\n" - ], - "context_line": " Sentry::Metrics.distribution('memory.allocations')\n", - "post_context": [ - "end\n", - null, - null - ] - } - ] - } - } - "###); - } -} diff --git a/relay-metrics/src/meta/redis.rs b/relay-metrics/src/meta/redis.rs deleted file mode 100644 index 60cda8d50f..0000000000 --- a/relay-metrics/src/meta/redis.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::time::Duration; - -use hash32::{FnvHasher, Hasher as _}; -use relay_base_schema::project::ProjectId; -use relay_common::time::UnixTimestamp; -use relay_redis::{RedisError, RedisPool}; - -use super::{Item, MetricMeta}; -use crate::{statsd::MetricCounters, MetricResourceIdentifier}; - -/// Redis metric meta -pub struct RedisMetricMetaStore { - redis: RedisPool, - expiry: Duration, -} - -impl RedisMetricMetaStore { - /// Creates a new Redis metrics meta store. - pub fn new(redis: RedisPool, expiry: Duration) -> Self { - Self { redis, expiry } - } - - /// Stores metric metadata in Redis. - pub fn store( - &self, - organization_id: u64, - project_id: ProjectId, - meta: MetricMeta, - ) -> Result<(), RedisError> { - let mut client = self.redis.client()?; - let mut connection = client.connection()?; - - let mut redis_updates = 0; - - let mut pipe = relay_redis::redis::pipe(); - for (mri, items) in meta.mapping { - let key = self.build_redis_key(organization_id, project_id, *meta.timestamp, &mri); - - // Should be fine if we don't batch here, we expect a very small amount of locations - // from the aggregator. - let location_cmd = pipe.cmd("SADD").arg(&key); - for item in items { - match item { - Item::Location(location) => { - let member = serde_json::to_string(&location).unwrap(); - location_cmd.arg(member); - } - Item::Unknown => {} - } - } - location_cmd.ignore(); - - redis_updates += 1; - relay_log::trace!("storing metric meta for project {organization_id}:{project_id}"); - - // use original timestamp to not bump expiry - let expire_at = meta.timestamp.as_secs() + self.expiry.as_secs(); - pipe.cmd("EXPIREAT").arg(key).arg(expire_at).ignore(); - } - pipe.query::<()>(&mut connection) - .map_err(RedisError::Redis)?; - - relay_statsd::metric!(counter(MetricCounters::MetaRedisUpdate) += redis_updates); - - Ok(()) - } - - fn build_redis_key( - &self, - organization_id: u64, - project_id: ProjectId, - timestamp: UnixTimestamp, - mri: &MetricResourceIdentifier<'_>, - ) -> String { - let mri_hash = mri_to_fnv1a32(mri); - - format!("mm:l:{{{organization_id}}}:{project_id}:{mri_hash}:{timestamp}") - } -} - -/// Converts an MRI to a fnv1a32 hash. -/// -/// Sentry also uses the exact same algorithm to hash MRIs. 
-fn mri_to_fnv1a32(mri: &MetricResourceIdentifier<'_>) -> u32 { - let mut hasher = FnvHasher::default(); - - let s = mri.to_string(); - // hash the bytes directly, `write_str` on the hasher adds a 0xff byte at the end - std::hash::Hasher::write(&mut hasher, s.as_bytes()); - hasher.finish32() -} - -#[cfg(test)] -mod tests { - use std::collections::HashMap; - - use relay_redis::RedisConfigOptions; - - use crate::meta::{Location, StartOfDayUnixTimestamp}; - - use super::*; - - fn build_store() -> RedisMetricMetaStore { - let url = std::env::var("RELAY_REDIS_URL") - .unwrap_or_else(|_| "redis://127.0.0.1:6379".to_owned()); - - let redis = RedisPool::single(&url, RedisConfigOptions::default()).unwrap(); - - RedisMetricMetaStore::new(redis, Duration::from_secs(86400)) - } - - #[test] - fn test_store() { - let store = build_store(); - - let organization_id = 1000; - let project_id = ProjectId::new(2); - let mri = MetricResourceIdentifier::parse("c:foo").unwrap(); - - let timestamp = StartOfDayUnixTimestamp::new(UnixTimestamp::now()).unwrap(); - - let location = Location { - filename: Some("foobar".to_owned()), - abs_path: None, - module: None, - function: None, - lineno: Some(42), - pre_context: Vec::new(), - context_line: None, - post_context: Vec::new(), - }; - - store - .store( - organization_id, - project_id, - MetricMeta { - timestamp, - mapping: HashMap::from([(mri.clone(), vec![Item::Location(location.clone())])]), - }, - ) - .unwrap(); - - let mut client = store.redis.client().unwrap(); - let mut connection = client.connection().unwrap(); - let key = store.build_redis_key(organization_id, project_id, *timestamp, &mri); - let locations: Vec = relay_redis::redis::cmd("SMEMBERS") - .arg(key) - .query(&mut connection) - .unwrap(); - - assert_eq!(locations, vec![serde_json::to_string(&location).unwrap()]); - } - - #[test] - fn test_mri_hash() { - fn test_mri(s: &str, expected_hash: u32) { - let mri = MetricResourceIdentifier::parse(s).unwrap(); - assert_eq!(mri_to_fnv1a32(&mri), expected_hash); - } - - // Sentry has the same tests. - test_mri("c:transactions/count_per_root_project@none", 2684394786); - test_mri("d:transactions/duration@millisecond", 1147819254); - test_mri("s:transactions/user@none", 1739810785); - test_mri("c:custom/user.click@none", 1248146441); - test_mri("d:custom/page.load@millisecond", 2103554973); - test_mri("s:custom/username@none", 670706478); - } -} diff --git a/relay-metrics/src/statsd.rs b/relay-metrics/src/statsd.rs index ae7976a1fa..f77ff107a2 100644 --- a/relay-metrics/src/statsd.rs +++ b/relay-metrics/src/statsd.rs @@ -40,15 +40,6 @@ pub enum MetricCounters { /// - `aggregator`: The name of the metrics aggregator (usually `"default"`). /// - `namespace`: The namespace of the metric. MergeMiss, - - /// Incremented every time the meta aggregator emitted an update that needs to be stored or - /// sent upstream. - MetaAggregatorUpdate, - /// Incremnted for every metric meta item added to the metric meta aggregator. - MetaAggregatorItems, - /// Incremented every time a redis key is updated to store or update metadata. 
- #[cfg(feature = "redis")] - MetaRedisUpdate, } impl CounterMetric for MetricCounters { @@ -56,10 +47,6 @@ impl CounterMetric for MetricCounters { match *self { Self::MergeHit => "metrics.buckets.merge.hit", Self::MergeMiss => "metrics.buckets.merge.miss", - Self::MetaAggregatorUpdate => "metrics.meta.agg.update", - Self::MetaAggregatorItems => "metrics.meta.agg.items", - #[cfg(feature = "redis")] - Self::MetaRedisUpdate => "metrics.meta.redis.update", } } } diff --git a/relay-redis/src/noop.rs b/relay-redis/src/noop.rs index 452fed960a..944c9929c5 100644 --- a/relay-redis/src/noop.rs +++ b/relay-redis/src/noop.rs @@ -52,6 +52,4 @@ pub struct RedisPools { pub cardinality: RedisPool, /// The pool used for rate limiting/quotas. pub quotas: RedisPool, - /// The pool used for metrics metadata. - pub misc: RedisPool, } diff --git a/relay-redis/src/real.rs b/relay-redis/src/real.rs index 4f6e3cfa8c..30fa44f773 100644 --- a/relay-redis/src/real.rs +++ b/relay-redis/src/real.rs @@ -373,8 +373,6 @@ pub struct RedisPools { pub cardinality: RedisPool, /// The pool used for rate limiting/quotas. pub quotas: RedisPool, - /// The pool used for metrics metadata. - pub misc: RedisPool, } /// Stats about how the [`RedisPool`] is performing. diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 5a124b84b7..6368c4fbe7 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -12,7 +12,7 @@ use crate::envelope::{AttachmentType, Envelope, EnvelopeError, Item, ItemType, I use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{MetricData, ProcessMetricMeta, ProcessingGroup}; +use crate::services::processor::{MetricData, ProcessingGroup}; use crate::services::projects::cache::{CheckEnvelope, ProcessMetrics, ValidateEnvelope}; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -282,16 +282,6 @@ fn queue_envelope( source: envelope.meta().into(), }); } - - // Remove metric meta from the envelope and send them directly to processing. - let metric_meta = envelope.take_items_by(|item| matches!(item.ty(), ItemType::MetricMeta)); - if !metric_meta.is_empty() { - relay_log::trace!("sending metric meta into processing queue"); - state.processor().send(ProcessMetricMeta { - items: metric_meta.into_vec(), - project_key: envelope.meta().public_key(), - }) - } } // Split off the envelopes by item type. diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index afc9b3fb78..5e9673509c 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -105,8 +105,6 @@ pub enum ItemType { Statsd, /// Buckets of preaggregated metrics encoded as JSON. MetricBuckets, - /// Additional metadata for metrics - MetricMeta, /// Client internal report (eg: outcomes). ClientReport, /// Profile event payload encoded as JSON. @@ -168,7 +166,6 @@ impl ItemType { Self::Sessions => "sessions", Self::Statsd => "statsd", Self::MetricBuckets => "metric_buckets", - Self::MetricMeta => "metric_meta", Self::ClientReport => "client_report", Self::Profile => "profile", Self::ReplayEvent => "replay_event", @@ -192,10 +189,7 @@ impl ItemType { /// Returns `true` if the item is a metric type. 
pub fn is_metrics(&self) -> bool { - matches!( - self, - ItemType::Statsd | ItemType::MetricBuckets | ItemType::MetricMeta - ) + matches!(self, ItemType::Statsd | ItemType::MetricBuckets) } } @@ -224,7 +218,6 @@ impl std::str::FromStr for ItemType { "sessions" => Self::Sessions, "statsd" => Self::Statsd, "metric_buckets" => Self::MetricBuckets, - "metric_meta" => Self::MetricMeta, "client_report" => Self::ClientReport, "profile" => Self::Profile, "replay_event" => Self::ReplayEvent, @@ -685,7 +678,7 @@ impl Item { ItemType::UnrealReport => Some(DataCategory::Error), ItemType::Attachment => Some(DataCategory::Attachment), ItemType::Session | ItemType::Sessions => None, - ItemType::Statsd | ItemType::MetricBuckets | ItemType::MetricMeta => None, + ItemType::Statsd | ItemType::MetricBuckets => None, ItemType::FormData => None, ItemType::UserReport => None, ItemType::UserReportV2 => Some(DataCategory::UserReportV2), @@ -915,7 +908,6 @@ impl Item { | ItemType::Sessions | ItemType::Statsd | ItemType::MetricBuckets - | ItemType::MetricMeta | ItemType::ClientReport | ItemType::ReplayEvent | ItemType::ReplayRecording @@ -952,7 +944,6 @@ impl Item { ItemType::Sessions => false, ItemType::Statsd => false, ItemType::MetricBuckets => false, - ItemType::MetricMeta => false, ItemType::ClientReport => false, ItemType::ReplayRecording => false, ItemType::ReplayVideo => false, diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index d7e954fe86..f4c8051284 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -431,25 +431,21 @@ pub fn create_redis_pools(configs: RedisPoolConfigs) -> Result { let project_configs = create_redis_pool(project_configs)?; let cardinality = create_redis_pool(cardinality)?; let quotas = create_redis_pool(quotas)?; - let misc = create_redis_pool(misc)?; Ok(RedisPools { project_configs, cardinality, quotas, - misc, }) } } @@ -460,11 +456,10 @@ fn initialize_redis_scripts_for_pools(redis_pools: &RedisPools) -> Result<(), Re let project_configs = redis_pools.project_configs.client()?; let cardinality = redis_pools.cardinality.client()?; let quotas = redis_pools.quotas.client()?; - let misc = redis_pools.misc.client()?; let scripts = RedisScripts::all(); - let pools = [project_configs, cardinality, quotas, misc]; + let pools = [project_configs, cardinality, quotas]; for pool in pools { initialize_redis_scripts(pool, &scripts)?; } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 494630a01b..f1b2d3e93f 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -30,7 +30,7 @@ use relay_event_schema::protocol::{ ClientReport, Event, EventId, EventType, IpAddr, Metrics, NetworkReportError, }; use relay_filter::FilterStatKey; -use relay_metrics::{Bucket, BucketMetadata, BucketView, BucketsView, MetricMeta, MetricNamespace}; +use relay_metrics::{Bucket, BucketMetadata, BucketView, BucketsView, MetricNamespace}; use relay_pii::PiiConfigError; use relay_profiling::ProfileId; use relay_protocol::Annotated; @@ -52,7 +52,6 @@ use { RedisSetLimiterOptions, }, relay_dynamic_config::{CardinalityLimiterMode, GlobalConfig, MetricExtractionGroups}, - relay_metrics::RedisMetricMetaStore, relay_quotas::{Quota, RateLimitingError, RedisRateLimiter}, relay_redis::{RedisPool, RedisPools}, std::iter::Chain, @@ -73,7 +72,7 @@ use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use 
crate::services::processor::event::FiltersStatus; use crate::services::projects::cache::{ - AddMetricMeta, BucketSource, ProcessMetrics, ProjectCache, UpdateRateLimits, + BucketSource, ProcessMetrics, ProjectCache, UpdateRateLimits, }; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; @@ -992,15 +991,6 @@ pub struct ProcessBatchedMetrics { pub sent_at: Option>, } -/// Parses a list of metric meta items and pushes them to the project cache for aggregation. -#[derive(Debug)] -pub struct ProcessMetricMeta { - /// A list of metric meta items. - pub items: Vec, - /// The target project. - pub project_key: ProjectKey, -} - /// Metric buckets with additional project. #[derive(Debug, Clone)] pub struct ProjectMetrics { @@ -1019,18 +1009,6 @@ pub struct EncodeMetrics { pub scopes: BTreeMap, } -/// Encodes metric meta into an [`Envelope`] and sends it upstream. -/// -/// At the moment, upstream means directly into Redis for processing relays -/// and otherwise submitting the Envelope via HTTP to the [`UpstreamRelay`]. -#[derive(Debug)] -pub struct EncodeMetricMeta { - /// Scoping of the meta. - pub scoping: Scoping, - /// The metric meta. - pub meta: MetricMeta, -} - /// Sends an envelope to the upstream or Kafka. #[derive(Debug)] pub struct SubmitEnvelope { @@ -1052,9 +1030,7 @@ pub enum EnvelopeProcessor { ProcessEnvelope(Box), ProcessProjectMetrics(Box), ProcessBatchedMetrics(Box), - ProcessMetricMeta(Box), EncodeMetrics(Box), - EncodeMetricMeta(Box), SubmitEnvelope(Box), SubmitClientReports(Box), } @@ -1066,9 +1042,7 @@ impl EnvelopeProcessor { EnvelopeProcessor::ProcessEnvelope(_) => "ProcessEnvelope", EnvelopeProcessor::ProcessProjectMetrics(_) => "ProcessProjectMetrics", EnvelopeProcessor::ProcessBatchedMetrics(_) => "ProcessBatchedMetrics", - EnvelopeProcessor::ProcessMetricMeta(_) => "ProcessMetricMeta", EnvelopeProcessor::EncodeMetrics(_) => "EncodeMetrics", - EnvelopeProcessor::EncodeMetricMeta(_) => "EncodeMetricMeta", EnvelopeProcessor::SubmitEnvelope(_) => "SubmitEnvelope", EnvelopeProcessor::SubmitClientReports(_) => "SubmitClientReports", } @@ -1101,14 +1075,6 @@ impl FromMessage for EnvelopeProcessor { } } -impl FromMessage for EnvelopeProcessor { - type Response = NoResponse; - - fn from_message(message: ProcessMetricMeta, _: ()) -> Self { - Self::ProcessMetricMeta(Box::new(message)) - } -} - impl FromMessage for EnvelopeProcessor { type Response = NoResponse; @@ -1117,14 +1083,6 @@ impl FromMessage for EnvelopeProcessor { } } -impl FromMessage for EnvelopeProcessor { - type Response = NoResponse; - - fn from_message(message: EncodeMetricMeta, _: ()) -> Self { - Self::EncodeMetricMeta(Box::new(message)) - } -} - impl FromMessage for EnvelopeProcessor { type Response = NoResponse; @@ -1186,8 +1144,6 @@ struct InnerProcessor { rate_limiter: Option, geoip_lookup: Option, #[cfg(feature = "processing")] - metric_meta_store: Option, - #[cfg(feature = "processing")] cardinality_limiter: Option, metric_outcomes: MetricOutcomes, } @@ -1214,14 +1170,13 @@ impl EnvelopeProcessorService { }); #[cfg(feature = "processing")] - let (cardinality, quotas, misc) = match redis { + let (cardinality, quotas) = match redis { Some(RedisPools { cardinality, quotas, - misc, .. 
- }) => (Some(cardinality), Some(quotas), Some(misc)), - None => (None, None, None), + }) => (Some(cardinality), Some(quotas)), + None => (None, None), }; let inner = InnerProcessor { @@ -1236,10 +1191,6 @@ impl EnvelopeProcessorService { addrs, geoip_lookup, #[cfg(feature = "processing")] - metric_meta_store: misc.map(|misc| { - RedisMetricMetaStore::new(misc, config.metrics_meta_locations_expiry()) - }), - #[cfg(feature = "processing")] cardinality_limiter: cardinality .map(|cardinality| { RedisSetLimiter::new( @@ -2214,35 +2165,6 @@ impl EnvelopeProcessorService { } } - fn handle_process_metric_meta(&self, message: ProcessMetricMeta) { - let ProcessMetricMeta { items, project_key } = message; - - for item in items { - if item.ty() != &ItemType::MetricMeta { - relay_log::error!( - "invalid item of type {} passed to ProcessMetricMeta", - item.ty() - ); - continue; - } - - let payload = item.payload(); - match serde_json::from_slice::(&payload) { - Ok(meta) => { - relay_log::trace!("adding metric metadata to project cache"); - self.inner - .addrs - .project_cache - .send(AddMetricMeta { project_key, meta }); - } - Err(error) => { - metric!(counter(RelayCounters::MetricMetaParsingFailed) += 1); - relay_log::debug!(error = &error as &dyn Error, "failed to parse metric meta"); - } - } - } - } - fn handle_submit_envelope(&self, message: SubmitEnvelope) { let SubmitEnvelope { mut envelope } = message; @@ -2805,54 +2727,6 @@ impl EnvelopeProcessorService { } } - fn handle_encode_metric_meta(&self, message: EncodeMetricMeta) { - #[cfg(feature = "processing")] - if self.inner.config.processing_enabled() { - return self.store_metric_meta(message); - } - - self.encode_metric_meta(message); - } - - fn encode_metric_meta(&self, message: EncodeMetricMeta) { - let EncodeMetricMeta { scoping, meta } = message; - - let upstream = self.inner.config.upstream_descriptor(); - let dsn = PartialDsn::outbound(&scoping, upstream); - - let mut item = Item::new(ItemType::MetricMeta); - item.set_payload(ContentType::Json, serde_json::to_vec(&meta).unwrap()); - let mut envelope = Envelope::from_request(None, RequestMeta::outbound(dsn)); - envelope.add_item(item); - - let envelope = ManagedEnvelope::new( - envelope, - self.inner.addrs.outcome_aggregator.clone(), - self.inner.addrs.test_store.clone(), - ProcessingGroup::Metrics, - ); - self.handle_submit_envelope(SubmitEnvelope { - envelope: envelope.into_processed(), - }); - } - - #[cfg(feature = "processing")] - fn store_metric_meta(&self, message: EncodeMetricMeta) { - let EncodeMetricMeta { scoping, meta } = message; - - let Some(ref metric_meta_store) = self.inner.metric_meta_store else { - return; - }; - - let r = metric_meta_store.store(scoping.organization_id, scoping.project_id, meta); - if let Err(error) = r { - relay_log::error!( - error = &error as &dyn std::error::Error, - "failed to store metric meta in redis" - ) - } - } - #[cfg(all(test, feature = "processing"))] fn redis_rate_limiter_enabled(&self) -> bool { self.inner.rate_limiter.is_some() @@ -2873,9 +2747,7 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessBatchedMetrics(m) => { self.handle_process_batched_metrics(&mut cogs, *m) } - EnvelopeProcessor::ProcessMetricMeta(m) => self.handle_process_metric_meta(*m), EnvelopeProcessor::EncodeMetrics(m) => self.handle_encode_metrics(*m), - EnvelopeProcessor::EncodeMetricMeta(m) => self.handle_encode_metric_meta(*m), EnvelopeProcessor::SubmitEnvelope(m) => self.handle_submit_envelope(*m), EnvelopeProcessor::SubmitClientReports(m) => 
self.handle_submit_client_reports(*m), } @@ -2887,7 +2759,6 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessEnvelope(v) => AppFeature::from(v.envelope.group()).into(), EnvelopeProcessor::ProcessProjectMetrics(_) => AppFeature::Unattributed.into(), EnvelopeProcessor::ProcessBatchedMetrics(_) => AppFeature::Unattributed.into(), - EnvelopeProcessor::ProcessMetricMeta(_) => AppFeature::MetricMeta.into(), EnvelopeProcessor::EncodeMetrics(v) => v .scopes .values() @@ -2901,7 +2772,6 @@ impl EnvelopeProcessorService { } }) .fold(FeatureWeights::none(), FeatureWeights::merge), - EnvelopeProcessor::EncodeMetricMeta(_) => AppFeature::MetricMeta.into(), EnvelopeProcessor::SubmitEnvelope(v) => AppFeature::from(v.envelope.group()).into(), EnvelopeProcessor::SubmitClientReports(_) => AppFeature::ClientReports.into(), } diff --git a/relay-server/src/services/processor/event.rs b/relay-server/src/services/processor/event.rs index 1759d0d078..8f9b22ae14 100644 --- a/relay-server/src/services/processor/event.rs +++ b/relay-server/src/services/processor/event.rs @@ -435,7 +435,6 @@ fn is_duplicate(item: &Item, processing_enabled: bool) -> bool { ItemType::Sessions => false, ItemType::Statsd => false, ItemType::MetricBuckets => false, - ItemType::MetricMeta => false, ItemType::ClientReport => false, ItemType::Profile => false, ItemType::ReplayEvent => false, diff --git a/relay-server/src/services/projects/cache.rs b/relay-server/src/services/projects/cache.rs index dc29f20bc2..658900e873 100644 --- a/relay-server/src/services/projects/cache.rs +++ b/relay-server/src/services/projects/cache.rs @@ -14,7 +14,7 @@ use chrono::{DateTime, Utc}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use relay_metrics::{Bucket, MetricMeta}; +use relay_metrics::Bucket; use relay_quotas::RateLimits; use relay_redis::RedisPool; use relay_statsd::metric; @@ -214,15 +214,6 @@ pub struct ProcessMetrics { pub sent_at: Option>, } -/// Add metric metadata to the aggregator. -#[derive(Debug)] -pub struct AddMetricMeta { - /// The project key. - pub project_key: ProjectKey, - /// The metadata. - pub meta: MetricMeta, -} - /// Updates the buffer index for [`ProjectKey`] with the [`QueueKey`] keys. 
/// /// This message is sent from the project buffer in case of the error while fetching the data from @@ -281,7 +272,6 @@ pub enum ProjectCache { ValidateEnvelope(ValidateEnvelope), UpdateRateLimits(UpdateRateLimits), ProcessMetrics(ProcessMetrics), - AddMetricMeta(AddMetricMeta), FlushBuckets(FlushBuckets), UpdateSpoolIndex(UpdateSpoolIndex), RefreshIndexCache(RefreshIndexCache), @@ -298,7 +288,6 @@ impl ProjectCache { Self::ValidateEnvelope(_) => "ValidateEnvelope", Self::UpdateRateLimits(_) => "UpdateRateLimits", Self::ProcessMetrics(_) => "ProcessMetrics", - Self::AddMetricMeta(_) => "AddMetricMeta", Self::FlushBuckets(_) => "FlushBuckets", Self::UpdateSpoolIndex(_) => "UpdateSpoolIndex", Self::RefreshIndexCache(_) => "RefreshIndexCache", @@ -384,14 +373,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: AddMetricMeta, _: ()) -> Self { - Self::AddMetricMeta(message) - } -} - impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -580,14 +561,11 @@ impl ProjectCacheBroker { } = message; let project_cache = self.services.project_cache.clone(); - let envelope_processor = self.services.envelope_processor.clone(); - let old_state = self.get_or_create_project(project_key).update_state( - &project_cache, - state, - &envelope_processor, - no_cache, - ); + let old_state = + self.get_or_create_project(project_key) + .update_state(&project_cache, state, no_cache); + if let Some(old_state) = old_state { self.garbage_disposal.dispose(old_state); } @@ -847,13 +825,6 @@ impl ProjectCacheBroker { self.services.envelope_processor.send(message); } - fn handle_add_metric_meta(&mut self, message: AddMetricMeta) { - let envelope_processor = self.services.envelope_processor.clone(); - - self.get_or_create_project(message.project_key) - .add_metric_meta(message.meta, envelope_processor); - } - fn handle_flush_buckets(&mut self, message: FlushBuckets) { let aggregator = self.services.aggregator.clone(); let project_cache = self.services.project_cache.clone(); @@ -1150,7 +1121,6 @@ impl ProjectCacheBroker { } ProjectCache::UpdateRateLimits(message) => self.handle_rate_limits(message), ProjectCache::ProcessMetrics(message) => self.handle_process_metrics(message), - ProjectCache::AddMetricMeta(message) => self.handle_add_metric_meta(message), ProjectCache::FlushBuckets(message) => self.handle_flush_buckets(message), ProjectCache::UpdateSpoolIndex(message) => self.handle_buffer_index(message), ProjectCache::RefreshIndexCache(message) => { diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index a553ebd89b..cf4f387c9a 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -4,7 +4,7 @@ use std::time::Duration; use relay_base_schema::project::ProjectKey; use relay_config::Config; use relay_dynamic_config::{ErrorBoundary, Feature}; -use relay_metrics::{Bucket, MetaAggregator, MetricMeta}; +use relay_metrics::Bucket; use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits, Scoping}; use relay_sampling::evaluation::ReservoirCounters; use relay_statsd::metric; @@ -15,7 +15,7 @@ use tokio::time::Instant; use crate::envelope::ItemType; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{EncodeMetricMeta, EnvelopeProcessor, 
ProcessProjectMetrics}; +use crate::services::processor::ProcessProjectMetrics; use crate::services::projects::cache::{ CheckedEnvelope, ProcessMetrics, ProjectCache, RequestUpdate, }; @@ -89,8 +89,6 @@ pub struct Project { rate_limits: CachedRateLimits, last_no_cache: Instant, reservoir_counters: ReservoirCounters, - metric_meta_aggregator: MetaAggregator, - has_pending_metric_meta: bool, } impl Project { @@ -106,8 +104,6 @@ impl Project { rate_limits: CachedRateLimits::new(), last_no_cache: Instant::now(), reservoir_counters: Arc::default(), - metric_meta_aggregator: MetaAggregator::new(config.metrics_meta_locations_max()), - has_pending_metric_meta: false, config, } } @@ -194,91 +190,6 @@ impl Project { )); } - pub fn add_metric_meta( - &mut self, - meta: MetricMeta, - envelope_processor: Addr, - ) { - // Only track metadata if custom metrics are enabled, or we don't know yet whether they are - // enabled. - let is_enabled = match self.current_state() { - ProjectState::Enabled(info) => info.has_feature(Feature::CustomMetrics), - ProjectState::Disabled => false, - ProjectState::Pending => true, - }; - - if !is_enabled { - relay_log::trace!("metric meta not enabled for project {}", self.project_key); - return; - } - - let Some(meta) = self.metric_meta_aggregator.add(self.project_key, meta) else { - // Nothing to do. Which means there is also no pending data. - relay_log::trace!("metric meta aggregator already has data, nothing to send upstream"); - return; - }; - - let scoping = self.scoping(); - match scoping { - Some(scoping) => { - // We can only have a scoping if we also have a state, which means at this point feature - // flags are already checked. - envelope_processor.send(EncodeMetricMeta { scoping, meta }) - } - None => self.has_pending_metric_meta = true, - } - } - - fn flush_metric_meta(&mut self, envelope_processor: &Addr) { - if !self.has_pending_metric_meta { - return; - } - let is_enabled = match self.current_state() { - ProjectState::Enabled(project_info) => project_info.has_feature(Feature::CustomMetrics), - ProjectState::Disabled => false, - ProjectState::Pending => { - // Cannot flush, wait for project state to be loaded. - return; - } - }; - - let Some(scoping) = self.scoping() else { - return; - }; - - // All relevant info has been gathered, consider us flushed. - self.has_pending_metric_meta = false; - - if !is_enabled { - relay_log::debug!( - "clearing metric meta aggregator, because project {} does not have feature flag enabled", - self.project_key, - ); - // Project/Org does not have the feature, forget everything. - self.metric_meta_aggregator.clear(self.project_key); - return; - } - - // Flush the entire aggregator containing all code locations for the project. - // - // There is the rare case when this flushes items which have already been flushed before. - // This happens only if we temporarily lose a previously valid and loaded project state - // and then reveive an update for the project state. - // While this is a possible occurence the effect should be relatively limited, - // especially since the final store does de-deuplication. - for meta in self - .metric_meta_aggregator - .get_all_relevant(self.project_key) - { - relay_log::debug!( - "flushing aggregated metric meta for project {}", - self.project_key - ); - metric!(counter(RelayCounters::ProjectStateFlushAllMetricMeta) += 1); - envelope_processor.send(EncodeMetricMeta { scoping, meta }); - } - } - /// Returns `true` if backoff expired and new attempt can be triggered. 
fn can_fetch(&self) -> bool { self.next_fetch_attempt @@ -438,7 +349,6 @@ impl Project { &mut self, project_cache: &Addr, state: ProjectFetchState, - envelope_processor: &Addr, no_cache: bool, ) -> Option { // Initiate the backoff if the incoming state is invalid. Reset it otherwise. @@ -491,7 +401,7 @@ impl Project { relay_log::debug!("project state {} updated", self.project_key); channel.inner.send(self.state.current_state(&self.config)); - self.after_state_updated(envelope_processor); + self.after_state_updated(); Some(old_state) } @@ -499,8 +409,7 @@ impl Project { /// Called after all state validations and after the project state is updated. /// /// See also: [`Self::update_state`]. - fn after_state_updated(&mut self, envelope_processor: &Addr) { - self.flush_metric_meta(envelope_processor); + fn after_state_updated(&mut self) { // Check if the new sampling config got rid of any reservoir rules we have counters for. self.remove_expired_reservoir_rules(); } @@ -692,7 +601,6 @@ mod tests { #[tokio::test] async fn test_stale_cache() { let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); - let (envelope_processor, _) = mock_service("envelope_processor", (), |&mut (), _| {}); let config = Arc::new( Config::from_json_value(json!( @@ -717,12 +625,7 @@ mod tests { assert!(project.next_fetch_attempt.is_none()); // Try to update project with errored project state. - project.update_state( - &addr, - ProjectFetchState::pending(), - &envelope_processor, - false, - ); + project.update_state(&addr, ProjectFetchState::pending(), false); // Since we got invalid project state we still keep the old one meaning there // still must be the project id set. assert!(matches!(project.current_state(), ProjectState::Enabled(_))); @@ -738,12 +641,7 @@ mod tests { // * without backoff it would just panic, not able to call the ProjectCache service let channel = StateChannel::new(); project.state_channel = Some(channel); - project.update_state( - &addr, - ProjectFetchState::pending(), - &envelope_processor, - false, - ); + project.update_state(&addr, ProjectFetchState::pending(), false); project.fetch_state(addr, false); } diff --git a/relay-server/src/services/stats.rs b/relay-server/src/services/stats.rs index a7b5adc66e..8fe4747454 100644 --- a/relay-server/src/services/stats.rs +++ b/relay-server/src/services/stats.rs @@ -122,13 +122,11 @@ impl RelayStats { project_configs, cardinality, quotas, - misc, }) = self.redis_pools.as_ref() { Self::redis_pool(project_configs, "project_configs"); Self::redis_pool(cardinality, "cardinality"); Self::redis_pool(quotas, "quotas"); - Self::redis_pool(misc, "misc"); } } } diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index e7f3a498db..644beb882f 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -733,11 +733,6 @@ pub enum RelayCounters { /// Failure can happen, for example, when there's a network error. Refer to /// [`UpstreamRequestError`](crate::services::upstream::UpstreamRequestError) for all cases. ProjectUpstreamFailed, - /// Number of full metric data flushes. - /// - /// A full flush takes all contained items of the aggregator and flushes them upstream, - /// at best this happens once per freshly loaded project. - ProjectStateFlushAllMetricMeta, /// Number of Relay server starts. /// /// This can be used to track unwanted restarts due to crashes or termination. 
@@ -822,8 +817,6 @@ pub enum RelayCounters { EvictingStaleProjectCaches, /// Number of times that parsing a metrics bucket item from an envelope failed. MetricBucketsParsingFailed, - /// Number of times that parsing a metric meta item from an envelope failed. - MetricMetaParsingFailed, /// Count extraction of transaction names. Tag with the decision to drop / replace / use original. MetricsTransactionNameExtracted, /// Number of Events with an OpenTelemetry Context @@ -882,7 +875,6 @@ impl CounterMetric for RelayCounters { RelayCounters::ProjectStateGet => "project_state.get", RelayCounters::ProjectStateRequest => "project_state.request", RelayCounters::ProjectStateNoCache => "project_state.no_cache", - RelayCounters::ProjectStateFlushAllMetricMeta => "project_state.flush_all_metric_meta", #[cfg(feature = "processing")] RelayCounters::ProjectStateRedis => "project_state.redis.requests", RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed", @@ -899,7 +891,6 @@ impl CounterMetric for RelayCounters { RelayCounters::ResponsesStatusCodes => "responses.status_codes", RelayCounters::EvictingStaleProjectCaches => "project_cache.eviction", RelayCounters::MetricBucketsParsingFailed => "metrics.buckets.parsing_failed", - RelayCounters::MetricMetaParsingFailed => "metrics.meta.parsing_failed", RelayCounters::MetricsTransactionNameExtracted => "metrics.transaction_name", RelayCounters::OpenTelemetryEvent => "event.opentelemetry", RelayCounters::GlobalConfigFetched => "global_config.fetch", diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs index 2dabb2ed3e..8db1f37a84 100644 --- a/relay-server/src/utils/rate_limits.rs +++ b/relay-server/src/utils/rate_limits.rs @@ -120,7 +120,6 @@ fn infer_event_category(item: &Item) -> Option { ItemType::Sessions => None, ItemType::Statsd => None, ItemType::MetricBuckets => None, - ItemType::MetricMeta => None, ItemType::FormData => None, ItemType::UserReport => None, ItemType::Profile => None, @@ -492,7 +491,6 @@ impl Enforcement { | ItemType::Sessions | ItemType::Statsd | ItemType::MetricBuckets - | ItemType::MetricMeta | ItemType::ClientReport | ItemType::UserReportV2 | ItemType::ProfileChunk diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs index 725306d1d8..4bb4b9d88d 100644 --- a/relay-server/src/utils/sizes.rs +++ b/relay-server/src/utils/sizes.rs @@ -16,7 +16,6 @@ use crate::utils::{ItemAction, ManagedEnvelope}; /// - `max_check_in_size` /// - `max_event_size` /// - `max_metric_buckets_size` -/// - `max_metric_meta_size` /// - `max_profile_size` /// - `max_replay_compressed_size` /// - `max_session_count` @@ -62,7 +61,6 @@ pub fn check_envelope_size_limits(config: &Config, envelope: &Envelope) -> Resul ItemType::UserReport => NO_LIMIT, ItemType::Statsd => config.max_statsd_size(), ItemType::MetricBuckets => config.max_metric_buckets_size(), - ItemType::MetricMeta => config.max_metric_meta_size(), ItemType::Span | ItemType::OtelSpan => config.max_span_size(), ItemType::ProfileChunk => config.max_profile_size(), ItemType::Unknown(_) => NO_LIMIT, diff --git a/tests/integration/test_metric_meta.py b/tests/integration/test_metric_meta.py deleted file mode 100644 index 8dc0822008..0000000000 --- a/tests/integration/test_metric_meta.py +++ /dev/null @@ -1,99 +0,0 @@ -import json -import time - -from sentry_sdk.envelope import Envelope, Item, PayloadRef -from datetime import datetime, timezone, time as dttime - - -def assert_location(expected, actual): - # Compare without the 
'type' which is not persisted into storage. - assert {k: expected[k] for k in expected.keys() - {"type"}} == actual - - -def test_metric_meta(mini_sentry, redis_client, relay_with_processing): - project_id = 42 - now = datetime.now(tz=timezone.utc) - start_of_day = datetime.combine(now, dttime.min, tzinfo=timezone.utc) - - # Key magic number is the fnv32 hash from the MRI. - redis_key = "mm:l:{1}:42:2718098263:" + str(int(start_of_day.timestamp())) - # Clear just so there is no overlap with previous tests. - redis_client.delete(redis_key) - - relay = relay_with_processing() - - project_config = mini_sentry.add_full_project_config(project_id)["config"] - project_config.setdefault("features", []).append("organizations:custom-metrics") - - location1 = { - "type": "location", - "function": "_scan_for_suspect_projects", - "module": "sentry.tasks.low_priority_symbolication", - "filename": "sentry/tasks/low_priority_symbolication.py", - "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", - "lineno": 45, - } - location2 = { - "type": "location", - "function": "_scan_for_suspect_projects_the_second", - "module": "sentry.tasks.low_priority_symbolication", - "filename": "sentry/tasks/low_priority_symbolication.py", - "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", - "lineno": 120, - } - location3 = { - "type": "location", - "function": "_scan_for_suspect_projects_the_second", - "module": "sentry.tasks.low_priority_symbolication", - "filename": "sentry/tasks/low_priority_symbolication.py", - "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", - "lineno": 123, - } - - envelope = Envelope() - payload = { - "timestamp": now.isoformat(), - "mapping": { - "d:custom/sentry.process_profile.track_outcome@second": [ - location1, - location2, - ] - }, - } - envelope.add_item(Item(PayloadRef(json=payload), type="metric_meta")) - relay.send_envelope(project_id, envelope) - - time.sleep(1) - - stored = sorted( - (json.loads(v) for v in redis_client.smembers(redis_key)), - key=lambda x: x["lineno"], - ) - assert len(stored) == 2 - assert_location(location1, stored[0]) - assert_location(location2, stored[1]) - - # Resend one location and add a new location. 
- envelope = Envelope() - payload = { - "timestamp": now.isoformat(), - "mapping": { - "d:custom/sentry.process_profile.track_outcome@second": [ - location1, - location3, - ] - }, - } - envelope.add_item(Item(PayloadRef(json=payload), type="metric_meta")) - relay.send_envelope(project_id, envelope) - - time.sleep(1) - - stored = sorted( - (json.loads(v) for v in redis_client.smembers(redis_key)), - key=lambda x: x["lineno"], - ) - assert len(stored) == 3 - assert_location(location1, stored[0]) - assert_location(location2, stored[1]) - assert_location(location3, stored[2]) diff --git a/tests/integration/test_metrics.py b/tests/integration/test_metrics.py index c168a1b2e2..8ef8953408 100644 --- a/tests/integration/test_metrics.py +++ b/tests/integration/test_metrics.py @@ -2,7 +2,6 @@ from collections import defaultdict from datetime import UTC, datetime, timedelta, timezone from pathlib import Path -from sentry_sdk.envelope import Envelope, Item, PayloadRef import json import signal import time @@ -134,52 +133,9 @@ def test_metrics_proxy_mode_statsd(mini_sentry, relay): relay.send_metrics(project_id, metrics_payload) envelope = mini_sentry.captured_events.get(timeout=3) assert len(envelope.items) == 1 - metric_meta_item = envelope.items[0] - assert metric_meta_item.type == "statsd" - assert metric_meta_item.get_bytes().decode() == metrics_payload - - -def test_metrics_proxy_mode_metrics_meta(mini_sentry, relay): - relay = relay( - mini_sentry, - options={ - "relay": {"mode": "proxy"}, - "aggregator": { - "bucket_interval": 1, - "initial_delay": 0, - "shift_key": "none", - }, - }, - ) - - location = { - "type": "location", - "function": "_scan_for_suspect_projects", - "module": "sentry.tasks.low_priority_symbolication", - "filename": "sentry/tasks/low_priority_symbolication.py", - "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", - "lineno": 45, - } - - meta_payload = { - "timestamp": datetime.now(tz=timezone.utc).isoformat(), - "mapping": { - "d:custom/sentry.process_profile.track_outcome@second": [ - location, - ] - }, - } - meta_payload = json.dumps(meta_payload, sort_keys=True) - - envelope = Envelope() - envelope.add_item(Item(PayloadRef(json=meta_payload), type="metric_meta")) - relay.send_envelope(42, envelope) - - envelope = mini_sentry.captured_events.get(timeout=3) - assert len(envelope.items) == 1 - metric_meta_item = envelope.items[0] - assert metric_meta_item.type == "metric_meta" - assert json.loads(metric_meta_item.get_bytes().decode()) == meta_payload + item = envelope.items[0] + assert item.type == "statsd" + assert item.get_bytes().decode() == metrics_payload def test_metrics(mini_sentry, relay): diff --git a/tests/integration/test_redis.py b/tests/integration/test_redis.py index c263d01ae6..8520e08b83 100644 --- a/tests/integration/test_redis.py +++ b/tests/integration/test_redis.py @@ -1,70 +1,6 @@ -import json -import time import uuid from sentry_sdk.envelope import Envelope, Item, PayloadRef -from datetime import datetime, timezone, time as dttime - - -def test_multi_write_redis_client_with_metric_meta( - mini_sentry, redis_client, secondary_redis_client, relay_with_processing -): - project_id = 42 - now = datetime.now(tz=timezone.utc) - start_of_day = datetime.combine(now, dttime.min, tzinfo=timezone.utc) - - # Key magic number is the fnv32 hash from the MRI. - redis_key = "mm:l:{1}:42:2718098263:" + str(int(start_of_day.timestamp())) - # Clear just so there is no overlap with previous tests. 
- redis_client.delete(redis_key) - - relay = relay_with_processing( - { - "processing": { - "redis": { - "configs": [ - {"server": "redis://127.0.0.1:6379"}, - # We want to test with multiple nesting levels to make sure the multi-write - # works nonetheless. - {"configs": ["redis://127.0.0.1:6380"]}, - ] - } - } - } - ) - - project_config = mini_sentry.add_full_project_config(project_id)["config"] - project_config.setdefault("features", []).append("organizations:custom-metrics") - - location = { - "type": "location", - "function": "_scan_for_suspect_projects", - "module": "sentry.tasks.low_priority_symbolication", - "filename": "sentry/tasks/low_priority_symbolication.py", - "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", - "lineno": 45, - } - - envelope = Envelope() - payload = { - "timestamp": now.isoformat(), - "mapping": { - "d:custom/sentry.process_profile.track_outcome@second": [ - location, - ] - }, - } - envelope.add_item(Item(PayloadRef(json=payload), type="metric_meta")) - relay.send_envelope(project_id, envelope) - - time.sleep(1) - - for client in [redis_client, secondary_redis_client]: - stored = sorted( - (json.loads(v) for v in client.smembers(redis_key)), - key=lambda x: x["lineno"], - ) - assert len(stored) == 1 def test_multi_write_redis_client_with_rate_limiting( From 5c09bffb70d0ac13976cbfe8036733940e5f1287 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Wed, 16 Oct 2024 15:17:19 +0200 Subject: [PATCH 02/15] after rebase for project source move --- Cargo.lock | 85 ++ Cargo.toml | 1 + relay-dynamic-config/src/error_boundary.rs | 8 + relay-quotas/Cargo.toml | 1 + relay-quotas/src/rate_limit.rs | 49 +- relay-server/Cargo.toml | 1 + relay-server/src/endpoints/common.rs | 13 +- relay-server/src/endpoints/project_configs.rs | 33 +- relay-server/src/service.rs | 22 +- relay-server/src/services/buffer/mod.rs | 15 +- relay-server/src/services/processor.rs | 107 ++- relay-server/src/services/projects/cache.rs | 612 ++------------ .../src/services/projects/cache2/handle.rs | 52 ++ .../src/services/projects/cache2/mod.rs | 8 + .../src/services/projects/cache2/project.rs | 376 +++++++++ .../src/services/projects/cache2/service.rs | 136 ++++ .../src/services/projects/cache2/state.rs | 437 ++++++++++ relay-server/src/services/projects/mod.rs | 1 + .../src/services/projects/project/mod.rs | 750 +----------------- .../projects/project/state/fetch_state.rs | 68 +- .../services/projects/project/state/info.rs | 13 +- .../services/projects/project/state/mod.rs | 5 +- 22 files changed, 1316 insertions(+), 1477 deletions(-) create mode 100644 relay-server/src/services/projects/cache2/handle.rs create mode 100644 relay-server/src/services/projects/cache2/mod.rs create mode 100644 relay-server/src/services/projects/cache2/project.rs create mode 100644 relay-server/src/services/projects/cache2/service.rs create mode 100644 relay-server/src/services/projects/cache2/state.rs diff --git a/Cargo.lock b/Cargo.lock index b5703e7f5c..79da6b4549 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,6 +217,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic-wait" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55b94919229f2c42292fd71ffa4b75e83193bffdd77b1e858cd55fd2d0b0ea8" +dependencies = [ + "libc", + "windows-sys 0.42.0", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -2680,6 +2690,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "papaya" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "674d893d27388726e4020eaa60257b3df760392003c14f46455e3277af72cd04" +dependencies = [ + "atomic-wait", + "seize", +] + [[package]] name = "parking" version = "2.2.0" @@ -3649,6 +3669,7 @@ dependencies = [ name = "relay-quotas" version = "24.10.0" dependencies = [ + "arc-swap", "hashbrown", "insta", "itertools 0.13.0", @@ -3741,6 +3762,7 @@ dependencies = [ "minidump", "multer", "once_cell", + "papaya", "pin-project-lite", "priority-queue", "rand", @@ -4154,6 +4176,12 @@ dependencies = [ "libc", ] +[[package]] +name = "seize" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d659fa6f19e82a52ab8d3fff3c380bd8cc16462eaea411395618a38760eb85bc" + [[package]] name = "semver" version = "1.0.23" @@ -5769,6 +5797,21 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -5827,6 +5870,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -5839,6 +5888,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5851,6 +5906,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5869,6 +5930,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5881,6 +5948,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5893,6 +5966,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -5905,6 +5984,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index 2865dccce0..3fd9e67567 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,6 +119,7 @@ num-traits = "0.2.18" num_cpus = "1.13.0" once_cell = "1.13.1" opentelemetry-proto = { version = "0.7.0", default-features = false } +papaya = "0.1.4" parking_lot = "0.12.1" path-slash = "0.2.1" pest = "2.1.3" diff --git a/relay-dynamic-config/src/error_boundary.rs b/relay-dynamic-config/src/error_boundary.rs index c80d2ab8b7..dc4948a3b0 100644 --- a/relay-dynamic-config/src/error_boundary.rs +++ b/relay-dynamic-config/src/error_boundary.rs @@ -42,6 +42,14 @@ impl ErrorBoundary { } } + /// Converts from `ErrorBoundary` to `ErrorBoundary<&T>`. + pub fn as_ref(&self) -> ErrorBoundary<&T> { + match self { + Self::Ok(t) => ErrorBoundary::Ok(t), + Self::Err(e) => ErrorBoundary::Err(Arc::clone(e)), + } + } + /// Returns the contained [`Ok`] value or computes it from a closure. #[inline] pub fn unwrap_or_else(self, op: F) -> T diff --git a/relay-quotas/Cargo.toml b/relay-quotas/Cargo.toml index 4ac6cdb070..f5a3d03d21 100644 --- a/relay-quotas/Cargo.toml +++ b/relay-quotas/Cargo.toml @@ -17,6 +17,7 @@ redis = ["dep:thiserror", "dep:relay-log", "relay-redis/impl"] workspace = true [dependencies] +arc-swap = { workspace = true } hashbrown = { workspace = true } relay-base-schema = { workspace = true } relay-common = { workspace = true } diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index c1162d340e..0c2a80de54 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -1,7 +1,9 @@ use std::fmt; use std::str::FromStr; +use std::sync::Arc; use std::time::{Duration, Instant}; +use arc_swap::ArcSwap; use relay_base_schema::metrics::MetricNamespace; use relay_base_schema::project::{ProjectId, ProjectKey}; use smallvec::SmallVec; @@ -402,7 +404,7 @@ impl<'a> IntoIterator for &'a RateLimits { /// /// The data structure makes sure no expired rate limits are enforced. #[derive(Debug, Default)] -pub struct CachedRateLimits(RateLimits); +pub struct CachedRateLimits(ArcSwap); impl CachedRateLimits { /// Creates a new, empty instance without any rate limits enforced. @@ -413,25 +415,52 @@ impl CachedRateLimits { /// Adds a limit to this collection. /// /// See also: [`RateLimits::add`]. 
- pub fn add(&mut self, limit: RateLimit) { - self.0.add(limit); + pub fn add(&self, limit: RateLimit) { + self.0.rcu(|current| { + let mut current = current.as_ref().clone(); + current.add(limit.clone()); + current + }); } /// Merges more rate limits into this instance. /// /// See also: [`RateLimits::merge`]. - pub fn merge(&mut self, rate_limits: RateLimits) { - for limit in rate_limits { - self.add(limit) - } + pub fn merge(&self, limits: RateLimits) { + self.0.rcu(|current| { + let mut current = current.as_ref().clone(); + for limit in limits.clone() { + current.add(limit) + } + current + }); } /// Returns a reference to the contained rate limits. /// /// This call gurantuess that at the time of call no returned rate limit is expired. - pub fn current_limits(&mut self) -> &RateLimits { - self.0.clean_expired(); - &self.0 + pub fn current_limits(&self) -> Arc { + let now = Instant::now(); + + let mut current = self.0.load(); + while current.iter().any(|rl| rl.retry_after.expired_at(now)) { + let new = { + let mut new = current.as_ref().clone(); + new.clean_expired(); + Arc::new(new) + }; + + let prev = self.0.compare_and_swap(&*current, Arc::clone(&new)); + + // If there was a swap, we know `new` is now stored and the most recent value. + if Arc::ptr_eq(¤t, &prev) { + return new; + } + + current = prev; + } + + arc_swap::Guard::into_inner(current) } } diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index dbec10dc95..4ae8cbe9e6 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -53,6 +53,7 @@ mime = { workspace = true } minidump = { workspace = true, optional = true } multer = { workspace = true } once_cell = { workspace = true } +papaya = { workspace = true } pin-project-lite = { workspace = true } priority-queue = { workspace = true } rand = { workspace = true } diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 6368c4fbe7..e7a86e1d0d 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -12,8 +12,8 @@ use crate::envelope::{AttachmentType, Envelope, EnvelopeError, Item, ItemType, I use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{MetricData, ProcessingGroup}; -use crate::services::projects::cache::{CheckEnvelope, ProcessMetrics, ValidateEnvelope}; +use crate::services::processor::{MetricData, ProcessMetrics, ProcessingGroup}; +use crate::services::projects::cache::ValidateEnvelope; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -274,7 +274,7 @@ fn queue_envelope( if !metric_items.is_empty() { relay_log::trace!("sending metrics into processing queue"); - state.project_cache().send(ProcessMetrics { + state.processor().send(ProcessMetrics { data: MetricData::Raw(metric_items.into_vec()), start_time: envelope.meta().start_time().into(), sent_at: envelope.sent_at(), @@ -367,10 +367,9 @@ pub async fn handle_envelope( } let checked = state - .project_cache() - .send(CheckEnvelope::new(managed_envelope)) - .await - .map_err(|_| BadStoreRequest::ScheduleFailed)? 
+ .project_cache_handle() + .get(managed_envelope.scoping().project_key) + .check_envelope(managed_envelope) .map_err(BadStoreRequest::EventRejected)?; let Some(mut managed_envelope) = checked.envelope else { diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 391687a31f..50cd859fa1 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -5,8 +5,6 @@ use axum::extract::{Query, Request}; use axum::handler::Handler; use axum::response::{IntoResponse, Result}; use axum::{Json, RequestExt}; -use futures::stream::FuturesUnordered; -use futures::StreamExt; use relay_base_schema::project::ProjectKey; use relay_dynamic_config::{ErrorBoundary, GlobalConfig}; use serde::{Deserialize, Serialize}; @@ -16,7 +14,6 @@ use crate::endpoints::forward; use crate::extractors::SignedJson; use crate::service::ServiceState; use crate::services::global_config::{self, StatusResponse}; -use crate::services::projects::cache::{GetCachedProjectState, GetProjectState}; use crate::services::projects::project::{ LimitedParsedProjectState, ParsedProjectState, ProjectState, }; @@ -106,8 +103,6 @@ struct GetProjectStatesRequest { #[serde(default)] full_config: bool, #[serde(default)] - no_cache: bool, - #[serde(default)] global: bool, } @@ -139,30 +134,9 @@ fn into_valid_keys( async fn inner( state: ServiceState, - Query(version): Query, body: SignedJson, ) -> Result { let SignedJson { inner, relay } = body; - let project_cache = &state.project_cache().clone(); - - let no_cache = inner.no_cache; - let keys_len = inner.public_keys.len(); - - let mut futures: FuturesUnordered<_> = into_valid_keys(inner.public_keys, inner.revisions) - .map(|(project_key, revision)| async move { - let state_result = if version.version >= ENDPOINT_V3 && !no_cache { - project_cache - .send(GetCachedProjectState::new(project_key)) - .await - } else { - project_cache - .send(GetProjectState::new(project_key).no_cache(no_cache)) - .await - }; - - (project_key, revision, state_result) - }) - .collect(); let (global, global_status) = if inner.global { match state.global_config().send(global_config::Get).await? { @@ -178,12 +152,15 @@ async fn inner( (None, None) }; + let keys_len = inner.public_keys.len(); let mut pending = Vec::with_capacity(keys_len); let mut unchanged = Vec::with_capacity(keys_len); let mut configs = HashMap::with_capacity(keys_len); - while let Some((project_key, revision, state_result)) = futures.next().await { - let project_info = match state_result? { + for (project_key, revision) in into_valid_keys(inner.public_keys, inner.revisions) { + let project = state.project_cache_handle().get(project_key); + + let project_info = match project.project_state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { // Don't insert project config. Downstream Relay will consider it disabled. 
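
Note on the `CachedRateLimits` rework earlier in this patch: by storing the limits behind an `ArcSwap`, both `add`/`merge` and `current_limits` now take `&self`, and readers get a cheap `Arc` snapshot instead of a borrow tied to a mutable cache entry. That is what lets callers such as the processor merge limits through the shared project handle instead of sending `UpdateRateLimits` messages. Below is a minimal, self-contained sketch of the same copy-on-write pattern using the `arc-swap` crate; the names (`CachedLimits`, `Vec<u64>`) are illustrative stand-ins, not Relay types.

use std::sync::Arc;

use arc_swap::ArcSwap;

// Copy-on-write container: writers clone the current value, modify the copy,
// and atomically publish it; readers never block and always observe a
// consistent snapshot.
#[derive(Debug, Default)]
struct CachedLimits(ArcSwap<Vec<u64>>);

impl CachedLimits {
    // Shared-reference mutation, mirroring the new `add`/`merge` signatures.
    fn add(&self, limit: u64) {
        // `rcu` re-runs the closure if another writer raced this update.
        self.0.rcu(|current| {
            let mut next = current.as_ref().clone();
            next.push(limit);
            next
        });
    }

    // Cheap snapshot for readers; the returned `Arc` stays valid even if a
    // writer swaps in a newer version concurrently.
    fn current(&self) -> Arc<Vec<u64>> {
        self.0.load_full()
    }
}

The real implementation in `relay-quotas` additionally prunes expired limits on read via a `compare_and_swap` loop; the sketch only shows the shared-mutation part.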
diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index f4c8051284..47b31a815e 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -1,5 +1,4 @@ use std::convert::Infallible; -use std::fmt; use std::sync::Arc; use std::time::Duration; @@ -13,6 +12,7 @@ use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOut use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; use crate::services::projects::cache::{ProjectCache, ProjectCacheService, Services}; +use crate::services::projects::cache2::ProjectCacheHandle; use crate::services::relays::{RelayCache, RelayCacheService}; use crate::services::stats::RelayStats; #[cfg(feature = "processing")] @@ -52,7 +52,7 @@ pub enum ServiceError { Redis, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Registry { pub aggregator: Addr, pub health_check: Addr, @@ -65,18 +65,8 @@ pub struct Registry { pub project_cache: Addr, pub upstream_relay: Addr, pub envelope_buffer: Option, -} -impl fmt::Debug for Registry { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Registry") - .field("aggregator", &self.aggregator) - .field("health_check", &self.health_check) - .field("outcome_producer", &self.outcome_producer) - .field("outcome_aggregator", &self.outcome_aggregator) - .field("processor", &format_args!("Addr")) - .finish() - } + pub project_cache_handle: ProjectCacheHandle, } /// Constructs a tokio [`Runtime`] configured for running [services](relay_system::Service). @@ -144,6 +134,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, + registry: Registry, } @@ -357,6 +348,11 @@ impl ServiceState { &self.inner.registry.project_cache } + /// Returns a [`ProjectCacheHandle`]. + pub fn project_cache_handle(&self) -> &ProjectCacheHandle { + &self.inner.registry.project_cache_handle + } + /// Returns the address of the [`RelayCache`] service. pub fn relay_cache(&self) -> &Addr { &self.inner.registry.relay_cache diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 3bcc0c5fea..10221615b8 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -21,8 +21,10 @@ use crate::services::outcome::DiscardReason; use crate::services::outcome::Outcome; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{DequeuedEnvelope, ProjectCache, UpdateProject}; +use crate::services::projects::cache::DequeuedEnvelope; +use crate::services::projects::cache2::ProjectCacheHandle; +use crate::services::projects::cache2::ProjectEvent; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::ManagedEnvelope; @@ -100,7 +102,7 @@ pub struct Services { /// Bounded channel used exclusively to handle backpressure when sending envelopes to the /// project cache. 
pub envelopes_tx: mpsc::Sender, - pub project_cache: Addr, + pub project_cache_handle: ProjectCacheHandle, pub outcome_aggregator: Addr, pub test_store: Addr, } @@ -269,12 +271,12 @@ impl EnvelopeBufferService { relay_log::trace!("EnvelopeBufferService: requesting project(s) update"); let own_key = envelope.meta().public_key(); - services.project_cache.send(UpdateProject(own_key)); + services.project_cache_handle.fetch(own_key); match envelope.sampling_key() { None => {} Some(sampling_key) if sampling_key == own_key => {} // already sent. Some(sampling_key) => { - services.project_cache.send(UpdateProject(sampling_key)); + services.project_cache_handle.fetch(sampling_key); } } @@ -398,6 +400,7 @@ impl Service for EnvelopeBufferService { buffer.initialize().await; let mut shutdown = Controller::shutdown_handle(); + let mut project_events = self.services.project_cache_handle.events(); relay_log::info!("EnvelopeBufferService: starting"); loop { @@ -427,6 +430,10 @@ impl Service for EnvelopeBufferService { } } } + ProjectEvent::Ready(project_key) = project_events.recv() => { + Self::handle_message(&mut buffer, EnvelopeBuffer::Ready(project_key)).await; + sleep = Duration::ZERO; + } Some(message) = rx.recv() => { Self::handle_message(&mut buffer, message).await; sleep = Duration::ZERO; diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index f1b2d3e93f..572c252817 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -71,9 +71,8 @@ use crate::services::global_config::GlobalConfigHandle; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::event::FiltersStatus; -use crate::services::projects::cache::{ - BucketSource, ProcessMetrics, ProjectCache, UpdateRateLimits, -}; +use crate::services::projects::cache::{BucketSource, ProjectCache}; +use crate::services::projects::cache2::ProjectCacheHandle; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; use crate::services::upstream::{ @@ -875,7 +874,7 @@ pub struct ProcessEnvelope { /// The project info. pub project_info: Arc, /// Currently active cached rate limits for this project. - pub rate_limits: RateLimits, + pub rate_limits: Arc, /// Root sampling project info. pub sampling_project_info: Option>, /// Sampling reservoir counters. @@ -894,16 +893,7 @@ pub struct ProcessEnvelope { /// Additionally, processing applies clock drift correction using the system clock of this Relay, if /// the Envelope specifies the [`sent_at`](Envelope::sent_at) header. #[derive(Debug)] -pub struct ProcessProjectMetrics { - /// The project state the metrics belong to. - /// - /// The project state can be pending, in which case cached rate limits - /// and other project specific operations are skipped and executed once - /// the project state becomes available. - pub project_state: ProjectState, - /// Currently active cached rate limits for this project. - pub rate_limits: RateLimits, - +pub struct ProcessMetrics { /// A list of metric items. pub data: MetricData, /// The target project. @@ -999,7 +989,7 @@ pub struct ProjectMetrics { /// Project info for extracting quotas. pub project_info: Arc, /// Currently cached rate limits. - pub rate_limits: RateLimits, + pub rate_limits: Arc, } /// Encodes metrics into an envelope ready to be sent upstream. 
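
As seen in the envelope buffer changes above, the service no longer sends `UpdateProject` messages to the project cache; it calls `ProjectCacheHandle::fetch` and reacts to `ProjectEvent::Ready` notifications inside its `tokio::select!` loop. A rough, self-contained sketch of that subscription shape follows, assuming the event stream behaves like a tokio broadcast channel (the concrete channel type lives in the new `cache2` module and is not part of this hunk; the `u64` key and `println!` bodies are placeholders).

use tokio::sync::{broadcast, mpsc};

#[derive(Clone, Debug)]
enum ProjectEvent {
    Ready(u64),   // stand-in for `ProjectKey`
    Evicted(u64),
}

// Service loop that multiplexes its regular mailbox with project readiness
// events, similar to the buffer service's select loop in this patch.
async fn service_loop(
    mut mailbox: mpsc::Receiver<String>,
    mut project_events: broadcast::Receiver<ProjectEvent>,
) {
    loop {
        tokio::select! {
            Ok(event) = project_events.recv() => match event {
                // A project config became available: buffered envelopes for
                // this key can now be popped and forwarded.
                ProjectEvent::Ready(key) => println!("project {key} is ready"),
                ProjectEvent::Evicted(key) => println!("project {key} was evicted"),
            },
            Some(message) = mailbox.recv() => {
                println!("handling mailbox message: {message}");
            }
            else => break,
        }
    }
}

One consequence of this design is that the cache does not need to know which services care about a project becoming ready: any interested service can hold its own receiver, as both the buffer and the project cache broker do later in this patch.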
@@ -1028,7 +1018,7 @@ pub struct SubmitClientReports { #[derive(Debug)] pub enum EnvelopeProcessor { ProcessEnvelope(Box), - ProcessProjectMetrics(Box), + ProcessProjectMetrics(Box), ProcessBatchedMetrics(Box), EncodeMetrics(Box), SubmitEnvelope(Box), @@ -1059,10 +1049,10 @@ impl FromMessage for EnvelopeProcessor { } } -impl FromMessage for EnvelopeProcessor { +impl FromMessage for EnvelopeProcessor { type Response = NoResponse; - fn from_message(message: ProcessProjectMetrics, _: ()) -> Self { + fn from_message(message: ProcessMetrics, _: ()) -> Self { Self::ProcessProjectMetrics(Box::new(message)) } } @@ -1136,6 +1126,7 @@ struct InnerProcessor { workers: WorkerGroup, config: Arc, global_config: GlobalConfigHandle, + project_cache_handle: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] quotas_pool: Option, @@ -1318,10 +1309,11 @@ impl EnvelopeProcessorService { // Update cached rate limits with the freshly computed ones. if !limits.is_empty() { - self.inner.addrs.project_cache.send(UpdateRateLimits::new( - state.managed_envelope.scoping().project_key, - limits, - )); + self.inner + .project_cache_handle + .get(state.managed_envelope.scoping().project_key) + .rate_limits() + .merge(limits); } Ok(()) @@ -2064,10 +2056,8 @@ impl EnvelopeProcessorService { } } - fn handle_process_project_metrics(&self, cogs: &mut Token, message: ProcessProjectMetrics) { - let ProcessProjectMetrics { - project_state, - rate_limits, + fn handle_process_metrics(&self, cogs: &mut Token, message: ProcessMetrics) { + let ProcessMetrics { data, project_key, start_time, @@ -2106,13 +2096,16 @@ impl EnvelopeProcessorService { true }); + let project = self.inner.project_cache_handle.get(project_key); + // Best effort check to filter and rate limit buckets, if there is no project state // available at the current time, we will check again after flushing. - let buckets = match project_state.enabled() { - Some(project_info) => { + let buckets = match project.project_state() { + ProjectState::Enabled(project_info) => { + let rate_limits = project.cached_rate_limits(); self.check_buckets(project_key, &project_info, &rate_limits, buckets) } - None => buckets, + _ => buckets, }; relay_log::trace!("merging metric buckets into the aggregator"); @@ -2147,21 +2140,17 @@ impl EnvelopeProcessorService { } }; - let mut feature_weights = FeatureWeights::none(); for (project_key, buckets) in buckets { - feature_weights = feature_weights.merge(relay_metrics::cogs::BySize(&buckets).into()); - - self.inner.addrs.project_cache.send(ProcessMetrics { - data: MetricData::Parsed(buckets), - project_key, - source, - start_time: start_time.into(), - sent_at, - }); - } - - if !feature_weights.is_empty() { - cogs.update(feature_weights); + self.handle_process_metrics( + cogs, + ProcessMetrics { + data: MetricData::Parsed(buckets), + project_key, + source, + start_time: start_time.into(), + sent_at, + }, + ) } } @@ -2362,10 +2351,11 @@ impl EnvelopeProcessorService { Outcome::RateLimited(reason_code), ); - self.inner.addrs.project_cache.send(UpdateRateLimits::new( - item_scoping.scoping.project_key, - limits, - )); + self.inner + .project_cache_handle + .get(item_scoping.scoping.project_key) + .rate_limits() + .merge(limits); } } @@ -2440,9 +2430,10 @@ impl EnvelopeProcessorService { if was_enforced { // Update the rate limits in the project cache. 
self.inner - .addrs - .project_cache - .send(UpdateRateLimits::new(scoping.project_key, rate_limits)); + .project_cache_handle + .get(scoping.project_key) + .rate_limits() + .merge(rate_limits); } } } @@ -2742,7 +2733,7 @@ impl EnvelopeProcessorService { match message { EnvelopeProcessor::ProcessEnvelope(m) => self.handle_process_envelope(*m), EnvelopeProcessor::ProcessProjectMetrics(m) => { - self.handle_process_project_metrics(&mut cogs, *m) + self.handle_process_metrics(&mut cogs, *m) } EnvelopeProcessor::ProcessBatchedMetrics(m) => { self.handle_process_batched_metrics(&mut cogs, *m) @@ -2888,7 +2879,7 @@ pub struct SendEnvelope { envelope: TypedEnvelope, body: Bytes, http_encoding: HttpEncoding, - project_cache: Addr, + project_cache_handle: ProjectCacheHandle, } impl UpstreamRequest for SendEnvelope { @@ -2940,10 +2931,10 @@ impl UpstreamRequest for SendEnvelope { self.envelope.accept(); if let UpstreamRequestError::RateLimited(limits) = error { - self.project_cache.send(UpdateRateLimits::new( - scoping.project_key, - limits.scope(&scoping), - )); + self.project_cache_handle + .get(scoping.project_key) + .rate_limits() + .merge(limits.scope(&scoping)); } } Err(error) => { @@ -3683,7 +3674,7 @@ mod tests { ), (BucketSource::Internal, None), ] { - let message = ProcessProjectMetrics { + let message = ProcessMetrics { data: MetricData::Raw(vec![item.clone()]), project_state: ProjectState::Pending, rate_limits: Default::default(), @@ -3692,7 +3683,7 @@ mod tests { start_time, sent_at: Some(Utc::now()), }; - processor.handle_process_project_metrics(&mut token, message); + processor.handle_process_metrics(&mut token, message); let value = aggregator_rx.recv().await.unwrap(); let Aggregator::MergeBuckets(merge_buckets) = value else { diff --git a/relay-server/src/services/projects/cache.rs b/relay-server/src/services/projects/cache.rs index 658900e873..2d99733db1 100644 --- a/relay-server/src/services/projects/cache.rs +++ b/relay-server/src/services/projects/cache.rs @@ -7,25 +7,22 @@ use crate::extractors::RequestMeta; use crate::services::buffer::{EnvelopeBuffer, EnvelopeBufferError}; use crate::services::global_config; use crate::services::processor::{ - EncodeMetrics, EnvelopeProcessor, MetricData, ProcessEnvelope, ProcessingGroup, ProjectMetrics, + EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; +use crate::services::projects::cache2::{CheckedEnvelope, ProjectCacheHandle, ProjectEvent}; use crate::Envelope; -use chrono::{DateTime, Utc}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; use relay_metrics::Bucket; -use relay_quotas::RateLimits; use relay_redis::RedisPool; use relay_statsd::metric; -use relay_system::{Addr, FromMessage, Interface, Sender, Service}; +use relay_system::{Addr, FromMessage, Interface, Service}; use tokio::sync::{mpsc, watch}; -use tokio::time::Instant; -use crate::services::metrics::{Aggregator, FlushBuckets}; +use crate::services::metrics::{Aggregator, FlushBuckets, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::services::projects::project::{Project, ProjectFetchState, ProjectSender, ProjectState}; -use crate::services::projects::source::ProjectSource; +use crate::services::projects::project::{ProjectFetchState, ProjectState}; use crate::services::spooler::{ self, Buffer, BufferService, DequeueMany, Enqueue, QueueKey, RemoveMany, RestoreIndex, UnspooledEnvelope, BATCH_KEY_COUNT, @@ -36,102 +33,6 @@ use 
crate::services::upstream::UpstreamRelay; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; -/// Requests a refresh of a project state from one of the available sources. -/// -/// The project state is resolved in the following precedence: -/// -/// 1. Local file system -/// 2. Redis cache (processing mode only) -/// 3. Upstream (managed and processing mode only) -/// -/// Requests to the upstream are performed via `UpstreamProjectSource`, which internally batches -/// individual requests. -#[derive(Clone, Debug)] -pub struct RequestUpdate { - /// The public key to fetch the project by. - pub project_key: ProjectKey, - /// If true, all caches should be skipped and a fresh state should be computed. - pub no_cache: bool, - /// Previously cached fetch state, if available. - /// - /// The upstream request will include the revision of the currently cached state, - /// if the upstream does not have a different revision, this cached - /// state is re-used and its expiry bumped. - pub cached_state: ProjectFetchState, -} - -/// Returns the project state. -/// -/// The project state is fetched if it is missing or outdated. If `no_cache` is specified, then the -/// state is always refreshed. -#[derive(Debug)] -pub struct GetProjectState { - project_key: ProjectKey, - no_cache: bool, -} - -impl GetProjectState { - /// Fetches the project state and uses the cached version if up-to-date. - pub fn new(project_key: ProjectKey) -> Self { - Self { - project_key, - no_cache: false, - } - } - - /// Fetches the project state and conditionally skips the cache. - pub fn no_cache(mut self, no_cache: bool) -> Self { - self.no_cache = no_cache; - self - } -} - -/// Returns the project state if it is already cached. -/// -/// This is used for cases when we only want to perform operations that do -/// not require waiting for network requests. -#[derive(Debug)] -pub struct GetCachedProjectState { - project_key: ProjectKey, -} - -impl GetCachedProjectState { - pub fn new(project_key: ProjectKey) -> Self { - Self { project_key } - } -} - -/// A checked envelope and associated rate limits. -/// -/// Items violating the rate limits have been removed from the envelope. If all items are removed -/// from the envelope, `None` is returned in place of the envelope. -#[derive(Debug)] -pub struct CheckedEnvelope { - pub envelope: Option, - pub rate_limits: RateLimits, -} - -/// Checks the envelope against project configuration and rate limits. -/// -/// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated -/// project state may be used, or otherwise the envelope is passed through unaltered. -/// -/// To check the envelope, this runs: -/// - Validate origins and public keys -/// - Quotas with a limit of `0` -/// - Cached rate limits -#[derive(Debug)] -pub struct CheckEnvelope { - envelope: ManagedEnvelope, -} - -impl CheckEnvelope { - /// Uses a cached project state and checks the envelope. - pub fn new(envelope: ManagedEnvelope) -> Self { - Self { envelope } - } -} - /// Validates the envelope against project configuration and rate limits. 
/// /// This ensures internally that the project state is up to date and then runs the same checks as @@ -154,21 +55,6 @@ impl ValidateEnvelope { } } -#[derive(Debug)] -pub struct UpdateRateLimits { - project_key: ProjectKey, - rate_limits: RateLimits, -} - -impl UpdateRateLimits { - pub fn new(project_key: ProjectKey, rate_limits: RateLimits) -> UpdateRateLimits { - Self { - project_key, - rate_limits, - } - } -} - /// Source information where a metric bucket originates from. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum BucketSource { @@ -195,25 +81,6 @@ impl From<&RequestMeta> for BucketSource { } } -/// Starts the processing flow for received metrics. -/// -/// Enriches the raw data with projcet information and forwards -/// the metrics using [`ProcessProjectMetrics`](crate::services::processor::ProcessProjectMetrics). -#[derive(Debug)] -pub struct ProcessMetrics { - /// A list of metric items. - pub data: MetricData, - /// The target project. - pub project_key: ProjectKey, - /// Whether to keep or reset the metric metadata. - pub source: BucketSource, - /// The instant at which the request was received. - pub start_time: Instant, - /// The value of the Envelope's [`sent_at`](crate::envelope::Envelope::sent_at) - /// header for clock drift correction. - pub sent_at: Option>, -} - /// Updates the buffer index for [`ProjectKey`] with the [`QueueKey`] keys. /// /// This message is sent from the project buffer in case of the error while fetching the data from @@ -262,36 +129,19 @@ pub struct UpdateProject(pub ProjectKey); /// See the enumerated variants for a full list of available messages for this service. #[derive(Debug)] pub enum ProjectCache { - RequestUpdate(RequestUpdate), - Get(GetProjectState, ProjectSender), - GetCached(GetCachedProjectState, Sender), - CheckEnvelope( - CheckEnvelope, - Sender>, - ), ValidateEnvelope(ValidateEnvelope), - UpdateRateLimits(UpdateRateLimits), - ProcessMetrics(ProcessMetrics), FlushBuckets(FlushBuckets), UpdateSpoolIndex(UpdateSpoolIndex), RefreshIndexCache(RefreshIndexCache), - UpdateProject(ProjectKey), } impl ProjectCache { pub fn variant(&self) -> &'static str { match self { - Self::RequestUpdate(_) => "RequestUpdate", - Self::Get(_, _) => "Get", - Self::GetCached(_, _) => "GetCached", - Self::CheckEnvelope(_, _) => "CheckEnvelope", Self::ValidateEnvelope(_) => "ValidateEnvelope", - Self::UpdateRateLimits(_) => "UpdateRateLimits", - Self::ProcessMetrics(_) => "ProcessMetrics", Self::FlushBuckets(_) => "FlushBuckets", Self::UpdateSpoolIndex(_) => "UpdateSpoolIndex", Self::RefreshIndexCache(_) => "RefreshIndexCache", - Self::UpdateProject(_) => "UpdateProject", } } } @@ -314,41 +164,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: RequestUpdate, _: ()) -> Self { - Self::RequestUpdate(message) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::BroadcastResponse; - - fn from_message(message: GetProjectState, sender: ProjectSender) -> Self { - Self::Get(message, sender) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::AsyncResponse; - - fn from_message(message: GetCachedProjectState, sender: Sender) -> Self { - Self::GetCached(message, sender) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::AsyncResponse>; - - fn from_message( - message: CheckEnvelope, - sender: Sender>, - ) -> Self { - Self::CheckEnvelope(message, sender) - } -} - impl 
FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -357,22 +172,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: UpdateRateLimits, _: ()) -> Self { - Self::UpdateRateLimits(message) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: ProcessMetrics, _: ()) -> Self { - Self::ProcessMetrics(message) - } -} - impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -381,27 +180,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: UpdateProject, _: ()) -> Self { - let UpdateProject(project_key) = message; - Self::UpdateProject(project_key) - } -} - -/// Updates the cache with new project state information. -struct UpdateProjectState { - /// The public key to fetch the project by. - project_key: ProjectKey, - - /// New project state information. - state: ProjectFetchState, - - /// If true, all caches should be skipped and a fresh state should be computed. - no_cache: bool, -} - /// Holds the addresses of all services required for [`ProjectCache`]. #[derive(Debug, Clone)] pub struct Services { @@ -423,14 +201,9 @@ struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, services: Services, - // Need hashbrown because extract_if is not stable in std yet. - projects: hashbrown::HashMap, + projects: ProjectCacheHandle, /// Utility for disposing of expired project data in a background thread. garbage_disposal: GarbageDisposal, - /// Source for fetching project states from the upstream or from disk. - source: ProjectSource, - /// Tx channel used to send the updated project state whenever requested. - state_tx: mpsc::UnboundedSender, /// Handle to schedule periodic unspooling of buffered envelopes (spool V1). spool_v1_unspool_handle: SleepHandle, @@ -493,194 +266,35 @@ impl ProjectCacheBroker { .send(DequeueMany::new(keys, spool_v1.buffer_tx.clone())) } - /// Evict projects that are over its expiry date. - /// - /// Ideally, we would use `check_expiry` to determine expiry here. - /// However, for eviction, we want to add an additional delay, such that we do not delete - /// a project that has expired recently and for which a fetch is already underway in - /// [`super::source::upstream`]. 
- fn evict_stale_project_caches(&mut self) { - let eviction_start = Instant::now(); - let delta = 2 * self.config.project_cache_expiry() + self.config.project_grace_period(); - - let expired = self - .projects - .extract_if(|_, entry| entry.last_updated_at() + delta <= eviction_start); - - // Defer dropping the projects to a dedicated thread: - let mut count = 0; - for (project_key, project) in expired { - if let Some(spool_v1) = self.spool_v1.as_mut() { - let keys = spool_v1 - .index - .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) - .collect::>(); - - if !keys.is_empty() { - spool_v1.buffer.send(RemoveMany::new(project_key, keys)) - } - } - - self.garbage_disposal.dispose(project); - count += 1; - } - metric!(counter(RelayCounters::EvictingStaleProjectCaches) += count); - - // Log garbage queue size: - let queue_size = self.garbage_disposal.queue_size() as f64; - metric!(gauge(RelayGauges::ProjectCacheGarbageQueueSize) = queue_size); - - metric!(timer(RelayTimers::ProjectStateEvictionDuration) = eviction_start.elapsed()); - } - - fn get_or_create_project(&mut self, project_key: ProjectKey) -> &mut Project { - metric!(histogram(RelayHistograms::ProjectStateCacheSize) = self.projects.len() as u64); - - let config = self.config.clone(); - - self.projects - .entry(project_key) - .and_modify(|_| { - metric!(counter(RelayCounters::ProjectCacheHit) += 1); - }) - .or_insert_with(move || { - metric!(counter(RelayCounters::ProjectCacheMiss) += 1); - Project::new(project_key, config) - }) - } - - /// Updates the [`Project`] with received [`ProjectState`]. - /// - /// If the project state is valid we also send the message to the buffer service to dequeue the - /// envelopes for this project. - fn merge_state(&mut self, message: UpdateProjectState) { - let UpdateProjectState { - project_key, - state, - no_cache, - } = message; - - let project_cache = self.services.project_cache.clone(); - - let old_state = - self.get_or_create_project(project_key) - .update_state(&project_cache, state, no_cache); - - if let Some(old_state) = old_state { - self.garbage_disposal.dispose(old_state); - } - - // Try to schedule unspool if it's not scheduled yet. - self.schedule_unspool(); - - if let Some(envelope_buffer) = self.services.envelope_buffer.as_ref() { - envelope_buffer.send(EnvelopeBuffer::Ready(project_key)) + fn evict_project(&mut self, project_key: ProjectKey) { + let Some(ref mut spool_v1) = self.spool_v1 else { + return; }; - } - - fn handle_request_update(&mut self, message: RequestUpdate) { - let RequestUpdate { - project_key, - no_cache, - cached_state, - } = message; - - // Bump the update time of the project in our hashmap to evade eviction. - let project = self.get_or_create_project(project_key); - project.refresh_updated_timestamp(); - let next_attempt = project.next_fetch_attempt(); - - let source = self.source.clone(); - let sender = self.state_tx.clone(); - - tokio::spawn(async move { - // Wait on the new attempt time when set. - if let Some(next_attempt) = next_attempt { - tokio::time::sleep_until(next_attempt).await; - } - let state = source - .fetch(project_key, no_cache, cached_state) - .await - .unwrap_or_else(|e| { - relay_log::error!( - error = &e as &dyn Error, - tags.project_key = %project_key, - "Failed to fetch project from source" - ); - // TODO: change this to ProjectFetchState::pending() once we consider it safe to do so. - // see https://github.com/getsentry/relay/pull/4140. 
- ProjectFetchState::disabled() - }); - - let message = UpdateProjectState { - project_key, - no_cache, - state, - }; - - sender.send(message).ok(); - }); - } - fn handle_get(&mut self, message: GetProjectState, sender: ProjectSender) { - let GetProjectState { - project_key, - no_cache, - } = message; - let project_cache = self.services.project_cache.clone(); - let project = self.get_or_create_project(project_key); - - project.get_state(project_cache, sender, no_cache); - } + let keys = spool_v1 + .index + .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) + .collect::>(); - fn handle_get_cached(&mut self, message: GetCachedProjectState) -> ProjectState { - let project_cache = self.services.project_cache.clone(); - self.get_or_create_project(message.project_key) - .get_cached_state(project_cache, false) + if !keys.is_empty() { + spool_v1.buffer.send(RemoveMany::new(project_key, keys)) + } } - fn handle_check_envelope( - &mut self, - message: CheckEnvelope, - ) -> Result { - let CheckEnvelope { envelope: context } = message; - let project_cache = self.services.project_cache.clone(); - let project_key = context.envelope().meta().public_key(); - if let Some(sampling_key) = context.envelope().sampling_key() { - if sampling_key != project_key { - let sampling_project = self.get_or_create_project(sampling_key); - sampling_project.prefetch(project_cache.clone(), false); - } + fn handle_project_event(&mut self, event: ProjectEvent) { + match event { + ProjectEvent::Ready(_) => self.schedule_unspool(), + ProjectEvent::Evicted(project_key) => self.evict_project(project_key), } - let project = self.get_or_create_project(project_key); - - // Preload the project cache so that it arrives a little earlier in processing. However, - // do not pass `no_cache`. In case the project is rate limited, we do not want to force - // a full reload. Fetching must not block the store request. - project.prefetch(project_cache, false); - - project.check_envelope(context) } /// Handles the processing of the provided envelope. fn handle_processing(&mut self, key: QueueKey, mut managed_envelope: ManagedEnvelope) { let project_key = managed_envelope.envelope().meta().public_key(); - let Some(project) = self.projects.get_mut(&project_key) else { - relay_log::error!( - tags.project_key = %project_key, - "project could not be found in the cache", - ); + let project = self.projects.get(project_key); - let mut project = Project::new(project_key, self.config.clone()); - project.prefetch(self.services.project_cache.clone(), false); - self.projects.insert(project_key, project); - self.enqueue(key, managed_envelope); - return; - }; - - let project_cache = self.services.project_cache.clone(); - let project_info = match project.get_cached_state(project_cache.clone(), false) { + let project_info = match project.project_state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -707,20 +321,19 @@ impl ProjectCacheBroker { .. 
}) = project.check_envelope(managed_envelope) { - let rate_limits = project.current_rate_limits().clone(); - - let reservoir_counters = project.reservoir_counters(); + let rate_limits = project.rate_limits().current_limits(); + let reservoir_counters = project.reservoir_counters().clone(); let sampling_project_info = managed_envelope .envelope() .sampling_key() - .and_then(|key| self.projects.get_mut(&key)) - .and_then(|p| p.get_cached_state(project_cache, false).enabled()) - .filter(|state| state.organization_id == project_info.organization_id); + .map(|key| self.projects.get(key)) + .and_then(|p| p.project_state().clone().enabled()) + .filter(|info| info.organization_id == project_info.organization_id); let process = ProcessEnvelope { envelope: managed_envelope, - project_info, + project_info: Arc::clone(&project_info), rate_limits, sampling_project_info, reservoir_counters, @@ -748,17 +361,13 @@ impl ProjectCacheBroker { envelope: mut managed_envelope, } = message; - let project_cache = self.services.project_cache.clone(); let envelope = managed_envelope.envelope(); // Fetch the project state for our key and make sure it's not invalid. let own_key = envelope.meta().public_key(); - let project = self.get_or_create_project(own_key); - - let project_state = - project.get_cached_state(project_cache.clone(), envelope.meta().no_cache()); + let project = self.projects.get(own_key); - let project_state = match project_state { + let project_state = match project.project_state() { ProjectState::Enabled(state) => Some(state), ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -770,12 +379,10 @@ impl ProjectCacheBroker { // Also, fetch the project state for sampling key and make sure it's not invalid. let sampling_key = envelope.sampling_key(); let mut requires_sampling_state = sampling_key.is_some(); - let sampling_state = if let Some(sampling_key) = sampling_key { - let state = self - .get_or_create_project(sampling_key) - .get_cached_state(project_cache, envelope.meta().no_cache()); - match state { - ProjectState::Enabled(state) => Some(state), + let sampling_info = if let Some(sampling_key) = sampling_key { + let sampling_project = self.projects.get(sampling_key); + match sampling_project.project_state() { + ProjectState::Enabled(info) => Some(Arc::clone(&info)), ProjectState::Disabled => { relay_log::trace!("Sampling state is disabled ({sampling_key})"); // We accept events even if its root project has been disabled. @@ -796,7 +403,7 @@ impl ProjectCacheBroker { // Trigger processing once we have a project state and we either have a sampling project // state or we do not need one. 
if project_state.is_some() - && (sampling_state.is_some() || !requires_sampling_state) + && (sampling_info.is_some() || !requires_sampling_state) && self.memory_checker.check_memory().has_capacity() && self.global_config.is_ready() { @@ -809,37 +416,20 @@ impl ProjectCacheBroker { self.enqueue(key, managed_envelope); } - fn handle_rate_limits(&mut self, message: UpdateRateLimits) { - self.get_or_create_project(message.project_key) - .merge_rate_limits(message.rate_limits); - } - - fn handle_process_metrics(&mut self, message: ProcessMetrics) { - let project_cache = self.services.project_cache.clone(); - - let message = self - .get_or_create_project(message.project_key) - .prefetch(project_cache, false) - .process_metrics(message); - - self.services.envelope_processor.send(message); - } - fn handle_flush_buckets(&mut self, message: FlushBuckets) { let aggregator = self.services.aggregator.clone(); - let project_cache = self.services.project_cache.clone(); let mut no_project = 0; let mut scoped_buckets = BTreeMap::new(); for (project_key, buckets) in message.buckets { - let project = self.get_or_create_project(project_key); + let project = self.projects.get(project_key); - let project_info = match project.current_state() { + let project_info = match project.project_state() { ProjectState::Pending => { no_project += 1; - // Schedule an update for the project just in case. - project.prefetch(project_cache.clone(), false); - project.return_buckets(&aggregator, buckets); + + // Return the buckets to the aggregator. + aggregator.send(MergeBuckets::new(project_key, buckets)); continue; } ProjectState::Disabled => { @@ -854,7 +444,7 @@ impl ProjectCacheBroker { ProjectState::Enabled(project_info) => project_info, }; - let Some(scoping) = project.scoping() else { + let Some(scoping) = project_info.scoping(project_key) else { relay_log::error!( tags.project_key = project_key.as_str(), "there is no scoping: dropping {} buckets", @@ -867,8 +457,8 @@ impl ProjectCacheBroker { match scoped_buckets.entry(scoping) { Vacant(entry) => { entry.insert(ProjectMetrics { - project_info, - rate_limits: project.current_rate_limits().clone(), + project_info: Arc::clone(project_info), + rate_limits: project.rate_limits().current_limits(), buckets, }); } @@ -895,16 +485,14 @@ impl ProjectCacheBroker { fn handle_refresh_index_cache(&mut self, message: RefreshIndexCache) { let RefreshIndexCache(index) = message; - let project_cache = self.services.project_cache.clone(); for key in index { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); spool_v1.index.insert(key); - self.get_or_create_project(key.own_key) - .prefetch(project_cache.clone(), false); + + self.projects.fetch(key.own_key); if key.own_key != key.sampling_key { - self.get_or_create_project(key.sampling_key) - .prefetch(project_cache.clone(), false); + self.projects.fetch(key.sampling_key); } } } @@ -918,11 +506,10 @@ impl ProjectCacheBroker { let services = self.services.clone(); let own_key = envelope.meta().public_key(); - let project = self.get_or_create_project(own_key); - let project_state = project.get_cached_state(services.project_cache.clone(), false); + let project = self.projects.get(own_key); // Check if project config is enabled. 
- let project_info = match project_state { + let project_info = match project.project_state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( @@ -944,8 +531,7 @@ impl ProjectCacheBroker { let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, - self.get_or_create_project(sampling_key) - .get_cached_state(services.project_cache, false), + self.projects.get(sampling_key).project_state().clone(), ) }) { Some((_, ProjectState::Enabled(info))) => { @@ -964,8 +550,6 @@ impl ProjectCacheBroker { None => None, }; - let project = self.get_or_create_project(own_key); - // Reassign processing groups and proceed to processing. for (group, envelope) in ProcessingGroup::split_envelope(*envelope) { let managed_envelope = ManagedEnvelope::new( @@ -983,11 +567,11 @@ impl ProjectCacheBroker { continue; // Outcomes are emitted by check_envelope }; - let reservoir_counters = project.reservoir_counters(); + let reservoir_counters = project.reservoir_counters().clone(); services.envelope_processor.send(ProcessEnvelope { envelope: managed_envelope, project_info: project_info.clone(), - rate_limits: project.current_rate_limits().clone(), + rate_limits: project.rate_limits().current_limits(), sampling_project_info: sampling_project_info.clone(), reservoir_counters, }); @@ -996,22 +580,6 @@ impl ProjectCacheBroker { Ok(()) } - fn handle_update_project(&mut self, project_key: ProjectKey) { - let project_cache = self.services.project_cache.clone(); - let envelope_buffer = self.services.envelope_buffer.clone(); - let project = self.get_or_create_project(project_key); - - // If the project is already loaded, inform the envelope buffer. - if !project.current_state().is_pending() { - if let Some(envelope_buffer) = envelope_buffer { - envelope_buffer.send(EnvelopeBuffer::Ready(project_key)); - } - } - - let no_cache = false; - project.prefetch(project_cache, no_cache); - } - /// Returns backoff timeout for an unspool attempt. fn next_unspool_attempt(&mut self) -> Duration { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); @@ -1036,15 +604,9 @@ impl ProjectCacheBroker { /// Which includes the own key and the sampling key for the project. /// Note: this function will trigger [`ProjectState`] refresh if it's already expired. fn is_state_cached(&mut self, key: &QueueKey) -> bool { - key.unique_keys().iter().all(|key| { - self.projects.get_mut(key).is_some_and(|project| { - // Returns `Some` if the project is cached otherwise None and also triggers refresh - // in background. 
- !project - .get_cached_state(self.services.project_cache.clone(), false) - .is_pending() - }) - }) + key.unique_keys() + .iter() + .all(|key| !self.projects.get(*key).project_state().is_pending()) } /// Iterates the buffer index and tries to unspool the envelopes for projects with a valid @@ -1108,25 +670,14 @@ impl ProjectCacheBroker { message = ty, { match message { - ProjectCache::RequestUpdate(message) => self.handle_request_update(message), - ProjectCache::Get(message, sender) => self.handle_get(message, sender), - ProjectCache::GetCached(message, sender) => { - sender.send(self.handle_get_cached(message)) - } - ProjectCache::CheckEnvelope(message, sender) => { - sender.send(self.handle_check_envelope(message)) - } ProjectCache::ValidateEnvelope(message) => { self.handle_validate_envelope(message) } - ProjectCache::UpdateRateLimits(message) => self.handle_rate_limits(message), - ProjectCache::ProcessMetrics(message) => self.handle_process_metrics(message), ProjectCache::FlushBuckets(message) => self.handle_flush_buckets(message), ProjectCache::UpdateSpoolIndex(message) => self.handle_buffer_index(message), ProjectCache::RefreshIndexCache(message) => { self.handle_refresh_index_cache(message) } - ProjectCache::UpdateProject(project) => self.handle_update_project(project), } } ) @@ -1153,6 +704,7 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, + project_cache: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, /// Bounded channel used exclusively to receive envelopes from the envelope buffer. @@ -1165,6 +717,7 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, + project_cache: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, envelopes_rx: mpsc::Receiver, @@ -1173,6 +726,7 @@ impl ProjectCacheService { Self { config, memory_checker, + project_cache, services, global_config_rx, envelopes_rx, @@ -1188,22 +742,20 @@ impl Service for ProjectCacheService { let Self { config, memory_checker, + project_cache, services, mut global_config_rx, mut envelopes_rx, redis, } = self; + let project_events = project_cache.events(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); tokio::spawn(async move { - let mut ticker = tokio::time::interval(config.cache_eviction_interval()); relay_log::info!("project cache started"); - // Channel for async project state responses back into the project cache. - let (state_tx, mut state_rx) = mpsc::unbounded_channel(); - let global_config = match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => { relay_log::info!("global config received"); @@ -1256,20 +808,12 @@ impl Service for ProjectCacheService { }), }; - // Main broker that serializes public and internal messages, and triggers project state - // fetches via the project source. 
let mut broker = ProjectCacheBroker { config: config.clone(), memory_checker, - projects: hashbrown::HashMap::new(), + projects: todo!(), garbage_disposal: GarbageDisposal::new(), - source: ProjectSource::start( - config.clone(), - services.upstream_relay.clone(), - redis, - ), services, - state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1, global_config, @@ -1289,9 +833,9 @@ impl Service for ProjectCacheService { } }) }, - Some(message) = state_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "merge_state", { - broker.merge_state(message) + project_event = project_events.recv() => { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_project_event", { + broker.handle_project_event(project_event); }) } // Buffer will not dequeue the envelopes from the spool if there is not enough @@ -1301,11 +845,6 @@ impl Service for ProjectCacheService { broker.handle_processing(key, managed_envelope) }) }, - _ = ticker.tick() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "evict_project_caches", { - broker.evict_stale_project_caches() - }) - } () = &mut broker.spool_v1_unspool_handle => { metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "periodic_unspool", { broker.handle_periodic_unspool() @@ -1334,17 +873,10 @@ impl Service for ProjectCacheService { #[derive(Debug)] #[allow(dead_code)] // Fields are never read, only used for discarding/dropping data. enum ProjectGarbage { - Project(Project), ProjectFetchState(ProjectFetchState), Metrics(Vec), } -impl From for ProjectGarbage { - fn from(value: Project) -> Self { - Self::Project(value) - } -} - impl From for ProjectGarbage { fn from(value: ProjectFetchState) -> Self { Self::ProjectFetchState(value) @@ -1390,7 +922,6 @@ mod tests { async fn project_cache_broker_setup( services: Services, - state_tx: mpsc::UnboundedSender, buffer_tx: mpsc::UnboundedSender, ) -> (ProjectCacheBroker, Addr) { let config: Arc<_> = Config::from_json_value(serde_json::json!({ @@ -1429,11 +960,9 @@ mod tests { ProjectCacheBroker { config: config.clone(), memory_checker, - projects: hashbrown::HashMap::new(), + projects: todo!(), garbage_disposal: GarbageDisposal::new(), - source: ProjectSource::start(config, services.upstream_relay.clone(), None), services, - state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1: Some(SpoolV1 { buffer_tx, @@ -1452,10 +981,9 @@ mod tests { relay_log::init_test!(); let services = mocked_services(); - let (state_tx, _) = mpsc::unbounded_channel(); let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); let (mut broker, _buffer_svc) = - project_cache_broker_setup(services.clone(), state_tx, buffer_tx).await; + project_cache_broker_setup(services.clone(), buffer_tx).await; broker.global_config = GlobalConfigStatus::Ready; let (tx_update, mut rx_update) = mpsc::unbounded_channel(); @@ -1529,10 +1057,9 @@ mod tests { #[tokio::test] async fn handle_processing_without_project() { let services = mocked_services(); - let (state_tx, _) = mpsc::unbounded_channel(); let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); let (mut broker, buffer_svc) = - project_cache_broker_setup(services.clone(), state_tx, buffer_tx.clone()).await; + project_cache_broker_setup(services.clone(), buffer_tx.clone()).await; let dsn = "111d836b15bb49d7bbf99e64295d995b"; let project_key = ProjectKey::parse(dsn).unwrap(); @@ -1548,7 +1075,6 @@ mod tests { ); // Index and projects are empty. 
- assert!(broker.projects.is_empty()); assert!(broker.spool_v1.as_mut().unwrap().index.is_empty()); // Since there is no project we should not process anything but create a project and spool @@ -1556,7 +1082,11 @@ mod tests { broker.handle_processing(key, envelope); // Assert that we have a new project and also added an index. - assert!(broker.projects.get(&project_key).is_some()); + assert!(!broker + .projects + .get(project_key) + .project_state() + .is_pending()); assert!(broker.spool_v1.as_mut().unwrap().index.contains(&key)); // Check is we actually spooled anything. diff --git a/relay-server/src/services/projects/cache2/handle.rs b/relay-server/src/services/projects/cache2/handle.rs new file mode 100644 index 0000000000..e7afb43454 --- /dev/null +++ b/relay-server/src/services/projects/cache2/handle.rs @@ -0,0 +1,52 @@ +use core::fmt; +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_system::Addr; +use tokio::sync::broadcast; + +use super::state::Shared; +use crate::services::projects::cache2::service::ProjectEvent; +use crate::services::projects::cache2::{Project, ProjectCache}; + +#[derive(Clone)] +pub struct ProjectCacheHandle { + shared: Arc, + config: Arc, + service: Addr, + project_events: broadcast::Sender, +} + +impl ProjectCacheHandle { + /// Returns the current project state for the `project_key`. + pub fn get(&self, project_key: ProjectKey) -> Project<'_> { + // TODO: maybe we should always trigger a fetch? + // We need a way to continously keep projects updated while at the same time + // let unused projects expire. + // TODO: trigger prefetch for the sampling projects, maybe take a resolver trait which can + // also resolve the sampling project and fetch? Or do it explicit. + let project = match self.shared.get_or_create(project_key) { + Ok(project) => project, + Err(missing) => missing.fetch(&self.service), + }; + Project::new(project, &self.config) + } + + pub fn fetch(&self, project_key: ProjectKey) { + // TODO: does this make sense? 
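+        // The send is fire-and-forget: the service deduplicates fetches that are
+        // already in flight (see `PrivateProjectState::try_begin_fetch`), so calling
+        // this repeatedly for the same key schedules at most one fetch at a time.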
+ self.service.send(ProjectCache::Fetch(project_key)); + } + + pub fn events(&self) -> broadcast::Receiver { + self.project_events.subscribe() + } +} + +impl fmt::Debug for ProjectCacheHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ProjectCacheHandle") + .field("shared", &self.shared) + .finish() + } +} diff --git a/relay-server/src/services/projects/cache2/mod.rs b/relay-server/src/services/projects/cache2/mod.rs new file mode 100644 index 0000000000..62d55aff70 --- /dev/null +++ b/relay-server/src/services/projects/cache2/mod.rs @@ -0,0 +1,8 @@ +mod handle; +mod project; +mod service; +mod state; + +pub use self::handle::ProjectCacheHandle; +pub use self::project::{CheckedEnvelope, Project}; +pub use self::service::{ProjectCache, ProjectCacheService, ProjectEvent}; diff --git a/relay-server/src/services/projects/cache2/project.rs b/relay-server/src/services/projects/cache2/project.rs new file mode 100644 index 0000000000..2722360954 --- /dev/null +++ b/relay-server/src/services/projects/cache2/project.rs @@ -0,0 +1,376 @@ +use std::sync::Arc; + +use relay_config::Config; +use relay_dynamic_config::Feature; +use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits}; +use relay_sampling::evaluation::ReservoirCounters; + +use crate::envelope::ItemType; +use crate::services::outcome::{DiscardReason, Outcome}; +use crate::services::projects::cache2::state::SharedProject; +use crate::services::projects::project::ProjectState; +use crate::utils::{CheckLimits, Enforcement, EnvelopeLimiter, ManagedEnvelope}; + +/// A loaded project. +pub struct Project<'a> { + shared: SharedProject, + config: &'a Config, +} + +impl<'a> Project<'a> { + pub(crate) fn new(shared: SharedProject, config: &'a Config) -> Self { + Self { shared, config } + } + + pub fn project_state(&self) -> &ProjectState { + self.shared.project_state() + } + + pub fn rate_limits(&self) -> &CachedRateLimits { + self.shared.cached_rate_limits() + } + + pub fn reservoir_counters(&self) -> &ReservoirCounters { + self.shared.reservoir_counters() + } + + /// Runs the checks on incoming envelopes. + /// + /// See, [`crate::services::projects::cache::CheckEnvelope`] for more information + /// + /// * checks the rate limits + /// * validates the envelope meta in `check_request` - determines whether the given request + /// should be accepted or discarded + /// + /// IMPORTANT: If the [`ProjectState`] is invalid, the `check_request` will be skipped and only + /// rate limits will be validated. This function **must not** be called in the main processing + /// pipeline. + pub fn check_envelope( + &self, + mut envelope: ManagedEnvelope, + ) -> Result { + let state = match self.project_state() { + ProjectState::Enabled(state) => Some(Arc::clone(&state)), + ProjectState::Disabled => { + // TODO(jjbayer): We should refactor this function to either return a Result or + // handle envelope rejections internally, but not both. 
+ envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); + return Err(DiscardReason::ProjectId); + } + ProjectState::Pending => None, + }; + + let mut scoping = envelope.scoping(); + + if let Some(ref state) = state { + scoping = state.scope_request(envelope.envelope().meta()); + envelope.scope(scoping); + + if let Err(reason) = state.check_envelope(envelope.envelope(), &self.config) { + envelope.reject(Outcome::Invalid(reason)); + return Err(reason); + } + } + + let current_limits = self.rate_limits().current_limits(); + + let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); + let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { + Ok(current_limits.check_with_quotas(quotas, item_scoping)) + }); + + let (mut enforcement, mut rate_limits) = + envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; + + let check_nested_spans = state + .as_ref() + .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); + + // If we can extract spans from the event, we want to try and count the number of nested + // spans to correctly emit negative outcomes in case the transaction itself is dropped. + if check_nested_spans { + sync_spans_to_enforcement(&envelope, &mut enforcement); + } + + enforcement.apply_with_outcomes(&mut envelope); + + envelope.update(); + + // Special case: Expose active rate limits for all metric namespaces if there is at least + // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary + // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. + if envelope.envelope().items().any(|i| i.ty().is_metrics()) { + let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); + metrics_scoping.namespace = MetricNamespaceScoping::Any; + rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); + } + + let envelope = if envelope.envelope().is_empty() { + // Individual rate limits have already been issued above + envelope.reject(Outcome::RateLimited(None)); + None + } else { + Some(envelope) + }; + + Ok(CheckedEnvelope { + envelope, + rate_limits, + }) + } +} + +/// A checked envelope and associated rate limits. +/// +/// Items violating the rate limits have been removed from the envelope. If all items are removed +/// from the envelope, `None` is returned in place of the envelope. +#[derive(Debug)] +pub struct CheckedEnvelope { + pub envelope: Option, + pub rate_limits: RateLimits, +} + +/// Adds category limits for the nested spans inside a transaction. +/// +/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted +/// as top-level spans, thus if we limited a transaction, we want to count and emit negative +/// outcomes for each of the spans nested inside that transaction. +fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { + if !enforcement.is_event_active() { + return; + } + + let spans_count = count_nested_spans(envelope); + if spans_count == 0 { + return; + } + + if enforcement.event.is_active() { + enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); + } + + if enforcement.event_indexed.is_active() { + enforcement.spans_indexed = enforcement + .event_indexed + .clone_for(DataCategory::SpanIndexed, spans_count); + } +} + +/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). 
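+///
+/// The returned count includes the transaction itself (`spans + 1`), since the
+/// transaction is extracted as a span and counted on the slow path of rate limiting.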
+fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { + #[derive(Debug, serde::Deserialize)] + struct PartialEvent { + spans: crate::utils::SeqCount, + } + + envelope + .envelope() + .items() + .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) + .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) + // We do + 1, since we count the transaction itself because it will be extracted + // as a span and counted during the slow path of rate limiting. + .map_or(0, |event| event.spans.0 + 1) +} + +#[cfg(test)] +mod tests { + use crate::envelope::{ContentType, Envelope, Item}; + use crate::extractors::RequestMeta; + use crate::services::processor::ProcessingGroup; + use relay_base_schema::project::ProjectId; + use relay_event_schema::protocol::EventId; + use relay_test::mock_service; + use serde_json::json; + use smallvec::SmallVec; + + use super::*; + + #[test] + fn get_state_expired() { + for expiry in [9999, 0] { + let config = Arc::new( + Config::from_json_value(json!( + { + "cache": { + "project_expiry": expiry, + "project_grace_period": 0, + "eviction_interval": 9999 // do not evict + } + } + )) + .unwrap(), + ); + + // Initialize project with a state + let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let project_info = ProjectInfo { + project_id: Some(ProjectId::new(123)), + ..Default::default() + }; + let mut project = Project::new(project_key, config.clone()); + project.state = ProjectFetchState::enabled(project_info); + + if expiry > 0 { + // With long expiry, should get a state + assert!(matches!(project.current_state(), ProjectState::Enabled(_))); + } else { + // With 0 expiry, project should expire immediately. No state can be set. + assert!(matches!(project.current_state(), ProjectState::Pending)); + } + } + } + + #[tokio::test] + async fn test_stale_cache() { + let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); + + let config = Arc::new( + Config::from_json_value(json!( + { + "cache": { + "project_expiry": 100, + "project_grace_period": 0, + "eviction_interval": 9999 // do not evict + } + } + )) + .unwrap(), + ); + + let channel = StateChannel::new(); + + // Initialize project with a state. + let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let mut project = Project::new(project_key, config); + project.state_channel = Some(channel); + project.state = ProjectFetchState::allowed(); + + assert!(project.next_fetch_attempt.is_none()); + // Try to update project with errored project state. + project.update_state(&addr, ProjectFetchState::pending(), false); + // Since we got invalid project state we still keep the old one meaning there + // still must be the project id set. + assert!(matches!(project.current_state(), ProjectState::Enabled(_))); + assert!(project.next_fetch_attempt.is_some()); + + // This tests that we actually initiate the backoff and the backoff mechanism works: + // * first call to `update_state` with invalid ProjectState starts the backoff, but since + // it's the first attemt, we get Duration of 0. 
+ // * second call to `update_state` here will bumpt the `next_backoff` Duration to somehing + // like ~ 1s + // * and now, by calling `fetch_state` we test that it's a noop, since if backoff is active + // we should never fetch + // * without backoff it would just panic, not able to call the ProjectCache service + let channel = StateChannel::new(); + project.state_channel = Some(channel); + project.update_state(&addr, ProjectFetchState::pending(), false); + project.fetch_state(addr, false); + } + + fn create_project(config: Option) -> Project { + let project_key = ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(); + let mut project = Project::new(project_key, Arc::new(Config::default())); + let mut project_info = ProjectInfo { + project_id: Some(ProjectId::new(42)), + ..Default::default() + }; + let mut public_keys = SmallVec::new(); + public_keys.push(PublicKeyConfig { + public_key: project_key, + numeric_id: None, + }); + project_info.public_keys = public_keys; + if let Some(config) = config { + project_info.config = serde_json::from_value(config).unwrap(); + } + project.state = ProjectFetchState::enabled(project_info); + project + } + + fn request_meta() -> RequestMeta { + let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" + .parse() + .unwrap(); + + RequestMeta::new(dsn) + } + + #[test] + fn test_track_nested_spans_outcomes() { + let mut project = create_project(Some(json!({ + "features": [ + "organizations:indexed-spans-extraction" + ], + "quotas": [{ + "id": "foo", + "categories": ["transaction"], + "window": 3600, + "limit": 0, + "reasonCode": "foo", + }] + }))); + + let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); + + let mut transaction = Item::new(ItemType::Transaction); + transaction.set_payload( + ContentType::Json, + r#"{ + "event_id": "52df9022835246eeb317dbd739ccd059", + "type": "transaction", + "transaction": "I have a stale timestamp, but I'm recent!", + "start_timestamp": 1, + "timestamp": 2, + "contexts": { + "trace": { + "trace_id": "ff62a8b040f340bda5d830223def1d81", + "span_id": "bd429c44b67a3eb4" + } + }, + "spans": [ + { + "span_id": "bd429c44b67a3eb4", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + }, + { + "span_id": "bd429c44b67a3eb5", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + } + ] +}"#, + ); + + envelope.add_item(transaction); + + let (outcome_aggregator, mut outcome_aggregator_rx) = Addr::custom(); + let (test_store, _) = Addr::custom(); + + let managed_envelope = ManagedEnvelope::new( + envelope, + outcome_aggregator.clone(), + test_store, + ProcessingGroup::Transaction, + ); + + let _ = project.check_envelope(managed_envelope); + drop(outcome_aggregator); + + let expected = [ + (DataCategory::Transaction, 1), + (DataCategory::TransactionIndexed, 1), + (DataCategory::Span, 3), + (DataCategory::SpanIndexed, 3), + ]; + + for (expected_category, expected_quantity) in expected { + let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); + assert_eq!(outcome.category, expected_category); + assert_eq!(outcome.quantity, expected_quantity); + } + } +} diff --git a/relay-server/src/services/projects/cache2/service.rs b/relay-server/src/services/projects/cache2/service.rs new file mode 100644 index 0000000000..1115a8feeb --- /dev/null +++ b/relay-server/src/services/projects/cache2/service.rs @@ -0,0 +1,136 @@ +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use 
relay_config::Config; +use tokio::sync::{broadcast, mpsc}; + +use crate::services::buffer::EnvelopeBuffer; +use crate::services::projects::cache2::state::{CompletedFetch, Fetch}; +use crate::services::projects::project::{ProjectFetchState, ProjectState}; +use crate::services::projects::source::ProjectSource; + +pub enum ProjectCache { + Fetch(ProjectKey), +} + +impl relay_system::Interface for ProjectCache {} + +impl relay_system::FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: Self, _: ()) -> Self { + message + } +} + +pub enum ProjectEvent { + Ready(ProjectKey), + Evicted(ProjectKey), +} + +pub struct ProjectCacheService { + store: super::state::ProjectStore, + source: ProjectSource, + config: Arc, + + buffer: relay_system::Addr, + + project_update_rx: mpsc::UnboundedReceiver, + project_update_tx: mpsc::UnboundedSender, + + project_events_tx: broadcast::Sender, +} + +impl ProjectCacheService { + fn schedule_fetch(&self, fetch: Fetch) { + let source = self.source.clone(); + let project_updates = self.project_update_tx.clone(); + + tokio::spawn(async move { + tokio::time::sleep_until(fetch.when().into()).await; + + // TODO: cached state for delta fetches, maybe this should just be a revision? + let state = match source + .fetch(fetch.project_key(), false, ProjectFetchState::pending()) + .await + { + // TODO: verify if the sanitized here is correct + Ok(state) => state.sanitized().into(), + Err(err) => { + relay_log::error!( + error = &err as &dyn std::error::Error, + "failed to fetch project state for {fetch:?}" + ); + ProjectState::Pending + } + }; + + project_updates.send(fetch.complete(state)); + }); + } +} + +impl ProjectCacheService { + fn handle_fetch(&mut self, project_key: ProjectKey) { + if let Some(fetch) = self.store.try_begin_fetch(project_key, &self.config) { + self.schedule_fetch(fetch); + } + } + + fn handle_project_update(&mut self, fetch: CompletedFetch) { + let project_key = fetch.project_key(); + + if let Some(fetch) = self.store.complete_fetch(fetch, &self.config) { + relay_log::trace!( + project_key = fetch.project_key().as_str(), + "re-scheduling project fetch: {fetch:?}" + ); + self.schedule_fetch(fetch); + return; + } + + self.project_events_tx + .send(ProjectEvent::Ready(project_key)); + } + + fn handle_evict_stale_projects(&mut self) { + let on_evict = |project_key| { + self.project_events_tx + .send(ProjectEvent::Evicted(project_key)); + }; + + self.store.evict_stale_projects(&self.config, on_evict); + } + + fn handle(&mut self, message: ProjectCache) { + match message { + ProjectCache::Fetch(project_key) => self.handle_fetch(project_key), + } + } +} + +impl relay_system::Service for ProjectCacheService { + type Interface = ProjectCache; + + fn spawn_handler(mut self, mut rx: relay_system::Receiver) { + tokio::spawn(async move { + let mut eviction_ticker = tokio::time::interval(self.config.cache_eviction_interval()); + + loop { + tokio::select! 
{ + biased; + + Some(update) = self.project_update_rx.recv() => { + self.handle_project_update(update) + }, + Some(message) = rx.recv() => { + self.handle(message); + }, + _ = eviction_ticker.tick() => { + self.handle_evict_stale_projects() + } + } + } + }); + } +} diff --git a/relay-server/src/services/projects/cache2/state.rs b/relay-server/src/services/projects/cache2/state.rs new file mode 100644 index 0000000000..722e20f182 --- /dev/null +++ b/relay-server/src/services/projects/cache2/state.rs @@ -0,0 +1,437 @@ +use std::fmt; +use std::sync::Arc; +use std::time::Instant; + +use arc_swap::ArcSwap; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_quotas::{CachedRateLimits, RateLimits}; +use relay_sampling::evaluation::ReservoirCounters; + +use crate::services::projects::cache2::ProjectCache; +use crate::services::projects::project::ProjectState; +use crate::utils::RetryBackoff; + +/// The backing storage for a project cache. +/// +/// Exposes the only interface to delete from [`Shared`], gurnatueed by +/// requiring exclusive/mutable access to [`ProjectStore`]. +/// +/// [`Shared`] can be extended through [`Shared::get_or_create`], in which case +/// the private state is missing. Users of [`Shared::get_or_create`] *must* trigger +/// a fetch to create the private state when [`Missing`] is returned. +/// This gurnatuees that eventually the project state is populated, but for a undetermined, +/// time it is possible that shared state exists without the respective private state. +pub struct ProjectStore { + shared: Arc, + private: hashbrown::HashMap, +} + +impl ProjectStore { + pub fn get(&mut self, project_key: ProjectKey) -> Option> { + let private = self.private.get_mut(&project_key)?; + let shared = self.shared.projects.pin().get(&project_key).cloned(); + debug_assert!( + shared.is_some(), + "there must be a shared project if private state exists" + ); + + Some(ProjectRef { + private, + shared: shared?, + }) + } + + pub fn try_begin_fetch(&mut self, project_key: ProjectKey, config: &Config) -> Option { + self.get_or_create(project_key, config) + .try_begin_fetch(config) + } + + #[must_use = "an incomplete fetch must be retried"] + pub fn complete_fetch(&mut self, fetch: CompletedFetch, config: &Config) -> Option { + // TODO: what if in the meantime the state expired and was evicted? + // Should eviction be possible for states that are currently in progress, I don't think so. + // Maybe need to discard outdated fetches (for evicted projects)? + debug_assert!(self.shared.projects.pin().get(&fetch.project_key).is_some()); + debug_assert!(self.private.get(&fetch.project_key).is_some()); + + let mut project = self.get_or_create(fetch.project_key, config); + project.complete_fetch(fetch); + // Schedule another fetch if necessary, usually should only happen if + // the completed fetch is pending. + project.try_begin_fetch(config) + } + + pub fn evict_stale_projects(&mut self, config: &Config, mut on_evict: F) -> usize + where + F: FnMut(ProjectKey), + { + let eviction_start = Instant::now(); + let delta = 2 * config.project_cache_expiry() + config.project_grace_period(); + + // TODO: what do we do with forever fetching projects, do we fail eventually? + let expired = self.private.extract_if(|_, private| { + if private.has_fetch_in_progress() { + return false; + } + + // Invariant: if there is no last successful fetch, + // there must be a fetch currently in progress. 
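+            // This should hold because a private entry is only created on the fetch
+            // path (`try_begin_fetch`), which registers an in-flight fetch before any
+            // fetch result can be recorded.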
+ debug_assert!(private.last_non_pending_fetch.is_some()); + + private + .last_non_pending_fetch + .map_or(true, |ts| ts + delta <= eviction_start) + }); + + let mut evicted = 0; + + let shared = self.shared.projects.pin(); + for (project_key, _) in expired { + let _removed = shared.remove(&project_key); + debug_assert!( + _removed.is_some(), + "an expired project must exist in the shared state" + ); + + evicted += 1; + + // TODO: garbage disposal? Do we still need that, we shouldn't have a problem with + // timings anymore. + + on_evict(project_key); + } + drop(shared); + + evicted + } + + fn get_or_create(&mut self, project_key: ProjectKey, config: &Config) -> ProjectRef<'_> { + #[cfg(debug_assertions)] + if self.private.contains_key(&project_key) { + // We have exclusive access to the private part, there are no concurrent deletions + // hence when if we have a private state there must always be a shared state as well. + debug_assert!(self.shared.projects.pin().contains_key(&project_key)); + } + + let private = self + .private + .entry(project_key) + .or_insert_with(|| PrivateProjectState::new(project_key, config)); + + let shared = self + .shared + .projects + .pin() + .get_or_insert_with(project_key, Default::default) + .clone(); + + ProjectRef { private, shared } + } +} + +pub struct ProjectRef<'a> { + shared: SharedProjectState, + private: &'a mut PrivateProjectState, +} + +impl ProjectRef<'_> { + pub fn merge_rate_limits(&mut self, rate_limits: RateLimits) { + self.shared.merge_rate_limits(rate_limits) + } + + fn try_begin_fetch(&mut self, config: &Config) -> Option { + self.private.try_begin_fetch(config) + } + + fn complete_fetch(&mut self, fetch: CompletedFetch) { + self.private.complete_fetch(&fetch); + + // Keep the old state around if the current fetch is pending. + // It may still be useful to callers. + if !fetch.project_state.is_pending() { + self.shared.set_project_state(fetch.project_state); + } + } +} + +pub struct Shared { + projects: papaya::HashMap, +} + +impl Shared { + // pub fn get(&self, project_key: ProjectKey) -> Option { + // self.projects + // .pin() + // .get(&project_key) + // .map(|v| v.state.load().as_ref().clone()) + // } + + /// Returns the existing project state or creates a new one and returns `Err([`Missing`])`. + /// + /// The returned [`Missing`] value must be used to trigger a fetch for this project + /// or it will stay pending forever. + pub fn get_or_create(&self, project_key: ProjectKey) -> Result { + // TODO: do we need to check for expiry here? + // TODO: if yes, we need to include the timestamp in the shared project state. + // TODO: grace periods? + + // The fast path, we expect the project to exist. + let projects = self.projects.pin(); + if let Some(project) = projects.get(&project_key) { + return Ok(project.to_shared_project()); + } + + // The slow path, try to attempt to insert, somebody else may have been faster, but that's okay. 
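+        // Whoever performs the actual insert receives the `Missing` marker and with it
+        // the responsibility to trigger a fetch, so losing this race is harmless.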
+ match projects.try_insert(project_key, Default::default()) { + Ok(inserted) => Err(Missing { + project_key, + shared_project: inserted.to_shared_project(), + }), + Err(occupied) => Ok(occupied.current.to_shared_project()), + } + } +} + +impl fmt::Debug for Shared { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Shared") + .field("num_projects", &self.projects.len()) + .finish() + } +} + +#[must_use = "a missing project must be fetched"] +pub struct Missing { + project_key: ProjectKey, + shared_project: SharedProject, +} + +impl Missing { + pub fn fetch(self, project_cache: &relay_system::Addr) -> SharedProject { + project_cache.send(ProjectCache::Fetch(self.project_key)); + self.shared_project + } +} + +pub struct SharedProject(Arc); + +impl SharedProject { + pub fn project_state(&self) -> &ProjectState { + &self.0.state + } + + pub fn cached_rate_limits(&self) -> &CachedRateLimits { + // TODO: exposing cached rate limits may be a bad idea, this allows mutation + // and caching of rate limits for pending projects, which may or may not be fine. + // + // Read only access is easily achievable if we return only the current rate limits. + &self.0.rate_limits + } + + pub fn reservoir_counters(&self) -> &ReservoirCounters { + &self.0.reservoir_counters + } +} + +#[derive(Debug, Default, Clone)] +struct SharedProjectState(Arc>); + +impl SharedProjectState { + fn set_project_state(&self, state: ProjectState) { + let prev = self.0.rcu(|stored| SharedProjectStateInner { + state: state.clone(), + rate_limits: Arc::clone(&stored.rate_limits), + reservoir_counters: Arc::clone(&stored.reservoir_counters), + }); + + // Try clean expired reservoir counters. + // + // We do it after the `rcu`, to not re-run this more often than necessary. + if let Some(state) = state.enabled() { + let config = state.config.sampling.as_ref(); + if let Some(config) = config.and_then(|eb| eb.as_ref().ok()) { + // We can safely use previous here, the `rcu` just replaced the state, the + // reservoir counters did not change. + // + // `try_lock` to not potentially block, it's a best effort cleanup. + // + // TODO: Remove the lock, we already have interior mutability with the `ArcSwap` + // and the counters themselves can be atomics. + if let Ok(mut counters) = prev.reservoir_counters.try_lock() { + counters.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); + } + } + } + } + + fn merge_rate_limits(&self, rate_limits: RateLimits) { + self.0.load().rate_limits.merge(rate_limits) + } + + fn to_shared_project(&self) -> SharedProject { + SharedProject(self.0.as_ref().load_full()) + } +} + +/// The data contained in a [`SharedProjectState`]. +/// +/// All fields must be cheap to clone and are ideally just a single `Arc`. +/// Partial updates to [`SharedProjectState`], are performed using `rcu` cloning all fields. 
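+///
+/// For example, `set_project_state` swaps in a new `state` while re-using the existing
+/// `Arc`s for `rate_limits` and `reservoir_counters`.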
+#[derive(Debug, Default)] +struct SharedProjectStateInner { + state: ProjectState, + rate_limits: Arc, + reservoir_counters: ReservoirCounters, +} + +struct PrivateProjectState { + project_key: ProjectKey, + current_fetch: Option<()>, + + last_non_pending_fetch: Option, + + next_fetch_attempt: Option, + backoff: RetryBackoff, +} + +impl PrivateProjectState { + fn new(project_key: ProjectKey, config: &Config) -> Self { + Self { + project_key, + current_fetch: None, + last_non_pending_fetch: None, + next_fetch_attempt: None, + backoff: RetryBackoff::new(config.http_max_retry_interval()), + } + } + + fn has_fetch_in_progress(&self) -> bool { + self.current_fetch.is_some() + } + + fn try_begin_fetch(&mut self, config: &Config) -> Option { + if self.current_fetch.is_some() { + // Already a fetch in progress. + relay_log::trace!( + project_key = self.project_key.as_str(), + "project fetch skipped, fetch in progress" + ); + return None; + } + + if matches!(self.check_expiry(config), Expiry::Updated) { + // The current state is up to date, no need to start another fetch. + relay_log::trace!( + project_key = self.project_key.as_str(), + "project fetch skipped, already up to date" + ); + return None; + } + + // Mark a current fetch in progress. + self.current_fetch.insert(()); + + // Schedule a new fetch, even if there is a backoff, it will just be sleeping for a while. + let when = self.next_fetch_attempt.take().unwrap_or_else(Instant::now); + + relay_log::debug!( + project_key = &self.project_key.as_str(), + attempts = self.backoff.attempt() + 1, + "project state fetch scheduled in {:?}", + when.saturating_duration_since(Instant::now()), + ); + + Some(Fetch { + project_key: self.project_key, + when, + }) + } + + fn complete_fetch(&mut self, fetch: &CompletedFetch) { + if fetch.project_state.is_pending() { + self.next_fetch_attempt = Instant::now().checked_add(self.backoff.next_backoff()); + } else { + debug_assert!( + self.next_fetch_attempt.is_none(), + "the scheduled fetch should have cleared the next attempt" + ); + self.next_fetch_attempt = None; + self.backoff.reset(); + self.last_non_pending_fetch = Some(Instant::now()); + } + + let _current_fetch = self.current_fetch.take(); + debug_assert!( + _current_fetch.is_some(), + "fetch completed while there was no current fetch registered" + ); + } + + fn check_expiry(&self, config: &Config) -> Expiry { + let Some(last_fetch) = self.last_non_pending_fetch else { + return Expiry::Expired; + }; + + let expiry = config.project_cache_expiry(); + let elapsed = last_fetch.elapsed(); + + if elapsed >= expiry + config.project_grace_period() { + Expiry::Expired + } else if elapsed >= expiry { + Expiry::Stale + } else { + Expiry::Updated + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +enum Expiry { + /// The project state is perfectly up to date. + Updated, + /// The project state is outdated but events depending on this project state can still be + /// processed. The state should be refreshed in the background though. + Stale, + /// The project state is completely outdated and events need to be buffered up until the new + /// state has been fetched. 
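+    ///
+    /// A state becomes expired once `project_cache_expiry + project_grace_period` has
+    /// elapsed since the last successful (non-pending) fetch, see `check_expiry`.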
+ Expired, +} + +#[derive(Debug)] +pub struct Fetch { + project_key: ProjectKey, + when: Instant, +} + +impl Fetch { + pub fn project_key(&self) -> ProjectKey { + self.project_key + } + + pub fn when(&self) -> Instant { + self.when + } + + pub fn complete(self, state: ProjectState) -> CompletedFetch { + CompletedFetch { + project_key: self.project_key, + project_state: state, + } + } +} + +pub struct CompletedFetch { + project_key: ProjectKey, + project_state: ProjectState, +} + +impl CompletedFetch { + pub fn project_key(&self) -> ProjectKey { + self.project_key + } + + pub fn project_state(&self) -> &ProjectState { + &self.project_state + } +} diff --git a/relay-server/src/services/projects/mod.rs b/relay-server/src/services/projects/mod.rs index bc8f253eee..ef8b6fead2 100644 --- a/relay-server/src/services/projects/mod.rs +++ b/relay-server/src/services/projects/mod.rs @@ -1,3 +1,4 @@ pub mod cache; +pub mod cache2; pub mod project; pub mod source; diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index cf4f387c9a..040a8d14a6 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -1,753 +1,5 @@ -use std::sync::Arc; -use std::time::Duration; - -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use relay_dynamic_config::{ErrorBoundary, Feature}; -use relay_metrics::Bucket; -use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits, Scoping}; -use relay_sampling::evaluation::ReservoirCounters; -use relay_statsd::metric; -use relay_system::{Addr, BroadcastChannel}; -use serde::{Deserialize, Serialize}; -use tokio::time::Instant; - -use crate::envelope::ItemType; -use crate::services::metrics::{Aggregator, MergeBuckets}; -use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::ProcessProjectMetrics; -use crate::services::projects::cache::{ - CheckedEnvelope, ProcessMetrics, ProjectCache, RequestUpdate, -}; -use crate::utils::{Enforcement, SeqCount}; - -use crate::statsd::RelayCounters; -use crate::utils::{CheckLimits, EnvelopeLimiter, ManagedEnvelope, RetryBackoff}; - pub mod state; pub use state::{ - ExpiryState, LimitedParsedProjectState, ParsedProjectState, ProjectFetchState, ProjectInfo, - ProjectState, + LimitedParsedProjectState, ParsedProjectState, ProjectFetchState, ProjectInfo, ProjectState, }; - -/// Sender type for messages that respond with project states. -pub type ProjectSender = relay_system::BroadcastSender; - -/// Represents a public key received from the projectconfig endpoint. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct PublicKeyConfig { - /// Public part of key (random hash). - pub public_key: ProjectKey, - - /// The primary key of the DSN in Sentry's main database. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub numeric_id: Option, -} - -/// Channel used to respond to state requests (e.g. by project config endpoint). 
-#[derive(Debug)] -struct StateChannel { - inner: BroadcastChannel, - no_cache: bool, -} - -impl StateChannel { - pub fn new() -> Self { - Self { - inner: BroadcastChannel::new(), - no_cache: false, - } - } - - pub fn no_cache(&mut self, no_cache: bool) -> &mut Self { - self.no_cache = no_cache; - self - } -} - -#[derive(Debug)] -enum GetOrFetch<'a> { - Cached(ProjectState), - Scheduled(&'a mut StateChannel), -} - -/// Structure representing organization and project configuration for a project key. -/// -/// This structure no longer uniquely identifies a project. Instead, it identifies a project key. -/// Projects can define multiple keys, in which case this structure is duplicated for each instance. -#[derive(Debug)] -pub struct Project { - backoff: RetryBackoff, - next_fetch_attempt: Option, - last_updated_at: Instant, - project_key: ProjectKey, - config: Arc, - state: ProjectFetchState, - state_channel: Option, - rate_limits: CachedRateLimits, - last_no_cache: Instant, - reservoir_counters: ReservoirCounters, -} - -impl Project { - /// Creates a new `Project`. - pub fn new(key: ProjectKey, config: Arc) -> Self { - Project { - backoff: RetryBackoff::new(config.http_max_retry_interval()), - next_fetch_attempt: None, - last_updated_at: Instant::now(), - project_key: key, - state: ProjectFetchState::expired(), - state_channel: None, - rate_limits: CachedRateLimits::new(), - last_no_cache: Instant::now(), - reservoir_counters: Arc::default(), - config, - } - } - - /// Returns the [`ReservoirCounters`] for the project. - pub fn reservoir_counters(&self) -> ReservoirCounters { - self.reservoir_counters.clone() - } - - /// Returns the current [`ProjectState`] attached to the project. - pub fn current_state(&self) -> ProjectState { - self.state.current_state(&self.config) - } - - /// Returns the currently active cached rate limits. - pub fn current_rate_limits(&mut self) -> &RateLimits { - self.rate_limits.current_limits() - } - - /// If a reservoir rule is no longer in the sampling config, we will remove those counters. - fn remove_expired_reservoir_rules(&self) { - let Some(state) = self.current_state().enabled() else { - return; - }; - - let Some(ErrorBoundary::Ok(config)) = state.config.sampling.as_ref() else { - return; - }; - - // Using try_lock to not slow down the project cache service. - if let Ok(mut guard) = self.reservoir_counters.try_lock() { - guard.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); - } - } - - pub fn merge_rate_limits(&mut self, rate_limits: RateLimits) { - self.rate_limits.merge(rate_limits); - } - - /// Returns the next attempt `Instant` if backoff is initiated, or None otherwise. - pub fn next_fetch_attempt(&self) -> Option { - self.next_fetch_attempt - } - - /// The last time the project state was updated - pub fn last_updated_at(&self) -> Instant { - self.last_updated_at - } - - /// Refresh the update time of the project in order to delay eviction. - /// - /// Called by the project cache when the project state is refreshed. - pub fn refresh_updated_timestamp(&mut self) { - self.last_updated_at = Instant::now(); - } - - /// Collects internal project state and assembles a [`ProcessProjectMetrics`] message. 
- pub fn process_metrics(&mut self, message: ProcessMetrics) -> ProcessProjectMetrics { - let project_state = self.current_state(); - let rate_limits = self.rate_limits.current_limits().clone(); - - ProcessProjectMetrics { - project_state, - rate_limits, - - data: message.data, - project_key: message.project_key, - source: message.source, - start_time: message.start_time.into(), - sent_at: message.sent_at, - } - } - - /// Returns a list of buckets back to the aggregator. - /// - /// This is used to return flushed buckets back to the aggregator if the project has not been - /// loaded at the time of flush. - /// - /// Buckets at this stage are expected to be validated already. - pub fn return_buckets(&self, aggregator: &Addr, buckets: Vec) { - aggregator.send(MergeBuckets::new( - self.project_key, - buckets.into_iter().collect(), - )); - } - - /// Returns `true` if backoff expired and new attempt can be triggered. - fn can_fetch(&self) -> bool { - self.next_fetch_attempt - .map(|next_attempt_at| next_attempt_at <= Instant::now()) - .unwrap_or(true) - } - - /// Triggers a debounced refresh of the project state. - /// - /// If the state is already being updated in the background, this method checks if the request - /// needs to be upgraded with the `no_cache` flag to ensure a more recent update. - fn fetch_state( - &mut self, - project_cache: Addr, - no_cache: bool, - ) -> &mut StateChannel { - // If there is a running request and we do not need to upgrade it to no_cache, or if the - // backoff is started and the new attempt is still somewhere in the future, skip - // scheduling a new fetch. - let should_fetch = !matches!(self.state_channel, Some(ref channel) if channel.no_cache || !no_cache) - && self.can_fetch(); - - let channel = self.state_channel.get_or_insert_with(StateChannel::new); - - if should_fetch { - channel.no_cache(no_cache); - let attempts = self.backoff.attempt() + 1; - relay_log::debug!( - "project {} state requested {attempts} times", - self.project_key - ); - project_cache.send(RequestUpdate { - project_key: self.project_key, - no_cache, - cached_state: self.state.clone(), - }); - } - - channel - } - - fn get_or_fetch_state( - &mut self, - project_cache: Addr, - mut no_cache: bool, - ) -> GetOrFetch<'_> { - // count number of times we are looking for the project state - metric!(counter(RelayCounters::ProjectStateGet) += 1); - - // Allow at most 1 no_cache request per second. Gracefully degrade to cached requests. - if no_cache { - if self.last_no_cache.elapsed() < Duration::from_secs(1) { - no_cache = false; - } else { - metric!(counter(RelayCounters::ProjectStateNoCache) += 1); - self.last_no_cache = Instant::now(); - } - } - - let cached_state = match self.state.expiry_state(&self.config) { - // Never use the cached state if `no_cache` is set. - _ if no_cache => None, - // There is no project state that can be used, fetch a state and return it. - ExpiryState::Expired => None, - // The project is semi-outdated, fetch new state but return old one. - ExpiryState::Stale(state) => Some(state.clone()), - // The project is not outdated, return early here to jump over fetching logic below. - ExpiryState::Updated(state) => return GetOrFetch::Cached(state.clone()), - }; - - let channel = self.fetch_state(project_cache, no_cache); - - match cached_state { - Some(state) => GetOrFetch::Cached(state), - None => GetOrFetch::Scheduled(channel), - } - } - - /// Returns the cached project state if it is valid. 
- /// - /// Depending on the state of the cache, this method takes different action: - /// - /// - If the cached state is up-to-date, this method simply returns `Some`. - /// - If the cached state is stale, this method triggers a refresh in the background and - /// returns `Some`. The stale period can be configured through - /// [`Config::project_grace_period`]. - /// - If there is no cached state or the cached state is fully outdated, this method triggers a - /// refresh in the background and returns `None`. - /// - /// If `no_cache` is set to true, this method always returns `None` and always triggers a - /// background refresh. - /// - /// To wait for a valid state instead, use [`get_state`](Self::get_state). - pub fn get_cached_state( - &mut self, - project_cache: Addr, - no_cache: bool, - ) -> ProjectState { - match self.get_or_fetch_state(project_cache, no_cache) { - GetOrFetch::Cached(state) => state, - GetOrFetch::Scheduled(_) => ProjectState::Pending, - } - } - - /// Obtains a valid project state and passes it to the sender once ready. - /// - /// This first checks if the state needs to be updated. This is the case if the project state - /// has passed its cache timeout. The `no_cache` flag forces an update. This does nothing if an - /// update is already running in the background. - /// - /// Independent of updating, _stale_ states are passed to the sender immediately as long as they - /// are in the [grace period](Config::project_grace_period). - pub fn get_state( - &mut self, - project_cache: Addr, - sender: ProjectSender, - no_cache: bool, - ) { - match self.get_or_fetch_state(project_cache, no_cache) { - GetOrFetch::Cached(state) => { - sender.send(state); - } - - GetOrFetch::Scheduled(channel) => { - channel.inner.attach(sender); - } - } - } - - /// Ensures the project state gets updated. - /// - /// This first checks if the state needs to be updated. This is the case if the project state - /// has passed its cache timeout. The `no_cache` flag forces another update unless one is - /// already running in the background. - /// - /// If an update is required, the update will start in the background and complete at a later - /// point. Therefore, this method is useful to trigger an update early if it is already clear - /// that the project state will be needed soon. To retrieve an updated state, use - /// [`Project::get_state`] instead. - pub fn prefetch(&mut self, project_cache: Addr, no_cache: bool) -> &mut Self { - self.get_cached_state(project_cache, no_cache); - self - } - - /// Replaces the internal project state with a new one and triggers pending actions. - /// - /// Returns the *old* project state if it was replaced. - /// - /// This flushes pending envelopes from [`ValidateEnvelope`] and - /// notifies all pending receivers from [`get_state`](Self::get_state). - /// - /// `no_cache` should be passed from the requesting call. Updates with `no_cache` will always - /// take precedence. - /// - /// [`ValidateEnvelope`]: crate::services::projects::cache::ValidateEnvelope - pub fn update_state( - &mut self, - project_cache: &Addr, - state: ProjectFetchState, - no_cache: bool, - ) -> Option { - // Initiate the backoff if the incoming state is invalid. Reset it otherwise. 
- if state.is_pending() { - self.next_fetch_attempt = Instant::now().checked_add(self.backoff.next_backoff()); - } else { - self.next_fetch_attempt = None; - self.backoff.reset(); - } - - let Some(channel) = self.state_channel.take() else { - relay_log::error!(tags.project_key = %self.project_key, "channel is missing for the state update"); - return None; - }; - - // If the channel has `no_cache` set but we are not a `no_cache` request, we have - // been superseeded. Put it back and let the other request take precedence. - if channel.no_cache && !no_cache { - self.state_channel = Some(channel); - return None; - } - - // If the state is pending, return back the taken channel and schedule state update. - if state.is_pending() { - // Only overwrite if the old state is expired: - let is_expired = matches!(self.state.expiry_state(&self.config), ExpiryState::Expired); - let old_state = match is_expired { - true => Some(std::mem::replace(&mut self.state, state)), - false => None, - }; - - self.state_channel = Some(channel); - let attempts = self.backoff.attempt() + 1; - relay_log::debug!( - "project {} state requested {attempts} times", - self.project_key - ); - - project_cache.send(RequestUpdate { - project_key: self.project_key, - no_cache, - cached_state: self.state.clone(), - }); - return old_state; - } - - let old_state = std::mem::replace(&mut self.state, state); - - // Flush all waiting recipients. - relay_log::debug!("project state {} updated", self.project_key); - channel.inner.send(self.state.current_state(&self.config)); - - self.after_state_updated(); - - Some(old_state) - } - - /// Called after all state validations and after the project state is updated. - /// - /// See also: [`Self::update_state`]. - fn after_state_updated(&mut self) { - // Check if the new sampling config got rid of any reservoir rules we have counters for. - self.remove_expired_reservoir_rules(); - } - - /// Creates `Scoping` for this project if the state is loaded. - /// - /// Returns `Some` if the project state has been fetched and contains a project identifier, - /// otherwise `None`. - pub fn scoping(&self) -> Option { - self.current_state().scoping(self.project_key) - } - - /// Runs the checks on incoming envelopes. - /// - /// See, [`crate::services::projects::cache::CheckEnvelope`] for more information - /// - /// * checks the rate limits - /// * validates the envelope meta in `check_request` - determines whether the given request - /// should be accepted or discarded - /// - /// IMPORTANT: If the [`ProjectState`] is invalid, the `check_request` will be skipped and only - /// rate limits will be validated. This function **must not** be called in the main processing - /// pipeline. - pub fn check_envelope( - &mut self, - mut envelope: ManagedEnvelope, - ) -> Result { - let state = match self.current_state() { - ProjectState::Enabled(state) => Some(state.clone()), - ProjectState::Disabled => { - // TODO(jjbayer): We should refactor this function to either return a Result or - // handle envelope rejections internally, but not both. 
- envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); - return Err(DiscardReason::ProjectId); - } - ProjectState::Pending => None, - }; - let mut scoping = envelope.scoping(); - - if let Some(ref state) = state { - scoping = state.scope_request(envelope.envelope().meta()); - envelope.scope(scoping); - - if let Err(reason) = state.check_envelope(envelope.envelope(), &self.config) { - envelope.reject(Outcome::Invalid(reason)); - return Err(reason); - } - } - - let current_limits = self.rate_limits.current_limits(); - - let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); - let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { - Ok(current_limits.check_with_quotas(quotas, item_scoping)) - }); - - let (mut enforcement, mut rate_limits) = - envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; - - let check_nested_spans = state - .as_ref() - .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); - - // If we can extract spans from the event, we want to try and count the number of nested - // spans to correctly emit negative outcomes in case the transaction itself is dropped. - if check_nested_spans { - sync_spans_to_enforcement(&envelope, &mut enforcement); - } - - enforcement.apply_with_outcomes(&mut envelope); - - envelope.update(); - - // Special case: Expose active rate limits for all metric namespaces if there is at least - // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary - // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. - if envelope.envelope().items().any(|i| i.ty().is_metrics()) { - let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); - metrics_scoping.namespace = MetricNamespaceScoping::Any; - rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); - } - - let envelope = if envelope.envelope().is_empty() { - // Individual rate limits have already been issued above - envelope.reject(Outcome::RateLimited(None)); - None - } else { - Some(envelope) - }; - - Ok(CheckedEnvelope { - envelope, - rate_limits, - }) - } -} - -/// Adds category limits for the nested spans inside a transaction. -/// -/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted -/// as top-level spans, thus if we limited a transaction, we want to count and emit negative -/// outcomes for each of the spans nested inside that transaction. -fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { - if !enforcement.is_event_active() { - return; - } - - let spans_count = count_nested_spans(envelope); - if spans_count == 0 { - return; - } - - if enforcement.event.is_active() { - enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); - } - - if enforcement.event_indexed.is_active() { - enforcement.spans_indexed = enforcement - .event_indexed - .clone_for(DataCategory::SpanIndexed, spans_count); - } -} - -/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). 
-fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { - #[derive(Debug, Deserialize)] - struct PartialEvent { - spans: SeqCount, - } - - envelope - .envelope() - .items() - .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) - .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) - // We do + 1, since we count the transaction itself because it will be extracted - // as a span and counted during the slow path of rate limiting. - .map_or(0, |event| event.spans.0 + 1) -} - -#[cfg(test)] -mod tests { - use crate::envelope::{ContentType, Envelope, Item}; - use crate::extractors::RequestMeta; - use crate::services::processor::ProcessingGroup; - use relay_base_schema::project::ProjectId; - use relay_event_schema::protocol::EventId; - use relay_test::mock_service; - use serde_json::json; - use smallvec::SmallVec; - - use super::*; - - #[test] - fn get_state_expired() { - for expiry in [9999, 0] { - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": expiry, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - // Initialize project with a state - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let project_info = ProjectInfo { - project_id: Some(ProjectId::new(123)), - ..Default::default() - }; - let mut project = Project::new(project_key, config.clone()); - project.state = ProjectFetchState::enabled(project_info); - - if expiry > 0 { - // With long expiry, should get a state - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - } else { - // With 0 expiry, project should expire immediately. No state can be set. - assert!(matches!(project.current_state(), ProjectState::Pending)); - } - } - } - - #[tokio::test] - async fn test_stale_cache() { - let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); - - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": 100, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - let channel = StateChannel::new(); - - // Initialize project with a state. - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut project = Project::new(project_key, config); - project.state_channel = Some(channel); - project.state = ProjectFetchState::allowed(); - - assert!(project.next_fetch_attempt.is_none()); - // Try to update project with errored project state. - project.update_state(&addr, ProjectFetchState::pending(), false); - // Since we got invalid project state we still keep the old one meaning there - // still must be the project id set. - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - assert!(project.next_fetch_attempt.is_some()); - - // This tests that we actually initiate the backoff and the backoff mechanism works: - // * first call to `update_state` with invalid ProjectState starts the backoff, but since - // it's the first attemt, we get Duration of 0. 
- // * second call to `update_state` here will bumpt the `next_backoff` Duration to somehing - // like ~ 1s - // * and now, by calling `fetch_state` we test that it's a noop, since if backoff is active - // we should never fetch - // * without backoff it would just panic, not able to call the ProjectCache service - let channel = StateChannel::new(); - project.state_channel = Some(channel); - project.update_state(&addr, ProjectFetchState::pending(), false); - project.fetch_state(addr, false); - } - - fn create_project(config: Option) -> Project { - let project_key = ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(); - let mut project = Project::new(project_key, Arc::new(Config::default())); - let mut project_info = ProjectInfo { - project_id: Some(ProjectId::new(42)), - ..Default::default() - }; - let mut public_keys = SmallVec::new(); - public_keys.push(PublicKeyConfig { - public_key: project_key, - numeric_id: None, - }); - project_info.public_keys = public_keys; - if let Some(config) = config { - project_info.config = serde_json::from_value(config).unwrap(); - } - project.state = ProjectFetchState::enabled(project_info); - project - } - - fn request_meta() -> RequestMeta { - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - RequestMeta::new(dsn) - } - - #[test] - fn test_track_nested_spans_outcomes() { - let mut project = create_project(Some(json!({ - "features": [ - "organizations:indexed-spans-extraction" - ], - "quotas": [{ - "id": "foo", - "categories": ["transaction"], - "window": 3600, - "limit": 0, - "reasonCode": "foo", - }] - }))); - - let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); - - let mut transaction = Item::new(ItemType::Transaction); - transaction.set_payload( - ContentType::Json, - r#"{ - "event_id": "52df9022835246eeb317dbd739ccd059", - "type": "transaction", - "transaction": "I have a stale timestamp, but I'm recent!", - "start_timestamp": 1, - "timestamp": 2, - "contexts": { - "trace": { - "trace_id": "ff62a8b040f340bda5d830223def1d81", - "span_id": "bd429c44b67a3eb4" - } - }, - "spans": [ - { - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - }, - { - "span_id": "bd429c44b67a3eb5", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - } - ] -}"#, - ); - - envelope.add_item(transaction); - - let (outcome_aggregator, mut outcome_aggregator_rx) = Addr::custom(); - let (test_store, _) = Addr::custom(); - - let managed_envelope = ManagedEnvelope::new( - envelope, - outcome_aggregator.clone(), - test_store, - ProcessingGroup::Transaction, - ); - - let _ = project.check_envelope(managed_envelope); - drop(outcome_aggregator); - - let expected = [ - (DataCategory::Transaction, 1), - (DataCategory::TransactionIndexed, 1), - (DataCategory::Span, 3), - (DataCategory::SpanIndexed, 3), - ]; - - for (expected_category, expected_quantity) in expected { - let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); - assert_eq!(outcome.category, expected_category); - assert_eq!(outcome.quantity, expected_quantity); - } - } -} diff --git a/relay-server/src/services/projects/project/state/fetch_state.rs b/relay-server/src/services/projects/project/state/fetch_state.rs index 020150f303..3a61f838b8 100644 --- a/relay-server/src/services/projects/project/state/fetch_state.rs +++ b/relay-server/src/services/projects/project/state/fetch_state.rs @@ -2,7 +2,6 @@ use std::sync::Arc; 
use tokio::time::Instant; -use relay_config::Config; use relay_dynamic_config::ProjectConfig; use crate::services::projects::project::state::info::ProjectInfo; @@ -88,47 +87,6 @@ impl ProjectFetchState { matches!(self.state, ProjectState::Pending) } - /// Returns information about the expiry of a project state. - /// - /// If no detailed information is needed, use [`Self::current_state`] instead. - pub fn expiry_state(&self, config: &Config) -> ExpiryState { - match self.check_expiry(config) { - Expiry::Updated => ExpiryState::Updated(&self.state), - Expiry::Stale => ExpiryState::Stale(&self.state), - Expiry::Expired => ExpiryState::Expired, - } - } - - /// Returns the current project state, if it has not yet expired. - pub fn current_state(&self, config: &Config) -> ProjectState { - match self.expiry_state(config) { - ExpiryState::Updated(state) | ExpiryState::Stale(state) => state.clone(), - ExpiryState::Expired => ProjectState::Pending, - } - } - - /// Returns whether this state is outdated and needs to be refetched. - fn check_expiry(&self, config: &Config) -> Expiry { - let Some(last_fetch) = self.last_fetch else { - return Expiry::Expired; - }; - let expiry = match &self.state { - ProjectState::Enabled(info) if info.project_id.is_some() => { - config.project_cache_expiry() - } - _ => config.cache_miss_expiry(), - }; - - let elapsed = last_fetch.elapsed(); - if elapsed >= expiry + config.project_grace_period() { - Expiry::Expired - } else if elapsed >= expiry { - Expiry::Stale - } else { - Expiry::Updated - } - } - /// Returns the revision of the contained project state. /// /// See: [`ProjectState::revision`]. @@ -137,26 +95,8 @@ impl ProjectFetchState { } } -/// Wrapper for a project state, with expiry information. -#[derive(Clone, Copy, Debug)] -pub enum ExpiryState<'a> { - /// An up-to-date project state. See [`Expiry::Updated`]. - Updated(&'a ProjectState), - /// A stale project state that can still be used. See [`Expiry::Stale`]. - Stale(&'a ProjectState), - /// An expired project state that should not be used. See [`Expiry::Expired`]. - Expired, -} - -/// The expiry status of a project state. Return value of [`ProjectFetchState::check_expiry`]. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -enum Expiry { - /// The project state is perfectly up to date. - Updated, - /// The project state is outdated but events depending on this project state can still be - /// processed. The state should be refreshed in the background though. - Stale, - /// The project state is completely outdated and events need to be buffered up until the new - /// state has been fetched. - Expired, +impl From for ProjectState { + fn from(value: ProjectFetchState) -> Self { + value.state + } } diff --git a/relay-server/src/services/projects/project/state/info.rs b/relay-server/src/services/projects/project/state/info.rs index eab9b8fb62..b389edc329 100644 --- a/relay-server/src/services/projects/project/state/info.rs +++ b/relay-server/src/services/projects/project/state/info.rs @@ -16,7 +16,6 @@ use url::Url; use crate::envelope::Envelope; use crate::extractors::RequestMeta; use crate::services::outcome::DiscardReason; -use crate::services::projects::project::PublicKeyConfig; /// Information about an enabled project. /// @@ -248,3 +247,15 @@ impl ProjectInfo { self.config.features.has(feature) } } + +/// Represents a public key received from the projectconfig endpoint. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PublicKeyConfig { + /// Public part of key (random hash). + pub public_key: ProjectKey, + + /// The primary key of the DSN in Sentry's main database. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub numeric_id: Option, +} diff --git a/relay-server/src/services/projects/project/state/mod.rs b/relay-server/src/services/projects/project/state/mod.rs index 868ad5ed4a..56a9995266 100644 --- a/relay-server/src/services/projects/project/state/mod.rs +++ b/relay-server/src/services/projects/project/state/mod.rs @@ -9,11 +9,11 @@ use relay_quotas::Scoping; mod fetch_state; mod info; -pub use self::fetch_state::{ExpiryState, ProjectFetchState}; +pub use self::fetch_state::ProjectFetchState; pub use self::info::{LimitedProjectInfo, ProjectInfo}; /// Representation of a project's current state. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub enum ProjectState { /// A valid project that is not disabled. Enabled(Arc), @@ -25,6 +25,7 @@ pub enum ProjectState { /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). /// - The upstream returned an unparsable project so we have to try again. /// - The project has expired and must be treated as "has not been fetched". + #[default] Pending, } From 44489a1f4b8ac6f04551a5cf420758902f58bde8 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Mon, 21 Oct 2024 16:57:07 +0200 Subject: [PATCH 03/15] it compiles --- relay-server/src/endpoints/batch_metrics.rs | 3 +- relay-server/src/endpoints/common.rs | 6 +- relay-server/src/service.rs | 48 +++++++++----- relay-server/src/services/buffer/mod.rs | 19 +++--- relay-server/src/services/processor.rs | 65 +++++++++++++------ .../src/services/processor/metrics.rs | 2 +- .../projects/{cache2 => cache}/handle.rs | 14 ++-- .../projects/{cache.rs => cache/legacy.rs} | 56 ++-------------- .../projects/{cache2 => cache}/mod.rs | 2 + .../projects/{cache2 => cache}/project.rs | 2 +- .../projects/{cache2 => cache}/service.rs | 62 +++++++++++++++--- .../projects/{cache2 => cache}/state.rs | 8 ++- relay-server/src/services/projects/mod.rs | 1 - relay-server/src/services/spooler/mod.rs | 2 +- relay-server/src/testutils.rs | 2 +- 15 files changed, 170 insertions(+), 122 deletions(-) rename relay-server/src/services/projects/{cache2 => cache}/handle.rs (82%) rename relay-server/src/services/projects/{cache.rs => cache/legacy.rs} (95%) rename relay-server/src/services/projects/{cache2 => cache}/mod.rs (92%) rename relay-server/src/services/projects/{cache2 => cache}/project.rs (99%) rename relay-server/src/services/projects/{cache2 => cache}/service.rs (66%) rename relay-server/src/services/projects/{cache2 => cache}/state.rs (98%) diff --git a/relay-server/src/endpoints/batch_metrics.rs b/relay-server/src/endpoints/batch_metrics.rs index d068e59fde..96aac00bd7 100644 --- a/relay-server/src/endpoints/batch_metrics.rs +++ b/relay-server/src/endpoints/batch_metrics.rs @@ -4,8 +4,7 @@ use serde::{Deserialize, Serialize}; use crate::extractors::{SignedBytes, StartTime}; use crate::service::ServiceState; -use crate::services::processor::ProcessBatchedMetrics; -use crate::services::projects::cache::BucketSource; +use crate::services::processor::{BucketSource, ProcessBatchedMetrics}; #[derive(Debug, Serialize, Deserialize)] struct SendMetricsResponse {} diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 
e7a86e1d0d..19f0ee6125 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -13,7 +13,7 @@ use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::{MetricData, ProcessMetrics, ProcessingGroup}; -use crate::services::projects::cache::ValidateEnvelope; +use crate::services::projects::cache::legacy::ValidateEnvelope; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -312,7 +312,9 @@ fn queue_envelope( } None => { relay_log::trace!("Sending envelope to project cache for V1 buffer"); - state.project_cache().send(ValidateEnvelope::new(envelope)); + state + .legacy_project_cache() + .send(ValidateEnvelope::new(envelope)); } } } diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index 47b31a815e..a068ec1689 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -11,8 +11,10 @@ use crate::services::metrics::{Aggregator, RouterService}; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use crate::services::projects::cache::{ProjectCache, ProjectCacheService, Services}; -use crate::services::projects::cache2::ProjectCacheHandle; +use crate::services::projects::cache::{ + legacy, ProjectCache, ProjectCacheHandle, ProjectCacheService, +}; +use crate::services::projects::source::ProjectSource; use crate::services::relays::{RelayCache, RelayCacheService}; use crate::services::stats::RelayStats; #[cfg(feature = "processing")] @@ -62,6 +64,7 @@ pub struct Registry { pub test_store: Addr, pub relay_cache: Addr, pub global_config: Addr, + pub legacy_project_cache: Addr, pub project_cache: Addr, pub upstream_relay: Addr, pub envelope_buffer: Option, @@ -189,12 +192,23 @@ impl ServiceState { // service fail if the service is not running. 
let global_config = global_config.start(); - let (project_cache, project_cache_rx) = channel(ProjectCacheService::name()); + let (legacy_project_cache, legacy_project_cache_rx) = + channel(legacy::ProjectCacheService::name()); + + let project_source = ProjectSource::start( + Arc::clone(&config), + upstream_relay.clone(), + redis_pools + .as_ref() + .map(|pools| pools.project_configs.clone()), + ); + let (project_cache, project_cache_handle) = + ProjectCacheService::new(Arc::clone(&config), project_source).start(); let aggregator = RouterService::new( config.default_aggregator_config().clone(), config.secondary_aggregator_configs().clone(), - Some(project_cache.clone().recipient()), + Some(legacy_project_cache.clone().recipient()), ); let aggregator_handle = aggregator.handle(); let aggregator = aggregator.start(); @@ -229,11 +243,12 @@ impl ServiceState { create_processor_pool(&config)?, config.clone(), global_config_handle, + project_cache_handle.clone(), cogs, #[cfg(feature = "processing")] redis_pools.clone(), processor::Addrs { - project_cache: project_cache.clone(), + legacy_project_cache: legacy_project_cache.clone(), outcome_aggregator: outcome_aggregator.clone(), upstream_relay: upstream_relay.clone(), test_store: test_store.clone(), @@ -252,7 +267,7 @@ impl ServiceState { global_config_rx.clone(), buffer::Services { envelopes_tx, - project_cache: project_cache.clone(), + project_cache_handle: project_cache_handle.clone(), outcome_aggregator: outcome_aggregator.clone(), test_store: test_store.clone(), }, @@ -260,27 +275,24 @@ impl ServiceState { .map(|b| b.start_observable()); // Keep all the services in one context. - let project_cache_services = Services { + let project_cache_services = legacy::Services { envelope_buffer: envelope_buffer.as_ref().map(ObservableEnvelopeBuffer::addr), aggregator: aggregator.clone(), envelope_processor: processor.clone(), outcome_aggregator: outcome_aggregator.clone(), - project_cache: project_cache.clone(), + project_cache: legacy_project_cache.clone(), test_store: test_store.clone(), - upstream_relay: upstream_relay.clone(), }; - ProjectCacheService::new( + legacy::ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), + project_cache_handle.clone(), project_cache_services, global_config_rx, envelopes_rx, - redis_pools - .as_ref() - .map(|pools| pools.project_configs.clone()), ) - .spawn_handler(project_cache_rx); + .spawn_handler(legacy_project_cache_rx); let health_check = HealthCheckService::new( config.clone(), @@ -310,7 +322,9 @@ impl ServiceState { test_store, relay_cache, global_config, + legacy_project_cache, project_cache, + project_cache_handle, upstream_relay, envelope_buffer, }; @@ -343,9 +357,9 @@ impl ServiceState { self.inner.registry.envelope_buffer.as_ref() } - /// Returns the address of the [`ProjectCache`] service. - pub fn project_cache(&self) -> &Addr { - &self.inner.registry.project_cache + /// Returns the address of the [`legacy::ProjectCache`] service. + pub fn legacy_project_cache(&self) -> &Addr { + &self.inner.registry.legacy_project_cache } /// Returns a [`ProjectCacheHandle`]. 
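For orientation, the wiring introduced in this hunk can be condensed as follows. This is an illustrative sketch assembled only from calls visible in the diff (ProjectSource::start, ProjectCacheService::new(..).start(), and cloning of the handle); it is not standalone, compilable code, and it omits the other registry fields and error handling.

    // The project source is started first and handed to the new cache service.
    let project_source = ProjectSource::start(
        Arc::clone(&config),
        upstream_relay.clone(),
        redis_pools
            .as_ref()
            .map(|pools| pools.project_configs.clone()),
    );

    // Starting the new service yields an `Addr<ProjectCache>` for fetch messages plus a
    // cloneable `ProjectCacheHandle` for concurrent read access to cached project state.
    let (project_cache, project_cache_handle) =
        ProjectCacheService::new(Arc::clone(&config), project_source).start();

    // The handle (not the address) is what gets cloned into the processor, the envelope
    // buffer, and the legacy cache, while the legacy service keeps its own address under
    // the new `legacy_project_cache` name.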
diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 10221615b8..80c90b04c0 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -21,10 +21,7 @@ use crate::services::outcome::DiscardReason; use crate::services::outcome::Outcome; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::DequeuedEnvelope; - -use crate::services::projects::cache2::ProjectCacheHandle; -use crate::services::projects::cache2::ProjectEvent; +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectEvent}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::ManagedEnvelope; @@ -101,7 +98,7 @@ impl ObservableEnvelopeBuffer { pub struct Services { /// Bounded channel used exclusively to handle backpressure when sending envelopes to the /// project cache. - pub envelopes_tx: mpsc::Sender, + pub envelopes_tx: mpsc::Sender, pub project_cache_handle: ProjectCacheHandle, pub outcome_aggregator: Addr, pub test_store: Addr, @@ -159,7 +156,7 @@ impl EnvelopeBufferService { &mut self, buffer: &PolymorphicEnvelopeBuffer, dequeue: bool, - ) -> Option> { + ) -> Option> { relay_statsd::metric!( counter(RelayCounters::BufferReadyToPop) += 1, status = "checking" @@ -220,7 +217,7 @@ impl EnvelopeBufferService { config: &Config, buffer: &mut PolymorphicEnvelopeBuffer, services: &Services, - envelopes_tx_permit: Permit<'a, DequeuedEnvelope>, + envelopes_tx_permit: Permit<'a, legacy::DequeuedEnvelope>, ) -> Result { let sleep = match buffer.peek().await? { Peek::Empty => { @@ -253,7 +250,7 @@ impl EnvelopeBufferService { .pop() .await? .expect("Element disappeared despite exclusive excess"); - envelopes_tx_permit.send(DequeuedEnvelope(envelope)); + envelopes_tx_permit.send(legacy::DequeuedEnvelope(envelope)); Duration::ZERO // try next pop immediately } @@ -430,7 +427,7 @@ impl Service for EnvelopeBufferService { } } } - ProjectEvent::Ready(project_key) = project_events.recv() => { + Ok(ProjectEvent::Ready(project_key)) = project_events.recv() => { Self::handle_message(&mut buffer, EnvelopeBuffer::Ready(project_key)).await; sleep = Duration::ZERO; } @@ -490,8 +487,8 @@ mod tests { struct EnvelopeBufferServiceResult { service: EnvelopeBufferService, global_tx: watch::Sender, - envelopes_rx: mpsc::Receiver, - project_cache_rx: mpsc::UnboundedReceiver, + envelopes_rx: mpsc::Receiver, + project_cache_rx: mpsc::UnboundedReceiver, outcome_aggregator_rx: mpsc::UnboundedReceiver, } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 572c252817..16aad5ac51 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -71,8 +71,7 @@ use crate::services::global_config::GlobalConfigHandle; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::event::FiltersStatus; -use crate::services::projects::cache::{BucketSource, ProjectCache}; -use crate::services::projects::cache2::ProjectCacheHandle; +use crate::services::projects::cache::{legacy, ProjectCacheHandle}; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; use crate::services::upstream::{ @@ -757,7 +756,7 @@ struct ProcessEnvelopeState<'a, Group> { /// Currently active cached rate limits 
of the project this envelope belongs to. #[cfg_attr(not(feature = "processing"), expect(dead_code))] - rate_limits: RateLimits, + rate_limits: Arc, /// The config of this Relay instance. config: Arc, @@ -981,6 +980,32 @@ pub struct ProcessBatchedMetrics { pub sent_at: Option>, } +/// Source information where a metric bucket originates from. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum BucketSource { + /// The metric bucket originated from an internal Relay use case. + /// + /// The metric bucket originates either from within the same Relay + /// or was accepted coming from another Relay which is registered as + /// an internal Relay via Relay's configuration. + Internal, + /// The bucket source originated from an untrusted source. + /// + /// Managed Relays sending extracted metrics are considered external, + /// it's a project use case but it comes from an untrusted source. + External, +} + +impl From<&RequestMeta> for BucketSource { + fn from(value: &RequestMeta) -> Self { + if value.is_from_internal_relay() { + Self::Internal + } else { + Self::External + } + } +} + /// Metric buckets with additional project. #[derive(Debug, Clone)] pub struct ProjectMetrics { @@ -1099,7 +1124,7 @@ pub struct EnvelopeProcessorService { /// Contains the addresses of services that the processor publishes to. pub struct Addrs { - pub project_cache: Addr, + pub legacy_project_cache: Addr, pub outcome_aggregator: Addr, pub upstream_relay: Addr, pub test_store: Addr, @@ -1111,7 +1136,7 @@ pub struct Addrs { impl Default for Addrs { fn default() -> Self { Addrs { - project_cache: Addr::dummy(), + legacy_project_cache: Addr::dummy(), outcome_aggregator: Addr::dummy(), upstream_relay: Addr::dummy(), test_store: Addr::dummy(), @@ -1126,7 +1151,7 @@ struct InnerProcessor { workers: WorkerGroup, config: Arc, global_config: GlobalConfigHandle, - project_cache_handle: ProjectCacheHandle, + project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] quotas_pool: Option, @@ -1145,6 +1170,7 @@ impl EnvelopeProcessorService { pool: ThreadPool, config: Arc, global_config: GlobalConfigHandle, + project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] redis: Option, addrs: Addrs, @@ -1173,6 +1199,7 @@ impl EnvelopeProcessorService { let inner = InnerProcessor { workers: WorkerGroup::new(pool), global_config, + project_cache, cogs, #[cfg(feature = "processing")] quotas_pool: quotas.clone(), @@ -1238,7 +1265,7 @@ impl EnvelopeProcessorService { mut managed_envelope: TypedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: RateLimits, + rate_limits: Arc, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> ProcessEnvelopeState { @@ -1310,7 +1337,7 @@ impl EnvelopeProcessorService { // Update cached rate limits with the freshly computed ones. 
if !limits.is_empty() { self.inner - .project_cache_handle + .project_cache .get(state.managed_envelope.scoping().project_key) .rate_limits() .merge(limits); @@ -1838,7 +1865,7 @@ impl EnvelopeProcessorService { mut managed_envelope: ManagedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: RateLimits, + rate_limits: Arc, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> Result { @@ -2096,13 +2123,13 @@ impl EnvelopeProcessorService { true }); - let project = self.inner.project_cache_handle.get(project_key); + let project = self.inner.project_cache.get(project_key); // Best effort check to filter and rate limit buckets, if there is no project state // available at the current time, we will check again after flushing. let buckets = match project.project_state() { ProjectState::Enabled(project_info) => { - let rate_limits = project.cached_rate_limits(); + let rate_limits = project.rate_limits().current_limits(); self.check_buckets(project_key, &project_info, &rate_limits, buckets) } _ => buckets, @@ -2198,7 +2225,7 @@ impl EnvelopeProcessorService { envelope, body, http_encoding, - project_cache: self.inner.addrs.project_cache.clone(), + project_cache: self.inner.project_cache.clone(), })); } Err(error) => { @@ -2352,7 +2379,7 @@ impl EnvelopeProcessorService { ); self.inner - .project_cache_handle + .project_cache .get(item_scoping.scoping.project_key) .rate_limits() .merge(limits); @@ -2430,7 +2457,7 @@ impl EnvelopeProcessorService { if was_enforced { // Update the rate limits in the project cache. self.inner - .project_cache_handle + .project_cache .get(scoping.project_key) .rate_limits() .merge(rate_limits); @@ -2879,7 +2906,7 @@ pub struct SendEnvelope { envelope: TypedEnvelope, body: Bytes, http_encoding: HttpEncoding, - project_cache_handle: ProjectCacheHandle, + project_cache: ProjectCacheHandle, } impl UpstreamRequest for SendEnvelope { @@ -2931,7 +2958,7 @@ impl UpstreamRequest for SendEnvelope { self.envelope.accept(); if let UpstreamRequestError::RateLimited(limits) = error { - self.project_cache_handle + self.project_cache .get(scoping.project_key) .rate_limits() .merge(limits.scope(&scoping)); @@ -3705,7 +3732,7 @@ mod tests { let processor = create_test_processor_with_addrs( config, Addrs { - project_cache, + legacy_project_cache, ..Default::default() }, ); @@ -3749,11 +3776,11 @@ mod tests { processor.handle_process_batched_metrics(&mut token, message); let value = project_cache_rx.recv().await.unwrap(); - let ProjectCache::ProcessMetrics(pm1) = value else { + let legacy::ProjectCache::ProcessMetrics(pm1) = value else { panic!() }; let value = project_cache_rx.recv().await.unwrap(); - let ProjectCache::ProcessMetrics(pm2) = value else { + let legacy::ProjectCache::ProcessMetrics(pm2) = value else { panic!() }; diff --git a/relay-server/src/services/processor/metrics.rs b/relay-server/src/services/processor/metrics.rs index 0d28285e77..703898e425 100644 --- a/relay-server/src/services/processor/metrics.rs +++ b/relay-server/src/services/processor/metrics.rs @@ -5,7 +5,7 @@ use relay_quotas::Scoping; use crate::metrics::MetricOutcomes; use crate::services::outcome::Outcome; -use crate::services::projects::cache::BucketSource; +use crate::services::processor::BucketSource; use crate::services::projects::project::ProjectInfo; /// Checks if the namespace of the passed bucket is valid. 
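The processor hunks above replace round-trips through the legacy `ProjectCache` actor with direct access to the shared `ProjectCacheHandle`, and `BucketSource` now lives in the processor. Pieced together from the hunks, the resulting call pattern looks roughly like the fragment below; `envelope`, `scoping`, `limits`, and `buckets` stand in for the surrounding function context and are not new API.

    // Metric buckets are classified in the processor itself: buckets sent by internal
    // Relays are trusted, everything else is treated as external.
    let source = BucketSource::from(envelope.meta());

    // Freshly computed rate limits are merged straight into the shared handle instead
    // of being sent to the legacy ProjectCache service.
    let project = self.inner.project_cache.get(scoping.project_key);
    project.rate_limits().merge(limits);

    // Reads go through the same handle: the cached state and the currently active
    // limits are used to filter and rate limit buckets on a best-effort basis.
    if let ProjectState::Enabled(project_info) = project.project_state() {
        let rate_limits = project.rate_limits().current_limits();
        // ... check_buckets(project_key, &project_info, &rate_limits, buckets)
    }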
diff --git a/relay-server/src/services/projects/cache2/handle.rs b/relay-server/src/services/projects/cache/handle.rs similarity index 82% rename from relay-server/src/services/projects/cache2/handle.rs rename to relay-server/src/services/projects/cache/handle.rs index e7afb43454..39b4b23594 100644 --- a/relay-server/src/services/projects/cache2/handle.rs +++ b/relay-server/src/services/projects/cache/handle.rs @@ -1,4 +1,4 @@ -use core::fmt; +use std::fmt; use std::sync::Arc; use relay_base_schema::project::ProjectKey; @@ -7,15 +7,15 @@ use relay_system::Addr; use tokio::sync::broadcast; use super::state::Shared; -use crate::services::projects::cache2::service::ProjectEvent; -use crate::services::projects::cache2::{Project, ProjectCache}; +use crate::services::projects::cache::service::ProjectEvent; +use crate::services::projects::cache::{Project, ProjectCache}; #[derive(Clone)] pub struct ProjectCacheHandle { - shared: Arc, - config: Arc, - service: Addr, - project_events: broadcast::Sender, + pub(super) shared: Arc, + pub(super) config: Arc, + pub(super) service: Addr, + pub(super) project_events: broadcast::Sender, } impl ProjectCacheHandle { diff --git a/relay-server/src/services/projects/cache.rs b/relay-server/src/services/projects/cache/legacy.rs similarity index 95% rename from relay-server/src/services/projects/cache.rs rename to relay-server/src/services/projects/cache/legacy.rs index 2d99733db1..330ca47bc7 100644 --- a/relay-server/src/services/projects/cache.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -3,19 +3,17 @@ use std::error::Error; use std::sync::Arc; use std::time::Duration; -use crate::extractors::RequestMeta; use crate::services::buffer::{EnvelopeBuffer, EnvelopeBufferError}; use crate::services::global_config; use crate::services::processor::{ EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; -use crate::services::projects::cache2::{CheckedEnvelope, ProjectCacheHandle, ProjectEvent}; +use crate::services::projects::cache::{CheckedEnvelope, ProjectCacheHandle, ProjectEvent}; use crate::Envelope; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; use relay_metrics::Bucket; -use relay_redis::RedisPool; use relay_statsd::metric; use relay_system::{Addr, FromMessage, Interface, Service}; use tokio::sync::{mpsc, watch}; @@ -28,7 +26,6 @@ use crate::services::spooler::{ UnspooledEnvelope, BATCH_KEY_COUNT, }; use crate::services::test_store::TestStore; -use crate::services::upstream::UpstreamRelay; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; @@ -55,32 +52,6 @@ impl ValidateEnvelope { } } -/// Source information where a metric bucket originates from. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum BucketSource { - /// The metric bucket originated from an internal Relay use case. - /// - /// The metric bucket originates either from within the same Relay - /// or was accepted coming from another Relay which is registered as - /// an internal Relay via Relay's configuration. - Internal, - /// The bucket source originated from an untrusted source. - /// - /// Managed Relays sending extracted metrics are considered external, - /// it's a project use case but it comes from an untrusted source. 
- External, -} - -impl From<&RequestMeta> for BucketSource { - fn from(value: &RequestMeta) -> Self { - if value.is_from_internal_relay() { - Self::Internal - } else { - Self::External - } - } -} - /// Updates the buffer index for [`ProjectKey`] with the [`QueueKey`] keys. /// /// This message is sent from the project buffer in case of the error while fetching the data from @@ -106,12 +77,6 @@ pub struct RefreshIndexCache(pub HashSet); #[derive(Debug)] pub struct DequeuedEnvelope(pub Box); -/// A request to update a project, typically sent by the envelope buffer. -/// -/// This message is similar to [`GetProjectState`], except it has no `no_cache` option -/// and it does not send a response, but sends a signal back to the buffer instead. -pub struct UpdateProject(pub ProjectKey); - /// A cache for [`ProjectState`]s. /// /// The project maintains information about organizations, projects, and project keys along with @@ -189,7 +154,6 @@ pub struct Services { pub outcome_aggregator: Addr, pub project_cache: Addr, pub test_store: Addr, - pub upstream_relay: Addr, } /// Main broker of the [`ProjectCacheService`]. @@ -704,12 +668,11 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, - project_cache: ProjectCacheHandle, + project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, /// Bounded channel used exclusively to receive envelopes from the envelope buffer. envelopes_rx: mpsc::Receiver, - redis: Option, } impl ProjectCacheService { @@ -717,20 +680,18 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, - project_cache: ProjectCacheHandle, + project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, envelopes_rx: mpsc::Receiver, - redis: Option, ) -> Self { Self { config, memory_checker, - project_cache, + project_cache_handle, services, global_config_rx, envelopes_rx, - redis, } } } @@ -742,13 +703,12 @@ impl Service for ProjectCacheService { let Self { config, memory_checker, - project_cache, + project_cache_handle, services, mut global_config_rx, mut envelopes_rx, - redis, } = self; - let project_events = project_cache.events(); + let project_events = project_cache_handle.events(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); @@ -833,7 +793,7 @@ impl Service for ProjectCacheService { } }) }, - project_event = project_events.recv() => { + Ok(project_event) = project_events.recv() => { metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_project_event", { broker.handle_project_event(project_event); }) @@ -907,7 +867,6 @@ mod tests { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); - let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); Services { envelope_buffer: None, @@ -916,7 +875,6 @@ mod tests { project_cache, outcome_aggregator, test_store, - upstream_relay, } } diff --git a/relay-server/src/services/projects/cache2/mod.rs b/relay-server/src/services/projects/cache/mod.rs similarity index 92% rename from relay-server/src/services/projects/cache2/mod.rs rename to relay-server/src/services/projects/cache/mod.rs index 62d55aff70..7d2b1d1afa 100644 --- 
a/relay-server/src/services/projects/cache2/mod.rs +++ b/relay-server/src/services/projects/cache/mod.rs @@ -3,6 +3,8 @@ mod project; mod service; mod state; +pub mod legacy; + pub use self::handle::ProjectCacheHandle; pub use self::project::{CheckedEnvelope, Project}; pub use self::service::{ProjectCache, ProjectCacheService, ProjectEvent}; diff --git a/relay-server/src/services/projects/cache2/project.rs b/relay-server/src/services/projects/cache/project.rs similarity index 99% rename from relay-server/src/services/projects/cache2/project.rs rename to relay-server/src/services/projects/cache/project.rs index 2722360954..cdac543789 100644 --- a/relay-server/src/services/projects/cache2/project.rs +++ b/relay-server/src/services/projects/cache/project.rs @@ -7,7 +7,7 @@ use relay_sampling::evaluation::ReservoirCounters; use crate::envelope::ItemType; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::projects::cache2::state::SharedProject; +use crate::services::projects::cache::state::SharedProject; use crate::services::projects::project::ProjectState; use crate::utils::{CheckLimits, Enforcement, EnvelopeLimiter, ManagedEnvelope}; diff --git a/relay-server/src/services/projects/cache2/service.rs b/relay-server/src/services/projects/cache/service.rs similarity index 66% rename from relay-server/src/services/projects/cache2/service.rs rename to relay-server/src/services/projects/cache/service.rs index 1115a8feeb..c34879ebb0 100644 --- a/relay-server/src/services/projects/cache2/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -2,13 +2,25 @@ use std::sync::Arc; use relay_base_schema::project::ProjectKey; use relay_config::Config; +use relay_system::Service; use tokio::sync::{broadcast, mpsc}; -use crate::services::buffer::EnvelopeBuffer; -use crate::services::projects::cache2::state::{CompletedFetch, Fetch}; +use crate::services::projects::cache::handle::ProjectCacheHandle; +use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; use crate::services::projects::project::{ProjectFetchState, ProjectState}; use crate::services::projects::source::ProjectSource; +/// Size of the broadcast channel for project events. +/// +/// This is set to a value which theoretically should never be reachable, +/// the number of events is approximately bounded by the amount of projects +/// receiving events. +/// +/// It is set to such a large amount because receivers of events currently +/// do not deal with lags in the channel gracefuly. 
+const PROJECT_EVENTS_CHANNEL_SIZE: usize = 512_000; + +#[derive(Debug)] pub enum ProjectCache { Fetch(ProjectKey), } @@ -23,18 +35,17 @@ impl relay_system::FromMessage for ProjectCache { } } +#[derive(Debug, Copy, Clone)] pub enum ProjectEvent { Ready(ProjectKey), Evicted(ProjectKey), } pub struct ProjectCacheService { - store: super::state::ProjectStore, + store: ProjectStore, source: ProjectSource, config: Arc, - buffer: relay_system::Addr, - project_update_rx: mpsc::UnboundedReceiver, project_update_tx: mpsc::UnboundedSender, @@ -42,6 +53,35 @@ pub struct ProjectCacheService { } impl ProjectCacheService { + pub fn new(config: Arc, source: ProjectSource) -> Self { + let (project_update_tx, project_update_rx) = mpsc::unbounded_channel(); + let project_events_tx = broadcast::channel(256_000).0; + + Self { + store: ProjectStore::default(), + source, + config, + project_update_rx, + project_update_tx, + project_events_tx, + } + } + + pub fn start(self) -> (relay_system::Addr, ProjectCacheHandle) { + let (addr, addr_rx) = relay_system::channel(Self::name()); + + let handle = ProjectCacheHandle { + shared: self.store.shared(), + config: Arc::clone(&self.config), + service: addr.clone(), + project_events: self.project_events_tx.clone(), + }; + + self.spawn_handler(addr_rx); + + (addr, handle) + } + fn schedule_fetch(&self, fetch: Fetch) { let source = self.source.clone(); let project_updates = self.project_update_tx.clone(); @@ -70,6 +110,7 @@ impl ProjectCacheService { } } +/// All [`ProjectCacheService`] message handlers. impl ProjectCacheService { fn handle_fetch(&mut self, project_key: ProjectKey) { if let Some(fetch) = self.store.try_begin_fetch(project_key, &self.config) { @@ -89,20 +130,23 @@ impl ProjectCacheService { return; } - self.project_events_tx + // TODO: no-ops from revision checks should not end up here + let _ = self + .project_events_tx .send(ProjectEvent::Ready(project_key)); } fn handle_evict_stale_projects(&mut self) { let on_evict = |project_key| { - self.project_events_tx + let _ = self + .project_events_tx .send(ProjectEvent::Evicted(project_key)); }; self.store.evict_stale_projects(&self.config, on_evict); } - fn handle(&mut self, message: ProjectCache) { + fn handle_message(&mut self, message: ProjectCache) { match message { ProjectCache::Fetch(project_key) => self.handle_fetch(project_key), } @@ -124,7 +168,7 @@ impl relay_system::Service for ProjectCacheService { self.handle_project_update(update) }, Some(message) = rx.recv() => { - self.handle(message); + self.handle_message(message); }, _ = eviction_ticker.tick() => { self.handle_evict_stale_projects() diff --git a/relay-server/src/services/projects/cache2/state.rs b/relay-server/src/services/projects/cache/state.rs similarity index 98% rename from relay-server/src/services/projects/cache2/state.rs rename to relay-server/src/services/projects/cache/state.rs index 722e20f182..74fc03b72b 100644 --- a/relay-server/src/services/projects/cache2/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -8,7 +8,7 @@ use relay_config::Config; use relay_quotas::{CachedRateLimits, RateLimits}; use relay_sampling::evaluation::ReservoirCounters; -use crate::services::projects::cache2::ProjectCache; +use crate::services::projects::cache::ProjectCache; use crate::services::projects::project::ProjectState; use crate::utils::RetryBackoff; @@ -22,12 +22,17 @@ use crate::utils::RetryBackoff; /// a fetch to create the private state when [`Missing`] is returned. 
/// This gurnatuees that eventually the project state is populated, but for a undetermined, /// time it is possible that shared state exists without the respective private state. +#[derive(Default)] pub struct ProjectStore { shared: Arc, private: hashbrown::HashMap, } impl ProjectStore { + pub fn shared(&self) -> Arc { + Arc::clone(&self.shared) + } + pub fn get(&mut self, project_key: ProjectKey) -> Option> { let private = self.private.get_mut(&project_key)?; let shared = self.shared.projects.pin().get(&project_key).cloned(); @@ -155,6 +160,7 @@ impl ProjectRef<'_> { } } +#[derive(Default)] pub struct Shared { projects: papaya::HashMap, } diff --git a/relay-server/src/services/projects/mod.rs b/relay-server/src/services/projects/mod.rs index ef8b6fead2..bc8f253eee 100644 --- a/relay-server/src/services/projects/mod.rs +++ b/relay-server/src/services/projects/mod.rs @@ -1,4 +1,3 @@ pub mod cache; -pub mod cache2; pub mod project; pub mod source; diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index 54c2530550..7fc489ebbf 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -55,7 +55,7 @@ use crate::envelope::{Envelope, EnvelopeError}; use crate::extractors::StartTime; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; +use crate::services::projects::cache::legacy::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{ManagedEnvelope, MemoryChecker}; diff --git a/relay-server/src/testutils.rs b/relay-server/src/testutils.rs index 9f4c255d35..358a2a929d 100644 --- a/relay-server/src/testutils.rs +++ b/relay-server/src/testutils.rs @@ -137,7 +137,7 @@ pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { redis_pools, processor::Addrs { outcome_aggregator, - project_cache, + legacy_project_cache: project_cache, upstream_relay, test_store, #[cfg(feature = "processing")] From 6842dbbe43044637910da43c14030bea9cc60453 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 22 Oct 2024 11:13:25 +0200 Subject: [PATCH 04/15] lots of docs, rm projectfetchstate, more cleanups, some working tests --- .../src/services/projects/cache/handle.rs | 19 +- .../src/services/projects/cache/legacy.rs | 21 +- .../src/services/projects/cache/service.rs | 21 +- .../src/services/projects/cache/state.rs | 389 ++++++++++-------- .../projects/project/{state => }/info.rs | 0 .../src/services/projects/project/mod.rs | 133 +++++- .../projects/project/state/fetch_state.rs | 102 ----- .../services/projects/project/state/mod.rs | 127 ------ .../src/services/projects/source/mod.rs | 35 +- .../src/services/projects/source/redis.rs | 2 +- .../src/services/projects/source/upstream.rs | 6 +- 11 files changed, 404 insertions(+), 451 deletions(-) rename relay-server/src/services/projects/project/{state => }/info.rs (100%) delete mode 100644 relay-server/src/services/projects/project/state/fetch_state.rs delete mode 100644 relay-server/src/services/projects/project/state/mod.rs diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs index 39b4b23594..6c77558c66 100644 --- a/relay-server/src/services/projects/cache/handle.rs +++ b/relay-server/src/services/projects/cache/handle.rs @@ 
-21,23 +21,22 @@ pub struct ProjectCacheHandle { impl ProjectCacheHandle { /// Returns the current project state for the `project_key`. pub fn get(&self, project_key: ProjectKey) -> Project<'_> { - // TODO: maybe we should always trigger a fetch? - // We need a way to continously keep projects updated while at the same time - // let unused projects expire. - // TODO: trigger prefetch for the sampling projects, maybe take a resolver trait which can - // also resolve the sampling project and fetch? Or do it explicit. - let project = match self.shared.get_or_create(project_key) { - Ok(project) => project, - Err(missing) => missing.fetch(&self.service), - }; + let project = self.shared.get_or_create(project_key); + // Always trigger a fetch after retrieving the project to make sure the state is up to date. + self.fetch(project_key); + Project::new(project, &self.config) } + /// Triggers a fetch/update check in the project cache for the supplied project. pub fn fetch(&self, project_key: ProjectKey) { - // TODO: does this make sense? self.service.send(ProjectCache::Fetch(project_key)); } + /// Returns a subscription to all [`ProjectEvent`]'s. + /// + /// This stream notifies the subscriber about project state changes in the project cache. + /// Events may arrive in arbitrary order and be delivered multiple times. pub fn events(&self) -> broadcast::Receiver { self.project_events.subscribe() } diff --git a/relay-server/src/services/projects/cache/legacy.rs b/relay-server/src/services/projects/cache/legacy.rs index 330ca47bc7..5536cf7382 100644 --- a/relay-server/src/services/projects/cache/legacy.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -20,7 +20,7 @@ use tokio::sync::{mpsc, watch}; use crate::services::metrics::{Aggregator, FlushBuckets, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::services::projects::project::{ProjectFetchState, ProjectState}; +use crate::services::projects::project::ProjectState; use crate::services::spooler::{ self, Buffer, BufferService, DequeueMany, Enqueue, QueueKey, RemoveMany, RestoreIndex, UnspooledEnvelope, BATCH_KEY_COUNT, @@ -253,7 +253,7 @@ impl ProjectCacheBroker { } /// Handles the processing of the provided envelope. - fn handle_processing(&mut self, key: QueueKey, mut managed_envelope: ManagedEnvelope) { + fn handle_processing(&mut self, mut managed_envelope: ManagedEnvelope) { let project_key = managed_envelope.envelope().meta().public_key(); let project = self.projects.get(project_key); @@ -373,7 +373,7 @@ impl ProjectCacheBroker { { // TODO: Add ready project infos to the processing message. 
relay_log::trace!("Sending envelope to processor"); - return self.handle_processing(key, managed_envelope); + return self.handle_processing(managed_envelope); } relay_log::trace!("Enqueueing envelope"); @@ -708,7 +708,7 @@ impl Service for ProjectCacheService { mut global_config_rx, mut envelopes_rx, } = self; - let project_events = project_cache_handle.events(); + let mut project_events = project_cache_handle.events(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); @@ -771,7 +771,7 @@ impl Service for ProjectCacheService { let mut broker = ProjectCacheBroker { config: config.clone(), memory_checker, - projects: todo!(), + projects: project_cache_handle, garbage_disposal: GarbageDisposal::new(), services, spool_v1_unspool_handle: SleepHandle::idle(), @@ -802,7 +802,7 @@ impl Service for ProjectCacheService { // permits in `BufferGuard` available. Currently this is 50%. Some(UnspooledEnvelope { managed_envelope, key }) = buffer_rx.recv() => { metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_processing", { - broker.handle_processing(key, managed_envelope) + broker.handle_processing(managed_envelope) }) }, () = &mut broker.spool_v1_unspool_handle => { @@ -833,16 +833,9 @@ impl Service for ProjectCacheService { #[derive(Debug)] #[allow(dead_code)] // Fields are never read, only used for discarding/dropping data. enum ProjectGarbage { - ProjectFetchState(ProjectFetchState), Metrics(Vec), } -impl From for ProjectGarbage { - fn from(value: ProjectFetchState) -> Self { - Self::ProjectFetchState(value) - } -} - impl From> for ProjectGarbage { fn from(value: Vec) -> Self { Self::Metrics(value) @@ -1037,7 +1030,7 @@ mod tests { // Since there is no project we should not process anything but create a project and spool // the envelope. - broker.handle_processing(key, envelope); + broker.handle_processing(envelope); // Assert that we have a new project and also added an index. assert!(!broker diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index c34879ebb0..d8e22c1d03 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -7,7 +7,7 @@ use tokio::sync::{broadcast, mpsc}; use crate::services::projects::cache::handle::ProjectCacheHandle; use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; -use crate::services::projects::project::{ProjectFetchState, ProjectState}; +use crate::services::projects::project::ProjectState; use crate::services::projects::source::ProjectSource; /// Size of the broadcast channel for project events. @@ -20,8 +20,14 @@ use crate::services::projects::source::ProjectSource; /// do not deal with lags in the channel gracefuly. const PROJECT_EVENTS_CHANNEL_SIZE: usize = 512_000; +/// A cache for projects, which allows concurrent access to the cached projects. #[derive(Debug)] pub enum ProjectCache { + /// Schedules another fetch or update for the specified project. + /// + /// A project which is not fetched will eventually expire and be evicted + /// from the cache. Fetches for an already cached project ensure the project + /// is always up to date and not evicted. Fetch(ProjectKey), } @@ -41,6 +47,7 @@ pub enum ProjectEvent { Evicted(ProjectKey), } +/// A service implementing the [`ProjectCache`] interface. 
pub struct ProjectCacheService { store: ProjectStore, source: ProjectSource, @@ -53,9 +60,10 @@ pub struct ProjectCacheService { } impl ProjectCacheService { + /// Creates a new [`ProjectCacheService`]. pub fn new(config: Arc, source: ProjectSource) -> Self { let (project_update_tx, project_update_rx) = mpsc::unbounded_channel(); - let project_events_tx = broadcast::channel(256_000).0; + let project_events_tx = broadcast::channel(PROJECT_EVENTS_CHANNEL_SIZE).0; Self { store: ProjectStore::default(), @@ -67,6 +75,10 @@ impl ProjectCacheService { } } + /// Consumes and starts a [`ProjectCacheService`]. + /// + /// Returns a [`relay_system::Addr`] to communicate with the cache and a [`ProjectCacheHandle`] + /// to access the cache concurrently. pub fn start(self) -> (relay_system::Addr, ProjectCacheHandle) { let (addr, addr_rx) = relay_system::channel(Self::name()); @@ -82,6 +94,7 @@ impl ProjectCacheService { (addr, handle) } + /// Schedules a new [`Fetch`] and delivers the result to the [`Self::project_update_tx`] channel. fn schedule_fetch(&self, fetch: Fetch) { let source = self.source.clone(); let project_updates = self.project_update_tx.clone(); @@ -91,7 +104,7 @@ impl ProjectCacheService { // TODO: cached state for delta fetches, maybe this should just be a revision? let state = match source - .fetch(fetch.project_key(), false, ProjectFetchState::pending()) + .fetch(fetch.project_key(), false, ProjectState::Pending) .await { // TODO: verify if the sanitized here is correct @@ -105,7 +118,7 @@ impl ProjectCacheService { } }; - project_updates.send(fetch.complete(state)); + let _ = project_updates.send(fetch.complete(state)); }); } } diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index 74fc03b72b..cd5f92220d 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -5,10 +5,9 @@ use std::time::Instant; use arc_swap::ArcSwap; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use relay_quotas::{CachedRateLimits, RateLimits}; +use relay_quotas::CachedRateLimits; use relay_sampling::evaluation::ReservoirCounters; -use crate::services::projects::cache::ProjectCache; use crate::services::projects::project::ProjectState; use crate::utils::RetryBackoff; @@ -24,34 +23,32 @@ use crate::utils::RetryBackoff; /// time it is possible that shared state exists without the respective private state. #[derive(Default)] pub struct ProjectStore { + /// The shared state, which can be accessed concurrently. shared: Arc, + /// The private, mutably exclusve state, used to maintain the project state. private: hashbrown::HashMap, } impl ProjectStore { + /// Retrieves a [`Shared`] handle which can be freely shared with multiple consumers. pub fn shared(&self) -> Arc { Arc::clone(&self.shared) } - pub fn get(&mut self, project_key: ProjectKey) -> Option> { - let private = self.private.get_mut(&project_key)?; - let shared = self.shared.projects.pin().get(&project_key).cloned(); - debug_assert!( - shared.is_some(), - "there must be a shared project if private state exists" - ); - - Some(ProjectRef { - private, - shared: shared?, - }) - } - + /// Tries to begin a new fetch for the passed `project_key`. + /// + /// Returns `None` if no fetch is necessary or there is already a fetch ongoing. + /// A returned [`Fetch`] must be scheduled and completed with [`Fetch::complete`] and + /// [`Self::complete_fetch`]. 
pub fn try_begin_fetch(&mut self, project_key: ProjectKey, config: &Config) -> Option { self.get_or_create(project_key, config) .try_begin_fetch(config) } + /// Completes a [`CompletedFetch`] started with [`Self::try_begin_fetch`]. + /// + /// Returns a new [`Fetch`] if another fetch must be scheduled. This happens when the fetched + /// [`ProjectState`] is still pending or already deemed expired. #[must_use = "an incomplete fetch must be retried"] pub fn complete_fetch(&mut self, fetch: CompletedFetch, config: &Config) -> Option { // TODO: what if in the meantime the state expired and was evicted? @@ -67,26 +64,24 @@ impl ProjectStore { project.try_begin_fetch(config) } + /// Evicts all stale, expired projects from the cache. + /// + /// Evicted projects are passed to the `on_evict` callback. Returns the total amount of evicted + /// projects. pub fn evict_stale_projects(&mut self, config: &Config, mut on_evict: F) -> usize where F: FnMut(ProjectKey), { let eviction_start = Instant::now(); - let delta = 2 * config.project_cache_expiry() + config.project_grace_period(); - // TODO: what do we do with forever fetching projects, do we fail eventually? let expired = self.private.extract_if(|_, private| { - if private.has_fetch_in_progress() { - return false; - } - - // Invariant: if there is no last successful fetch, - // there must be a fetch currently in progress. - debug_assert!(private.last_non_pending_fetch.is_some()); - - private - .last_non_pending_fetch - .map_or(true, |ts| ts + delta <= eviction_start) + // We only evict projects which have fully expired and are not currently being fetched. + // + // This relies on the fact that a project should never remain in the `pending` state + // for long and is either always being fetched or successfully fetched. + private.last_fetch().map_or(false, |ts| { + ts.check_expiry(eviction_start, config).is_expired() + }) }); let mut evicted = 0; @@ -111,11 +106,17 @@ impl ProjectStore { evicted } + /// Get a reference to the current project or create a new project. + /// + /// For internal use only, a created project must always be fetched immeditately. fn get_or_create(&mut self, project_key: ProjectKey, config: &Config) -> ProjectRef<'_> { #[cfg(debug_assertions)] if self.private.contains_key(&project_key) { // We have exclusive access to the private part, there are no concurrent deletions // hence when if we have a private state there must always be a shared state as well. + // + // The opposite is not true, the shared state may have been created concurrently + // through the shared access. debug_assert!(self.shared.projects.pin().contains_key(&project_key)); } @@ -135,49 +136,18 @@ impl ProjectStore { } } -pub struct ProjectRef<'a> { - shared: SharedProjectState, - private: &'a mut PrivateProjectState, -} - -impl ProjectRef<'_> { - pub fn merge_rate_limits(&mut self, rate_limits: RateLimits) { - self.shared.merge_rate_limits(rate_limits) - } - - fn try_begin_fetch(&mut self, config: &Config) -> Option { - self.private.try_begin_fetch(config) - } - - fn complete_fetch(&mut self, fetch: CompletedFetch) { - self.private.complete_fetch(&fetch); - - // Keep the old state around if the current fetch is pending. - // It may still be useful to callers. - if !fetch.project_state.is_pending() { - self.shared.set_project_state(fetch.project_state); - } - } -} - +/// The shared and concurrently accessible handle to the project cache. 
#[derive(Default)] pub struct Shared { projects: papaya::HashMap, } impl Shared { - // pub fn get(&self, project_key: ProjectKey) -> Option { - // self.projects - // .pin() - // .get(&project_key) - // .map(|v| v.state.load().as_ref().clone()) - // } - - /// Returns the existing project state or creates a new one and returns `Err([`Missing`])`. + /// Returns the existing project state or creates a new one. /// - /// The returned [`Missing`] value must be used to trigger a fetch for this project - /// or it will stay pending forever. - pub fn get_or_create(&self, project_key: ProjectKey) -> Result { + /// The caller must ensure that the project cache is instructed to + /// [`super::ProjectCache::Fetch`] the retrieved project. + pub fn get_or_create(&self, project_key: ProjectKey) -> SharedProject { // TODO: do we need to check for expiry here? // TODO: if yes, we need to include the timestamp in the shared project state. // TODO: grace periods? @@ -185,16 +155,13 @@ impl Shared { // The fast path, we expect the project to exist. let projects = self.projects.pin(); if let Some(project) = projects.get(&project_key) { - return Ok(project.to_shared_project()); + return project.to_shared_project(); } // The slow path, try to attempt to insert, somebody else may have been faster, but that's okay. match projects.try_insert(project_key, Default::default()) { - Ok(inserted) => Err(Missing { - project_key, - shared_project: inserted.to_shared_project(), - }), - Err(occupied) => Ok(occupied.current.to_shared_project()), + Ok(inserted) => inserted.to_shared_project(), + Err(occupied) => occupied.current.to_shared_project(), } } } @@ -207,43 +174,115 @@ impl fmt::Debug for Shared { } } -#[must_use = "a missing project must be fetched"] -pub struct Missing { - project_key: ProjectKey, - shared_project: SharedProject, -} - -impl Missing { - pub fn fetch(self, project_cache: &relay_system::Addr) -> SharedProject { - project_cache.send(ProjectCache::Fetch(self.project_key)); - self.shared_project - } -} - +/// A single project from the [`Shared`] project cache. pub struct SharedProject(Arc); impl SharedProject { + /// Returns a reference to the contained [`ProjectState`]. pub fn project_state(&self) -> &ProjectState { &self.0.state } + /// Returns a reference to the contained [`CachedRateLimits`]. pub fn cached_rate_limits(&self) -> &CachedRateLimits { - // TODO: exposing cached rate limits may be a bad idea, this allows mutation + // Exposing cached rate limits may be a bad idea, this allows mutation // and caching of rate limits for pending projects, which may or may not be fine. + // Although, for now this is fine. // // Read only access is easily achievable if we return only the current rate limits. &self.0.rate_limits } + /// Returns a reference to the contained [`ReservoirCounters`]. pub fn reservoir_counters(&self) -> &ReservoirCounters { &self.0.reservoir_counters } } +/// Reference to a full project wrapping shared and private state. +struct ProjectRef<'a> { + shared: SharedProjectState, + private: &'a mut PrivateProjectState, +} + +impl ProjectRef<'_> { + fn try_begin_fetch(&mut self, config: &Config) -> Option { + let now = Instant::now(); + self.private.try_begin_fetch(now, config) + } + + fn complete_fetch(&mut self, fetch: CompletedFetch) { + self.private.complete_fetch(&fetch); + + // Keep the old state around if the current fetch is pending. + // It may still be useful to callers. 
+ if !fetch.project_state.is_pending() { + self.shared.set_project_state(fetch.project_state); + } + } +} + +/// A [`Fetch`] token. +/// +/// When returned it must be executed and completed using [`Self::complete`]. +#[must_use = "a fetch must be executed"] +#[derive(Debug)] +pub struct Fetch { + project_key: ProjectKey, + when: Instant, +} + +impl Fetch { + /// Returns the [`ProjectKey`] of the project to fetch. + pub fn project_key(&self) -> ProjectKey { + self.project_key + } + + /// Returns when the the fetch for the project should be scheduled. + /// + /// This can be now (as soon as possible) or a alter point in time, if the project is currently + /// in a backoff. + pub fn when(&self) -> Instant { + self.when + } + + /// Completes the fetch with a result and returns a [`CompletedFetch`]. + pub fn complete(self, state: ProjectState) -> CompletedFetch { + CompletedFetch { + project_key: self.project_key, + project_state: state, + } + } +} + +/// The result of an executed [`Fetch`]. +#[must_use = "a completed fetch must be acted upon"] +#[derive(Debug)] +pub struct CompletedFetch { + project_key: ProjectKey, + project_state: ProjectState, +} + +impl CompletedFetch { + /// Returns the [`ProjectKey`] of the project which was fetched. + pub fn project_key(&self) -> ProjectKey { + self.project_key + } + + /// Returns a reference to the fetched [`ProjectState`]. + pub fn project_state(&self) -> &ProjectState { + &self.project_state + } +} + +/// The state of a project contained in the [`Shared`] project cache. +/// +/// This state is interior mutable and allows updates to the project. #[derive(Debug, Default, Clone)] struct SharedProjectState(Arc>); impl SharedProjectState { + /// Updates the project state. fn set_project_state(&self, state: ProjectState) { let prev = self.0.rcu(|stored| SharedProjectStateInner { state: state.clone(), @@ -271,10 +310,7 @@ impl SharedProjectState { } } - fn merge_rate_limits(&self, rate_limits: RateLimits) { - self.0.load().rate_limits.merge(rate_limits) - } - + /// Transforms this interior mutable handle to an immutable [`SharedProject`]. fn to_shared_project(&self) -> SharedProject { SharedProject(self.0.as_ref().load_full()) } @@ -291,13 +327,39 @@ struct SharedProjectStateInner { reservoir_counters: ReservoirCounters, } +/// Current fetch state for a project. +#[derive(Debug)] +enum FetchState { + /// There is a fetch currently in progress. + InProgress, + /// A successful fetch is pending. + /// + /// This state is essentially only the initial state, a project + /// for the most part should always have a fetch in progress or be + /// in the non-pending state. + Pending { + /// Time when the next fetch should be attempted. + /// + /// `None` means soon as possible. + next_fetch_attempt: Option, + }, + /// There was a successful non-pending fetch, + NonPending { + /// Time when the fetch was completed. + last_fetch: LastFetch, + }, +} + +/// Contains all mutable state necessary to maintain the project cache. struct PrivateProjectState { + /// Project key this state belongs to. project_key: ProjectKey, - current_fetch: Option<()>, - - last_non_pending_fetch: Option, - next_fetch_attempt: Option, + /// The current fetch state. + state: FetchState, + /// The current backoff used for calculating the next fetch attempt. + /// + /// The backoff is reset after a successful, non-pending fetch. 
backoff: RetryBackoff, } @@ -305,41 +367,50 @@ impl PrivateProjectState { fn new(project_key: ProjectKey, config: &Config) -> Self { Self { project_key, - current_fetch: None, - last_non_pending_fetch: None, - next_fetch_attempt: None, + state: FetchState::Pending { + next_fetch_attempt: None, + }, backoff: RetryBackoff::new(config.http_max_retry_interval()), } } - fn has_fetch_in_progress(&self) -> bool { - self.current_fetch.is_some() - } - - fn try_begin_fetch(&mut self, config: &Config) -> Option { - if self.current_fetch.is_some() { - // Already a fetch in progress. - relay_log::trace!( - project_key = self.project_key.as_str(), - "project fetch skipped, fetch in progress" - ); - return None; + /// Returns the [`LastFetch`] if there is currently no fetch in progress and the project + /// was fetched successfully before. + fn last_fetch(&self) -> Option { + match &self.state { + FetchState::NonPending { last_fetch } => Some(*last_fetch), + _ => None, } + } - if matches!(self.check_expiry(config), Expiry::Updated) { - // The current state is up to date, no need to start another fetch. - relay_log::trace!( - project_key = self.project_key.as_str(), - "project fetch skipped, already up to date" - ); - return None; - } + fn try_begin_fetch(&mut self, now: Instant, config: &Config) -> Option { + let when = match &self.state { + FetchState::InProgress {} => { + relay_log::trace!( + project_key = self.project_key.as_str(), + "project fetch skipped, fetch in progress" + ); + return None; + } + FetchState::Pending { next_fetch_attempt } => { + // Schedule a new fetch, even if there is a backoff, it will just be sleeping for a while. + next_fetch_attempt.unwrap_or(now) + } + FetchState::NonPending { last_fetch } => { + if last_fetch.check_expiry(now, config).is_updated() { + // The current state is up to date, no need to start another fetch. + relay_log::trace!( + project_key = self.project_key.as_str(), + "project fetch skipped, already up to date" + ); + return None; + } + now + } + }; // Mark a current fetch in progress. - self.current_fetch.insert(()); - - // Schedule a new fetch, even if there is a backoff, it will just be sleeping for a while. 
- let when = self.next_fetch_attempt.take().unwrap_or_else(Instant::now); + self.state = FetchState::InProgress {}; relay_log::debug!( project_key = &self.project_key.as_str(), @@ -355,32 +426,33 @@ impl PrivateProjectState { } fn complete_fetch(&mut self, fetch: &CompletedFetch) { + debug_assert!( + matches!(self.state, FetchState::InProgress), + "fetch completed while there was no current fetch registered" + ); + if fetch.project_state.is_pending() { - self.next_fetch_attempt = Instant::now().checked_add(self.backoff.next_backoff()); + self.state = FetchState::Pending { + next_fetch_attempt: Instant::now().checked_add(self.backoff.next_backoff()), + }; } else { - debug_assert!( - self.next_fetch_attempt.is_none(), - "the scheduled fetch should have cleared the next attempt" - ); - self.next_fetch_attempt = None; self.backoff.reset(); - self.last_non_pending_fetch = Some(Instant::now()); + self.state = FetchState::NonPending { + last_fetch: LastFetch(Instant::now()), + }; } - - let _current_fetch = self.current_fetch.take(); - debug_assert!( - _current_fetch.is_some(), - "fetch completed while there was no current fetch registered" - ); } +} - fn check_expiry(&self, config: &Config) -> Expiry { - let Some(last_fetch) = self.last_non_pending_fetch else { - return Expiry::Expired; - }; +/// New type containing the last successful fetch time as an [`Instant`]. +#[derive(Debug, Copy, Clone)] +struct LastFetch(Instant); +impl LastFetch { + /// Returns the [`Expiry`] of the last fetch in relation to `now`. + fn check_expiry(&self, now: Instant, config: &Config) -> Expiry { let expiry = config.project_cache_expiry(); - let elapsed = last_fetch.elapsed(); + let elapsed = now.saturating_duration_since(self.0); if elapsed >= expiry + config.project_grace_period() { Expiry::Expired @@ -392,6 +464,7 @@ impl PrivateProjectState { } } +/// Expiry state of a project. #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] enum Expiry { /// The project state is perfectly up to date. @@ -404,40 +477,14 @@ enum Expiry { Expired, } -#[derive(Debug)] -pub struct Fetch { - project_key: ProjectKey, - when: Instant, -} - -impl Fetch { - pub fn project_key(&self) -> ProjectKey { - self.project_key - } - - pub fn when(&self) -> Instant { - self.when +impl Expiry { + /// Returns `true` if the project is uo to date and does not need to be fetched. + fn is_updated(&self) -> bool { + matches!(self, Self::Updated) } - pub fn complete(self, state: ProjectState) -> CompletedFetch { - CompletedFetch { - project_key: self.project_key, - project_state: state, - } - } -} - -pub struct CompletedFetch { - project_key: ProjectKey, - project_state: ProjectState, -} - -impl CompletedFetch { - pub fn project_key(&self) -> ProjectKey { - self.project_key - } - - pub fn project_state(&self) -> &ProjectState { - &self.project_state + /// Returns `true` if the project is expired and can be evicted. + fn is_expired(&self) -> bool { + matches!(self, Self::Expired) } } diff --git a/relay-server/src/services/projects/project/state/info.rs b/relay-server/src/services/projects/project/info.rs similarity index 100% rename from relay-server/src/services/projects/project/state/info.rs rename to relay-server/src/services/projects/project/info.rs diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index 040a8d14a6..1dad67fc12 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -1,5 +1,130 @@ -pub mod state; +//! 
Types that represent the current project state. +use std::sync::Arc; -pub use state::{ - LimitedParsedProjectState, ParsedProjectState, ProjectFetchState, ProjectInfo, ProjectState, -}; +use serde::{Deserialize, Serialize}; + +use relay_base_schema::project::ProjectKey; +use relay_quotas::Scoping; + +mod info; + +pub use self::info::{LimitedProjectInfo, ProjectInfo}; + +/// Representation of a project's current state. +#[derive(Clone, Debug, Default)] +pub enum ProjectState { + /// A valid project that is not disabled. + Enabled(Arc), + /// A project that was marked as "gone" by the upstream. This variant does not expose + /// any other project information. + Disabled, + /// A project to which one of the following conditions apply: + /// - The project has not yet been fetched. + /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). + /// - The upstream returned an unparsable project so we have to try again. + /// - The project has expired and must be treated as "has not been fetched". + #[default] + Pending, +} + +impl ProjectState { + /// Project state for an unknown but allowed project. + /// + /// This state is used for forwarding in Proxy mode. + pub fn new_allowed() -> Self { + Self::Enabled(Arc::new(ProjectInfo { + project_id: None, + last_change: None, + rev: None, + public_keys: Default::default(), + slug: None, + config: Default::default(), + organization_id: None, + })) + } + + /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). + pub fn sanitized(self) -> Self { + match self { + ProjectState::Enabled(state) => { + ProjectState::Enabled(Arc::new(state.as_ref().clone().sanitized())) + } + ProjectState::Disabled => ProjectState::Disabled, + ProjectState::Pending => ProjectState::Pending, + } + } + + /// Whether or not this state is pending. + pub fn is_pending(&self) -> bool { + matches!(self, ProjectState::Pending) + } + + /// Utility function that returns the project config if enabled. + pub fn enabled(self) -> Option> { + match self { + ProjectState::Enabled(info) => Some(info), + ProjectState::Disabled | ProjectState::Pending => None, + } + } + + /// Returns the revision of the contained project info. + /// + /// `None` if the revision is missing or not available. + pub fn revision(&self) -> Option<&str> { + match &self { + ProjectState::Enabled(info) => info.rev.as_deref(), + ProjectState::Disabled => None, + ProjectState::Pending => None, + } + } + + /// Creates `Scoping` for this project if the state is loaded. + /// + /// Returns `Some` if the project state has been fetched and contains a project identifier, + /// otherwise `None`. + pub fn scoping(&self, project_key: ProjectKey) -> Option { + match self { + Self::Enabled(info) => info.scoping(project_key), + _ => None, + } + } +} + +impl From for ProjectState { + fn from(value: ParsedProjectState) -> Self { + let ParsedProjectState { disabled, info } = value; + match disabled { + true => ProjectState::Disabled, + false => ProjectState::Enabled(Arc::new(info)), + } + } +} + +/// Project state as used in serialization / deserialization. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParsedProjectState { + /// Whether the project state is disabled. + #[serde(default)] + pub disabled: bool, + /// Project info. + /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). 
+ #[serde(flatten)] + pub info: ProjectInfo, +} + +/// Limited project state for external Relays. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] +pub struct LimitedParsedProjectState { + /// Whether the project state is disabled. + pub disabled: bool, + /// Limited project info for external Relays. + /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). + #[serde(with = "LimitedProjectInfo")] + #[serde(flatten)] + pub info: ProjectInfo, +} diff --git a/relay-server/src/services/projects/project/state/fetch_state.rs b/relay-server/src/services/projects/project/state/fetch_state.rs deleted file mode 100644 index 3a61f838b8..0000000000 --- a/relay-server/src/services/projects/project/state/fetch_state.rs +++ /dev/null @@ -1,102 +0,0 @@ -use std::sync::Arc; - -use tokio::time::Instant; - -use relay_dynamic_config::ProjectConfig; - -use crate::services::projects::project::state::info::ProjectInfo; -use crate::services::projects::project::ProjectState; - -/// Hides a cached project state and only exposes it if it has not expired. -#[derive(Clone, Debug)] -pub struct ProjectFetchState { - /// The time at which this project state was last updated. - last_fetch: Option, - state: ProjectState, -} - -impl ProjectFetchState { - /// Takes a [`ProjectState`] and sets it's last fetch to the current time. - pub fn new(state: ProjectState) -> Self { - Self { - last_fetch: Some(Instant::now()), - state, - } - } - - /// Refreshes the expiry of the fetch state. - pub fn refresh(old: ProjectFetchState) -> Self { - Self { - last_fetch: Some(Instant::now()), - state: old.state, - } - } - - /// Project state for an unknown but allowed project. - /// - /// This state is used for forwarding in Proxy mode. - pub fn allowed() -> Self { - Self::enabled(ProjectInfo { - project_id: None, - last_change: None, - rev: None, - public_keys: Default::default(), - slug: None, - config: ProjectConfig::default(), - organization_id: None, - }) - } - - /// An enabled project state created from a project info. - pub fn enabled(project_info: ProjectInfo) -> Self { - Self::new(ProjectState::Enabled(Arc::new(project_info))) - } - - // Returns a disabled state. - pub fn disabled() -> Self { - Self::new(ProjectState::Disabled) - } - - /// Returns a pending or invalid state. - pub fn pending() -> Self { - Self::new(ProjectState::Pending) - } - - /// Create a config that immediately counts as expired. - /// - /// This is what [`Project`](crate::services::projects::project::Project) initializes itself with. - pub fn expired() -> Self { - Self { - // Make sure the state immediately qualifies as expired: - last_fetch: None, - state: ProjectState::Pending, - } - } - - /// Sanitizes the contained project state. See [`ProjectState::sanitized`]. - pub fn sanitized(self) -> Self { - let Self { last_fetch, state } = self; - Self { - last_fetch, - state: state.sanitized(), - } - } - - /// Returns `true` if the contained state is pending. - pub fn is_pending(&self) -> bool { - matches!(self.state, ProjectState::Pending) - } - - /// Returns the revision of the contained project state. - /// - /// See: [`ProjectState::revision`]. 
- pub fn revision(&self) -> Option<&str> { - self.state.revision() - } -} - -impl From for ProjectState { - fn from(value: ProjectFetchState) -> Self { - value.state - } -} diff --git a/relay-server/src/services/projects/project/state/mod.rs b/relay-server/src/services/projects/project/state/mod.rs deleted file mode 100644 index 56a9995266..0000000000 --- a/relay-server/src/services/projects/project/state/mod.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! Types that represent the current project state. -use std::sync::Arc; - -use serde::{Deserialize, Serialize}; - -use relay_base_schema::project::ProjectKey; -use relay_quotas::Scoping; - -mod fetch_state; -mod info; - -pub use self::fetch_state::ProjectFetchState; -pub use self::info::{LimitedProjectInfo, ProjectInfo}; - -/// Representation of a project's current state. -#[derive(Clone, Debug, Default)] -pub enum ProjectState { - /// A valid project that is not disabled. - Enabled(Arc), - /// A project that was marked as "gone" by the upstream. This variant does not expose - /// any other project information. - Disabled, - /// A project to which one of the following conditions apply: - /// - The project has not yet been fetched. - /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). - /// - The upstream returned an unparsable project so we have to try again. - /// - The project has expired and must be treated as "has not been fetched". - #[default] - Pending, -} - -impl ProjectState { - /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). - pub fn sanitized(self) -> Self { - match self { - ProjectState::Enabled(state) => { - ProjectState::Enabled(Arc::new(state.as_ref().clone().sanitized())) - } - ProjectState::Disabled => ProjectState::Disabled, - ProjectState::Pending => ProjectState::Pending, - } - } - - /// Whether or not this state is pending. - pub fn is_pending(&self) -> bool { - matches!(self, ProjectState::Pending) - } - - /// Utility function that returns the project config if enabled. - pub fn enabled(self) -> Option> { - match self { - ProjectState::Enabled(info) => Some(info), - ProjectState::Disabled | ProjectState::Pending => None, - } - } - - /// Returns the revision of the contained project info. - /// - /// `None` if the revision is missing or not available. - pub fn revision(&self) -> Option<&str> { - match &self { - ProjectState::Enabled(info) => info.rev.as_deref(), - ProjectState::Disabled => None, - ProjectState::Pending => None, - } - } - - /// Creates `Scoping` for this project if the state is loaded. - /// - /// Returns `Some` if the project state has been fetched and contains a project identifier, - /// otherwise `None`. - pub fn scoping(&self, project_key: ProjectKey) -> Option { - match self { - Self::Enabled(info) => info.scoping(project_key), - _ => None, - } - } -} - -impl From for ProjectState { - fn from(value: ParsedProjectState) -> Self { - let ParsedProjectState { disabled, info } = value; - match disabled { - true => ProjectState::Disabled, - false => ProjectState::Enabled(Arc::new(info)), - } - } -} - -/// Project state as used in serialization / deserialization. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedProjectState { - /// Whether the project state is disabled. - #[serde(default)] - pub disabled: bool, - /// Project info. 
- /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(flatten)] - pub info: ProjectInfo, -} - -/// Limited project state for external Relays. -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] -pub struct LimitedParsedProjectState { - /// Whether the project state is disabled. - pub disabled: bool, - /// Limited project info for external Relays. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(with = "LimitedProjectInfo")] - #[serde(flatten)] - pub info: ProjectInfo, -} - -/// Response indicating whether a project state needs to be updated -/// or the upstream does not have a newer version. -#[derive(Debug, Clone)] -pub enum UpstreamProjectState { - /// The upstream sent a [`ProjectState`]. - New(ProjectState), - /// The upstream indicated that there is no newer version of the state available. - NotModified, -} diff --git a/relay-server/src/services/projects/source/mod.rs b/relay-server/src/services/projects/source/mod.rs index 5b208ef3ae..b6eb307607 100644 --- a/relay-server/src/services/projects/source/mod.rs +++ b/relay-server/src/services/projects/source/mod.rs @@ -15,8 +15,7 @@ pub mod local; pub mod redis; pub mod upstream; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::ProjectFetchState; +use crate::services::projects::project::ProjectState; use crate::services::upstream::UpstreamRelay; use self::local::{LocalProjectSource, LocalProjectSourceService}; @@ -95,21 +94,21 @@ impl ProjectSource { self, project_key: ProjectKey, no_cache: bool, - cached_state: ProjectFetchState, - ) -> Result { + cached_state: ProjectState, + ) -> Result { let state_opt = self .local_source .send(FetchOptionalProjectState { project_key }) .await?; if let Some(state) = state_opt { - return Ok(ProjectFetchState::new(state)); + return Ok(state); } match self.config.relay_mode() { - RelayMode::Proxy => return Ok(ProjectFetchState::allowed()), - RelayMode::Static => return Ok(ProjectFetchState::disabled()), - RelayMode::Capture => return Ok(ProjectFetchState::allowed()), + RelayMode::Proxy => return Ok(ProjectState::new_allowed()), + RelayMode::Static => return Ok(ProjectState::Disabled), + RelayMode::Capture => return Ok(ProjectState::new_allowed()), RelayMode::Managed => (), // Proceed with loading the config from redis or upstream } @@ -130,14 +129,12 @@ impl ProjectSource { Ok(UpstreamProjectState::New(state)) => { let state = state.sanitized(); if !state.is_pending() { - return Ok(ProjectFetchState::new(state)); + return Ok(state); } } // Redis reported that we're holding an up-to-date version of the state already, // refresh the state and return the old cached state again. 
- Ok(UpstreamProjectState::NotModified) => { - return Ok(ProjectFetchState::refresh(cached_state)) - } + Ok(UpstreamProjectState::NotModified) => return Ok(cached_state), Err(error) => { relay_log::error!( error = &error as &dyn std::error::Error, @@ -157,8 +154,8 @@ impl ProjectSource { .await?; match state { - UpstreamProjectState::New(state) => Ok(ProjectFetchState::new(state.sanitized())), - UpstreamProjectState::NotModified => Ok(ProjectFetchState::refresh(cached_state)), + UpstreamProjectState::New(state) => Ok(state.sanitized()), + UpstreamProjectState::NotModified => Ok(cached_state), } } } @@ -208,3 +205,13 @@ impl FetchOptionalProjectState { self.project_key } } + +/// Response indicating whether a project state needs to be updated +/// or the upstream does not have a newer version. +#[derive(Debug, Clone)] +pub enum UpstreamProjectState { + /// The upstream sent a [`ProjectState`]. + New(ProjectState), + /// The upstream indicated that there is no newer version of the state available. + NotModified, +} diff --git a/relay-server/src/services/projects/source/redis.rs b/relay-server/src/services/projects/source/redis.rs index e2a520f4e7..c7b82777dd 100644 --- a/relay-server/src/services/projects/source/redis.rs +++ b/relay-server/src/services/projects/source/redis.rs @@ -5,8 +5,8 @@ use relay_config::Config; use relay_redis::{RedisError, RedisPool}; use relay_statsd::metric; -use crate::services::projects::project::state::UpstreamProjectState; use crate::services::projects::project::{ParsedProjectState, ProjectState}; +use crate::services::projects::source::UpstreamProjectState; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; #[derive(Debug, Clone)] diff --git a/relay-server/src/services/projects/source/upstream.rs b/relay-server/src/services/projects/source/upstream.rs index bcbfd5a279..a42de789dd 100644 --- a/relay-server/src/services/projects/source/upstream.rs +++ b/relay-server/src/services/projects/source/upstream.rs @@ -18,10 +18,8 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::Instant; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::ParsedProjectState; -use crate::services::projects::project::ProjectState; -use crate::services::projects::source::FetchProjectState; +use crate::services::projects::project::{ParsedProjectState, ProjectState}; +use crate::services::projects::source::{FetchProjectState, UpstreamProjectState}; use crate::services::upstream::{ Method, RequestPriority, SendQuery, UpstreamQuery, UpstreamRelay, UpstreamRequestError, }; From 9d3d64a3614038d2e509105db41697746ffea77c Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 22 Oct 2024 12:07:46 +0200 Subject: [PATCH 05/15] implement revisions correctly --- relay-server/src/endpoints/project_configs.rs | 14 +++--- .../src/services/projects/cache/service.rs | 11 ++-- .../src/services/projects/cache/state.rs | 50 +++++++++++++++---- .../src/services/projects/project/info.rs | 36 ++++++++++++- .../src/services/projects/project/mod.rs | 29 +++++------ .../src/services/projects/source/mod.rs | 46 +++++++++-------- .../src/services/projects/source/redis.rs | 20 ++++---- .../src/services/projects/source/upstream.rs | 41 +++++++-------- 8 files changed, 154 insertions(+), 93 deletions(-) diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 50cd859fa1..408527490a 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ 
b/relay-server/src/endpoints/project_configs.rs @@ -15,7 +15,7 @@ use crate::extractors::SignedJson; use crate::service::ServiceState; use crate::services::global_config::{self, StatusResponse}; use crate::services::projects::project::{ - LimitedParsedProjectState, ParsedProjectState, ProjectState, + LimitedParsedProjectState, ParsedProjectState, ProjectState, Revision, }; /// V2 version of this endpoint. @@ -99,7 +99,7 @@ struct GetProjectStatesRequest { /// /// This length of this list if specified must be the same length /// as [`Self::public_keys`], the items are asssociated by index. - revisions: Option>>>, + revisions: Option>>, #[serde(default)] full_config: bool, #[serde(default)] @@ -108,8 +108,8 @@ struct GetProjectStatesRequest { fn into_valid_keys( public_keys: Vec>, - revisions: Option>>>, -) -> impl Iterator)> { + revisions: Option>>, +) -> impl Iterator { let mut revisions = revisions.and_then(|e| e.ok()).unwrap_or_default(); if !revisions.is_empty() && revisions.len() != public_keys.len() { // The downstream sent us a different amount of revisions than project keys, @@ -122,7 +122,9 @@ fn into_valid_keys( ); revisions.clear(); } - let revisions = revisions.into_iter().chain(std::iter::repeat(None)); + let revisions = revisions + .into_iter() + .chain(std::iter::repeat_with(|| Revision::default())); std::iter::zip(public_keys, revisions).filter_map(|(public_key, revision)| { // Skip unparsable public keys. @@ -173,7 +175,7 @@ async fn inner( }; // Only ever omit responses when there was a valid revision in the first place. - if revision.is_some() && project_info.rev == revision { + if project_info.rev == revision { unchanged.push(project_key); continue; } diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index d8e22c1d03..4487803500 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -8,7 +8,7 @@ use tokio::sync::{broadcast, mpsc}; use crate::services::projects::cache::handle::ProjectCacheHandle; use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; use crate::services::projects::project::ProjectState; -use crate::services::projects::source::ProjectSource; +use crate::services::projects::source::{ProjectSource, SourceProjectState}; /// Size of the broadcast channel for project events. /// @@ -104,17 +104,17 @@ impl ProjectCacheService { // TODO: cached state for delta fetches, maybe this should just be a revision? let state = match source - .fetch(fetch.project_key(), false, ProjectState::Pending) + .fetch(fetch.project_key(), false, Default::default()) .await { - // TODO: verify if the sanitized here is correct - Ok(state) => state.sanitized().into(), + Ok(result) => result, Err(err) => { relay_log::error!( error = &err as &dyn std::error::Error, "failed to fetch project state for {fetch:?}" ); - ProjectState::Pending + // Fallback to a pending project on error, we will retry. 
+ ProjectState::Pending.into() } }; @@ -143,7 +143,6 @@ impl ProjectCacheService { return; } - // TODO: no-ops from revision checks should not end up here let _ = self .project_events_tx .send(ProjectEvent::Ready(project_key)); diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index cd5f92220d..74c5bee467 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -2,13 +2,15 @@ use std::fmt; use std::sync::Arc; use std::time::Instant; +use arc_swap::access::Access; use arc_swap::ArcSwap; use relay_base_schema::project::ProjectKey; use relay_config::Config; use relay_quotas::CachedRateLimits; use relay_sampling::evaluation::ReservoirCounters; -use crate::services::projects::project::ProjectState; +use crate::services::projects::project::{ProjectState, Revision}; +use crate::services::projects::source::SourceProjectState; use crate::utils::RetryBackoff; /// The backing storage for a project cache. @@ -208,7 +210,9 @@ struct ProjectRef<'a> { impl ProjectRef<'_> { fn try_begin_fetch(&mut self, config: &Config) -> Option { let now = Instant::now(); - self.private.try_begin_fetch(now, config) + self.private + .try_begin_fetch(now, config) + .map(|fetch| fetch.with_revision(self.shared.revision())) } fn complete_fetch(&mut self, fetch: CompletedFetch) { @@ -216,8 +220,11 @@ impl ProjectRef<'_> { // Keep the old state around if the current fetch is pending. // It may still be useful to callers. - if !fetch.project_state.is_pending() { - self.shared.set_project_state(fetch.project_state); + match fetch.state { + SourceProjectState::New(state) if !state.is_pending() => { + self.shared.set_project_state(state); + } + _ => {} } } } @@ -230,6 +237,7 @@ impl ProjectRef<'_> { pub struct Fetch { project_key: ProjectKey, when: Instant, + revision: Revision, } impl Fetch { @@ -247,12 +255,17 @@ impl Fetch { } /// Completes the fetch with a result and returns a [`CompletedFetch`]. - pub fn complete(self, state: ProjectState) -> CompletedFetch { + pub fn complete(self, state: SourceProjectState) -> CompletedFetch { CompletedFetch { project_key: self.project_key, - project_state: state, + state, } } + + fn with_revision(mut self, revision: Revision) -> Self { + self.revision = revision; + self + } } /// The result of an executed [`Fetch`]. @@ -260,7 +273,7 @@ impl Fetch { #[derive(Debug)] pub struct CompletedFetch { project_key: ProjectKey, - project_state: ProjectState, + state: SourceProjectState, } impl CompletedFetch { @@ -270,8 +283,19 @@ impl CompletedFetch { } /// Returns a reference to the fetched [`ProjectState`]. - pub fn project_state(&self) -> &ProjectState { - &self.project_state + pub fn project_state(&self) -> Option<&ProjectState> { + match &self.state { + SourceProjectState::New(state) => Some(state), + SourceProjectState::NotModified => None, + } + } + + /// Returns `true` if the fetch completed with a pending status. + fn is_pending(&self) -> bool { + match &self.state { + SourceProjectState::New(state) => state.is_pending(), + SourceProjectState::NotModified => false, + } } } @@ -310,6 +334,11 @@ impl SharedProjectState { } } + /// Extracts and clones the revision from the contained project state. + fn revision(&self) -> Revision { + self.0.as_ref().load().state.revision().clone() + } + /// Transforms this interior mutable handle to an immutable [`SharedProject`]. 
fn to_shared_project(&self) -> SharedProject { SharedProject(self.0.as_ref().load_full()) @@ -422,6 +451,7 @@ impl PrivateProjectState { Some(Fetch { project_key: self.project_key, when, + revision: Revision::default(), }) } @@ -431,7 +461,7 @@ impl PrivateProjectState { "fetch completed while there was no current fetch registered" ); - if fetch.project_state.is_pending() { + if fetch.is_pending() { self.state = FetchState::Pending { next_fetch_attempt: Instant::now().checked_add(self.backoff.next_backoff()), }; diff --git a/relay-server/src/services/projects/project/info.rs b/relay-server/src/services/projects/project/info.rs index b389edc329..aa37e46431 100644 --- a/relay-server/src/services/projects/project/info.rs +++ b/relay-server/src/services/projects/project/info.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use chrono::{DateTime, Utc}; use relay_base_schema::project::{ProjectId, ProjectKey}; @@ -31,7 +33,7 @@ pub struct ProjectInfo { /// are faked locally. pub last_change: Option>, /// The revision id of the project config. - pub rev: Option, + pub rev: Revision, /// Indicates that the project is disabled. /// A container of known public keys in the project. /// @@ -56,7 +58,7 @@ pub struct ProjectInfo { pub struct LimitedProjectInfo { pub project_id: Option, pub last_change: Option>, - pub rev: Option, + pub rev: Revision, pub public_keys: SmallVec<[PublicKeyConfig; 1]>, pub slug: Option, #[serde(with = "LimitedProjectConfig")] @@ -259,3 +261,33 @@ pub struct PublicKeyConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub numeric_id: Option, } + +/// Represents a project info revision. +/// +/// A revision can be missing, a missing revision never compares equal +/// to any other revision. +/// +/// Revisions are internally reference counted and cheap the create. +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Revision(Option>); + +impl Revision { + /// Returns the revision as a string reference. + /// + /// `None` is a revision that does not match any other revision, + /// not even another revision which is represented as `None`. + pub fn as_str(&self) -> Option<&str> { + self.0.as_deref() + } +} + +impl PartialEq for Revision { + fn eq(&self, other: &Self) -> bool { + match (&self.0, &other.0) { + (None, _) => false, + (_, None) => false, + (Some(left), Some(right)) => left == right, + } + } +} diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index 1dad67fc12..f09338071e 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -8,7 +8,7 @@ use relay_quotas::Scoping; mod info; -pub use self::info::{LimitedProjectInfo, ProjectInfo}; +pub use self::info::{LimitedProjectInfo, ProjectInfo, PublicKeyConfig, Revision}; /// Representation of a project's current state. #[derive(Clone, Debug, Default)] @@ -35,7 +35,7 @@ impl ProjectState { Self::Enabled(Arc::new(ProjectInfo { project_id: None, last_change: None, - rev: None, + rev: Default::default(), public_keys: Default::default(), slug: None, config: Default::default(), @@ -46,35 +46,32 @@ impl ProjectState { /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). 
pub fn sanitized(self) -> Self { match self { - ProjectState::Enabled(state) => { - ProjectState::Enabled(Arc::new(state.as_ref().clone().sanitized())) - } - ProjectState::Disabled => ProjectState::Disabled, - ProjectState::Pending => ProjectState::Pending, + Self::Enabled(state) => Self::Enabled(Arc::new(state.as_ref().clone().sanitized())), + Self::Disabled => Self::Disabled, + Self::Pending => Self::Pending, } } /// Whether or not this state is pending. pub fn is_pending(&self) -> bool { - matches!(self, ProjectState::Pending) + matches!(self, Self::Pending) } /// Utility function that returns the project config if enabled. pub fn enabled(self) -> Option> { match self { - ProjectState::Enabled(info) => Some(info), - ProjectState::Disabled | ProjectState::Pending => None, + Self::Enabled(info) => Some(info), + Self::Disabled | Self::Pending => None, } } /// Returns the revision of the contained project info. /// /// `None` if the revision is missing or not available. - pub fn revision(&self) -> Option<&str> { + pub fn revision(&self) -> Revision { match &self { - ProjectState::Enabled(info) => info.rev.as_deref(), - ProjectState::Disabled => None, - ProjectState::Pending => None, + Self::Enabled(info) => info.rev.clone(), + Self::Disabled | Self::Pending => Revision::default(), } } @@ -94,8 +91,8 @@ impl From for ProjectState { fn from(value: ParsedProjectState) -> Self { let ParsedProjectState { disabled, info } = value; match disabled { - true => ProjectState::Disabled, - false => ProjectState::Enabled(Arc::new(info)), + true => Self::Disabled, + false => Self::Enabled(Arc::new(info)), } } } diff --git a/relay-server/src/services/projects/source/mod.rs b/relay-server/src/services/projects/source/mod.rs index b6eb307607..2dc78a7d46 100644 --- a/relay-server/src/services/projects/source/mod.rs +++ b/relay-server/src/services/projects/source/mod.rs @@ -15,7 +15,7 @@ pub mod local; pub mod redis; pub mod upstream; -use crate::services::projects::project::ProjectState; +use crate::services::projects::project::{ProjectState, Revision}; use crate::services::upstream::UpstreamRelay; use self::local::{LocalProjectSource, LocalProjectSourceService}; @@ -94,47 +94,48 @@ impl ProjectSource { self, project_key: ProjectKey, no_cache: bool, - cached_state: ProjectState, - ) -> Result { + current_revision: Revision, + ) -> Result { let state_opt = self .local_source .send(FetchOptionalProjectState { project_key }) .await?; if let Some(state) = state_opt { - return Ok(state); + return Ok(state.into()); } match self.config.relay_mode() { - RelayMode::Proxy => return Ok(ProjectState::new_allowed()), - RelayMode::Static => return Ok(ProjectState::Disabled), - RelayMode::Capture => return Ok(ProjectState::new_allowed()), + RelayMode::Proxy => return Ok(ProjectState::new_allowed().into()), + RelayMode::Static => return Ok(ProjectState::Disabled.into()), + RelayMode::Capture => return Ok(ProjectState::new_allowed().into()), RelayMode::Managed => (), // Proceed with loading the config from redis or upstream } - let current_revision = cached_state.revision().map(String::from); #[cfg(feature = "processing")] if let Some(redis_source) = self.redis_source { let current_revision = current_revision.clone(); let redis_permit = self.redis_semaphore.acquire().await?; let state_fetch_result = tokio::task::spawn_blocking(move || { - redis_source.get_config_if_changed(project_key, current_revision.as_deref()) + redis_source.get_config_if_changed(project_key, current_revision) }) .await?; drop(redis_permit); match 
state_fetch_result { // New state fetched from Redis, possibly pending. - Ok(UpstreamProjectState::New(state)) => { + // + // If it is pending, we must fallback to fetching from the upstream. + Ok(SourceProjectState::New(state)) => { let state = state.sanitized(); if !state.is_pending() { - return Ok(state); + return Ok(state.into()); } } // Redis reported that we're holding an up-to-date version of the state already, // refresh the state and return the old cached state again. - Ok(UpstreamProjectState::NotModified) => return Ok(cached_state), + Ok(SourceProjectState::NotModified) => return Ok(SourceProjectState::NotModified), Err(error) => { relay_log::error!( error = &error as &dyn std::error::Error, @@ -153,10 +154,10 @@ impl ProjectSource { }) .await?; - match state { - UpstreamProjectState::New(state) => Ok(state.sanitized()), - UpstreamProjectState::NotModified => Ok(cached_state), - } + Ok(match state { + SourceProjectState::New(state) => SourceProjectState::New(state.sanitized()), + SourceProjectState::NotModified => SourceProjectState::NotModified, + }) } } @@ -186,10 +187,7 @@ pub struct FetchProjectState { /// The upstream is allowed to omit full project configs /// for requests for which the requester already has the most /// recent revision. - /// - /// Settings this to `None` will essentially always re-fetch - /// the project config. - pub current_revision: Option, + pub current_revision: Revision, /// If true, all caches should be skipped and a fresh state should be computed. pub no_cache: bool, @@ -209,9 +207,15 @@ impl FetchOptionalProjectState { /// Response indicating whether a project state needs to be updated /// or the upstream does not have a newer version. #[derive(Debug, Clone)] -pub enum UpstreamProjectState { +pub enum SourceProjectState { /// The upstream sent a [`ProjectState`]. New(ProjectState), /// The upstream indicated that there is no newer version of the state available. NotModified, } + +impl From for SourceProjectState { + fn from(value: ProjectState) -> Self { + Self::New(value) + } +} diff --git a/relay-server/src/services/projects/source/redis.rs b/relay-server/src/services/projects/source/redis.rs index c7b82777dd..7109c408cd 100644 --- a/relay-server/src/services/projects/source/redis.rs +++ b/relay-server/src/services/projects/source/redis.rs @@ -5,8 +5,8 @@ use relay_config::Config; use relay_redis::{RedisError, RedisPool}; use relay_statsd::metric; -use crate::services::projects::project::{ParsedProjectState, ProjectState}; -use crate::services::projects::source::UpstreamProjectState; +use crate::services::projects::project::{ParsedProjectState, ProjectState, Revision}; +use crate::services::projects::source::SourceProjectState; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; #[derive(Debug, Clone)] @@ -60,13 +60,13 @@ impl RedisProjectSource { pub fn get_config_if_changed( &self, key: ProjectKey, - revision: Option<&str>, - ) -> Result { + revision: Revision, + ) -> Result { let mut client = self.redis.client()?; let mut connection = client.connection()?; // Only check for the revision if we were passed a revision. 
- if let Some(revision) = revision { + if let Some(revision) = revision.as_str() { let current_revision: Option = relay_redis::redis::cmd("GET") .arg(self.get_redis_rev_key(key)) .query(&mut connection) @@ -80,7 +80,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "revision", ); - return Ok(UpstreamProjectState::NotModified); + return Ok(SourceProjectState::NotModified); } } @@ -94,7 +94,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "false" ); - return Ok(UpstreamProjectState::New(ProjectState::Pending)); + return Ok(SourceProjectState::New(ProjectState::Pending)); }; let response = ProjectState::from(parse_redis_response(response.as_slice())?); @@ -106,18 +106,18 @@ impl RedisProjectSource { // // While this is theoretically possible this should always been handled using the above revision // check using the additional Redis key. - if revision.is_some() && response.revision() == revision { + if response.revision() == revision { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config_revision" ); - Ok(UpstreamProjectState::NotModified) + Ok(SourceProjectState::NotModified) } else { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config" ); - Ok(UpstreamProjectState::New(response)) + Ok(SourceProjectState::New(response)) } } diff --git a/relay-server/src/services/projects/source/upstream.rs b/relay-server/src/services/projects/source/upstream.rs index a42de789dd..cab39d4c19 100644 --- a/relay-server/src/services/projects/source/upstream.rs +++ b/relay-server/src/services/projects/source/upstream.rs @@ -18,8 +18,9 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::Instant; +use crate::services::projects::project::Revision; use crate::services::projects::project::{ParsedProjectState, ProjectState}; -use crate::services::projects::source::{FetchProjectState, UpstreamProjectState}; +use crate::services::projects::source::{FetchProjectState, SourceProjectState}; use crate::services::upstream::{ Method, RequestPriority, SendQuery, UpstreamQuery, UpstreamRelay, UpstreamRequestError, }; @@ -39,7 +40,7 @@ pub struct GetProjectStates { /// /// The revisions are mapped by index to the project key, /// this is a separate field to keep the API compatible. - revisions: Vec>, + revisions: Vec, /// If `true` the upstream should return a full configuration. /// /// Upstreams will ignore this for non-internal Relays. @@ -97,10 +98,10 @@ impl UpstreamQuery for GetProjectStates { #[derive(Debug)] struct ProjectStateChannel { // Main broadcast channel. - channel: BroadcastChannel, + channel: BroadcastChannel, // Additional broadcast channels tracked from merge operations. - merged: Vec>, - revision: Option, + merged: Vec>, + revision: Revision, deadline: Instant, no_cache: bool, attempts: u64, @@ -112,8 +113,8 @@ struct ProjectStateChannel { impl ProjectStateChannel { pub fn new( - sender: BroadcastSender, - revision: Option, + sender: BroadcastSender, + revision: Revision, timeout: Duration, no_cache: bool, ) -> Self { @@ -140,18 +141,14 @@ impl ProjectStateChannel { /// If the new revision is different from the contained revision this clears the revision. /// To not have multiple fetches per revision per batch, we need to find a common denominator /// for requests with different revisions, which is always to fetch the full project config. 
- pub fn attach( - &mut self, - sender: BroadcastSender, - revision: Option, - ) { + pub fn attach(&mut self, sender: BroadcastSender, revision: Revision) { self.channel.attach(sender); if self.revision != revision { - self.revision = None; + self.revision = Revision::default(); } } - pub fn send(self, state: UpstreamProjectState) { + pub fn send(self, state: SourceProjectState) { for channel in self.merged { channel.send(state.clone()); } @@ -177,7 +174,7 @@ impl ProjectStateChannel { self.merged.push(channel); self.merged.extend(merged); if self.revision != revision { - self.revision = None; + self.revision = Revision::default(); } self.deadline = self.deadline.max(deadline); self.no_cache |= no_cache; @@ -196,16 +193,16 @@ type ProjectStateChannels = HashMap; /// Internally it maintains the buffer queue of the incoming requests, which got scheduled to fetch the /// state and takes care of the backoff in case there is a problem with the requests. #[derive(Debug)] -pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); +pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); impl Interface for UpstreamProjectSource {} impl FromMessage for UpstreamProjectSource { - type Response = BroadcastResponse; + type Response = BroadcastResponse; fn from_message( message: FetchProjectState, - sender: BroadcastSender, + sender: BroadcastSender, ) -> Self { Self(message, sender) } @@ -465,7 +462,7 @@ impl UpstreamProjectSourceService { let mut result = "ok"; let state = if response.unchanged.contains(&key) { result = "ok_unchanged"; - UpstreamProjectState::NotModified + SourceProjectState::NotModified } else { let state = response .configs @@ -483,7 +480,7 @@ impl UpstreamProjectSourceService { ErrorBoundary::Ok(Some(state)) => state.into(), }; - UpstreamProjectState::New(state) + SourceProjectState::New(state) }; metric!( @@ -730,8 +727,8 @@ mod tests { .await; let (response1, response2) = futures::future::join(response1, response2).await; - assert!(matches!(response1, Ok(UpstreamProjectState::NotModified))); - assert!(matches!(response2, Ok(UpstreamProjectState::NotModified))); + assert!(matches!(response1, Ok(SourceProjectState::NotModified))); + assert!(matches!(response2, Ok(SourceProjectState::NotModified))); // No more messages to upstream expected. 
assert!(upstream_rx.try_recv().is_err()); From d9b1d77df7b3d54e88684a4ada1a4a10078d5a68 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Tue, 22 Oct 2024 13:12:27 +0200 Subject: [PATCH 06/15] warnings and metrics --- relay-server/src/endpoints/common.rs | 7 +- relay-server/src/endpoints/project_configs.rs | 2 +- relay-server/src/service.rs | 13 +- relay-server/src/services/processor.rs | 10 +- .../src/services/projects/cache/legacy.rs | 43 ++----- .../src/services/projects/cache/project.rs | 4 +- .../src/services/projects/cache/service.rs | 49 ++++++-- .../src/services/projects/cache/state.rs | 31 +++-- .../src/services/projects/project/mod.rs | 2 +- .../src/services/projects/source/mod.rs | 3 + relay-server/src/services/spooler/mod.rs | 16 +-- relay-server/src/statsd.rs | 51 +++----- relay-server/src/utils/garbage.rs | 113 ------------------ relay-server/src/utils/mod.rs | 2 - 14 files changed, 106 insertions(+), 240 deletions(-) delete mode 100644 relay-server/src/utils/garbage.rs diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 19f0ee6125..13e8aaffe2 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -40,9 +40,6 @@ impl IntoResponse for ServiceUnavailable { /// Error type for all store-like requests. #[derive(Debug, thiserror::Error)] pub enum BadStoreRequest { - #[error("could not schedule event processing")] - ScheduleFailed, - #[error("empty request body")] EmptyBody, @@ -113,7 +110,7 @@ impl IntoResponse for BadStoreRequest { (StatusCode::TOO_MANY_REQUESTS, headers, body).into_response() } - BadStoreRequest::ScheduleFailed | BadStoreRequest::QueueFailed => { + BadStoreRequest::QueueFailed => { // These errors indicate that something's wrong with our service system, most likely // mailbox congestion or a faulty shutdown. Indicate an unavailable service to the // client. It might retry event submission at a later time. @@ -276,7 +273,7 @@ fn queue_envelope( relay_log::trace!("sending metrics into processing queue"); state.processor().send(ProcessMetrics { data: MetricData::Raw(metric_items.into_vec()), - start_time: envelope.meta().start_time().into(), + start_time: envelope.meta().start_time(), sent_at: envelope.sent_at(), project_key: envelope.meta().public_key(), source: envelope.meta().into(), diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 408527490a..75d3f10c8b 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -124,7 +124,7 @@ fn into_valid_keys( } let revisions = revisions .into_iter() - .chain(std::iter::repeat_with(|| Revision::default())); + .chain(std::iter::repeat_with(Revision::default)); std::iter::zip(public_keys, revisions).filter_map(|(public_key, revision)| { // Skip unparsable public keys. 
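// --- Illustrative sketch (editor's addition, not part of the patch) ---
// Why padding missing revisions with `Revision::default()` in the hunk above is
// safe: the `Revision` newtype introduced earlier in this series never treats a
// missing revision as equal to anything, so padded entries can never be reported
// back as "unchanged". Minimal stand-in assuming the inner type is `Option<Arc<str>>`.
use std::sync::Arc;

#[derive(Debug, Default, Clone)]
struct Revision(Option<Arc<str>>);

impl PartialEq for Revision {
    fn eq(&self, other: &Self) -> bool {
        match (&self.0, &other.0) {
            (Some(left), Some(right)) => left == right,
            // A missing revision matches nothing, not even another missing one.
            _ => false,
        }
    }
}

fn main() {
    let known = Revision(Some("8f3a9c".into())); // hypothetical revision string
    let missing = Revision::default();

    assert_eq!(known, known.clone()); // identical revisions compare equal
    assert_ne!(known, missing); // a missing revision never matches a known one
    assert_ne!(missing, Revision::default()); // nor another missing revision
}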
diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index a068ec1689..ac953b13ff 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -7,13 +7,11 @@ use crate::services::buffer::{self, EnvelopeBufferService, ObservableEnvelopeBuf use crate::services::cogs::{CogsService, CogsServiceRecorder}; use crate::services::global_config::{GlobalConfigManager, GlobalConfigService}; use crate::services::health_check::{HealthCheck, HealthCheckService}; -use crate::services::metrics::{Aggregator, RouterService}; +use crate::services::metrics::RouterService; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use crate::services::projects::cache::{ - legacy, ProjectCache, ProjectCacheHandle, ProjectCacheService, -}; +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectCacheService}; use crate::services::projects::source::ProjectSource; use crate::services::relays::{RelayCache, RelayCacheService}; use crate::services::stats::RelayStats; @@ -56,7 +54,6 @@ pub enum ServiceError { #[derive(Clone, Debug)] pub struct Registry { - pub aggregator: Addr, pub health_check: Addr, pub outcome_producer: Addr, pub outcome_aggregator: Addr, @@ -65,7 +62,6 @@ pub struct Registry { pub relay_cache: Addr, pub global_config: Addr, pub legacy_project_cache: Addr, - pub project_cache: Addr, pub upstream_relay: Addr, pub envelope_buffer: Option, @@ -202,7 +198,7 @@ impl ServiceState { .as_ref() .map(|pools| pools.project_configs.clone()), ); - let (project_cache, project_cache_handle) = + let project_cache_handle = ProjectCacheService::new(Arc::clone(&config), project_source).start(); let aggregator = RouterService::new( @@ -248,7 +244,6 @@ impl ServiceState { #[cfg(feature = "processing")] redis_pools.clone(), processor::Addrs { - legacy_project_cache: legacy_project_cache.clone(), outcome_aggregator: outcome_aggregator.clone(), upstream_relay: upstream_relay.clone(), test_store: test_store.clone(), @@ -314,7 +309,6 @@ impl ServiceState { let relay_cache = RelayCacheService::new(config.clone(), upstream_relay.clone()).start(); let registry = Registry { - aggregator, processor, health_check, outcome_producer, @@ -323,7 +317,6 @@ impl ServiceState { relay_cache, global_config, legacy_project_cache, - project_cache, project_cache_handle, upstream_relay, envelope_buffer, diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 16aad5ac51..994f278b81 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -71,7 +71,7 @@ use crate::services::global_config::GlobalConfigHandle; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::event::FiltersStatus; -use crate::services::projects::cache::{legacy, ProjectCacheHandle}; +use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; use crate::services::upstream::{ @@ -1124,7 +1124,6 @@ pub struct EnvelopeProcessorService { /// Contains the addresses of services that the processor publishes to. 
pub struct Addrs { - pub legacy_project_cache: Addr, pub outcome_aggregator: Addr, pub upstream_relay: Addr, pub test_store: Addr, @@ -1136,7 +1135,6 @@ pub struct Addrs { impl Default for Addrs { fn default() -> Self { Addrs { - legacy_project_cache: Addr::dummy(), outcome_aggregator: Addr::dummy(), upstream_relay: Addr::dummy(), test_store: Addr::dummy(), @@ -1166,6 +1164,7 @@ struct InnerProcessor { impl EnvelopeProcessorService { /// Creates a multi-threaded envelope processor. + #[cfg_attr(feature = "processing", expect(clippy::too_many_arguments))] pub fn new( pool: ThreadPool, config: Arc, @@ -2130,7 +2129,7 @@ impl EnvelopeProcessorService { let buckets = match project.project_state() { ProjectState::Enabled(project_info) => { let rate_limits = project.rate_limits().current_limits(); - self.check_buckets(project_key, &project_info, &rate_limits, buckets) + self.check_buckets(project_key, project_info, &rate_limits, buckets) } _ => buckets, }; @@ -2174,7 +2173,7 @@ impl EnvelopeProcessorService { data: MetricData::Parsed(buckets), project_key, source, - start_time: start_time.into(), + start_time, sent_at, }, ) @@ -3732,7 +3731,6 @@ mod tests { let processor = create_test_processor_with_addrs( config, Addrs { - legacy_project_cache, ..Default::default() }, ); diff --git a/relay-server/src/services/projects/cache/legacy.rs b/relay-server/src/services/projects/cache/legacy.rs index 5536cf7382..c8175edf51 100644 --- a/relay-server/src/services/projects/cache/legacy.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -13,7 +13,6 @@ use crate::Envelope; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use relay_metrics::Bucket; use relay_statsd::metric; use relay_system::{Addr, FromMessage, Interface, Service}; use tokio::sync::{mpsc, watch}; @@ -27,8 +26,8 @@ use crate::services::spooler::{ }; use crate::services::test_store::TestStore; -use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; -use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; +use crate::statsd::{RelayCounters, RelayGauges, RelayTimers}; +use crate::utils::{ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; /// Validates the envelope against project configuration and rate limits. /// @@ -166,8 +165,6 @@ struct ProjectCacheBroker { memory_checker: MemoryChecker, services: Services, projects: ProjectCacheHandle, - /// Utility for disposing of expired project data in a background thread. - garbage_disposal: GarbageDisposal, /// Handle to schedule periodic unspooling of buffered envelopes (spool V1). spool_v1_unspool_handle: SleepHandle, @@ -297,7 +294,7 @@ impl ProjectCacheBroker { let process = ProcessEnvelope { envelope: managed_envelope, - project_info: Arc::clone(&project_info), + project_info: Arc::clone(project_info), rate_limits, sampling_project_info, reservoir_counters, @@ -346,7 +343,7 @@ impl ProjectCacheBroker { let sampling_info = if let Some(sampling_key) = sampling_key { let sampling_project = self.projects.get(sampling_key); match sampling_project.project_state() { - ProjectState::Enabled(info) => Some(Arc::clone(&info)), + ProjectState::Enabled(info) => Some(Arc::clone(info)), ProjectState::Disabled => { relay_log::trace!("Sampling state is disabled ({sampling_key})"); // We accept events even if its root project has been disabled. 
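// --- Illustrative sketch (editor's addition, not part of the patch) ---
// Why the broker in the hunk above can hand project info to the processor with
// `Arc::clone(info)`: `ProjectState::Enabled` wraps the info in an `Arc`, so a
// copy only bumps a reference count instead of deep-copying the configuration.
// Types below are simplified stand-ins for the ones in this patch series.
use std::sync::Arc;

#[derive(Debug)]
struct ProjectInfo {
    slug: Option<String>,
}

#[derive(Debug)]
enum ProjectState {
    Enabled(Arc<ProjectInfo>),
    Disabled,
    Pending,
}

fn sampling_info(state: &ProjectState) -> Option<Arc<ProjectInfo>> {
    match state {
        // Cheap: clones the pointer, not the project configuration.
        ProjectState::Enabled(info) => Some(Arc::clone(info)),
        ProjectState::Disabled | ProjectState::Pending => None,
    }
}

fn main() {
    let state = ProjectState::Enabled(Arc::new(ProjectInfo {
        slug: Some("my-project".to_owned()),
    }));

    let info = sampling_info(&state).expect("enabled project exposes its info");
    assert_eq!(info.slug.as_deref(), Some("my-project"));
    assert_eq!(Arc::strong_count(&info), 2); // shared with `state`, not copied
}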
@@ -402,7 +399,6 @@ impl ProjectCacheBroker { // Ideally we log outcomes for the metrics here, but currently for metric // outcomes we need a valid scoping, which we cannot construct for disabled // projects. - self.garbage_disposal.dispose(buckets); continue; } ProjectState::Enabled(project_info) => project_info, @@ -630,7 +626,7 @@ impl ProjectCacheBroker { fn handle_message(&mut self, message: ProjectCache) { let ty = message.variant(); metric!( - timer(RelayTimers::ProjectCacheMessageDuration), + timer(RelayTimers::LegacyProjectCacheMessageDuration), message = ty, { match message { @@ -772,7 +768,6 @@ impl Service for ProjectCacheService { config: config.clone(), memory_checker, projects: project_cache_handle, - garbage_disposal: GarbageDisposal::new(), services, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1, @@ -784,7 +779,7 @@ impl Service for ProjectCacheService { biased; Ok(()) = global_config_rx.changed() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "update_global_config", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "update_global_config", { match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => broker.set_global_config_ready(), // The watch should only be updated if it gets a new value. @@ -794,29 +789,29 @@ impl Service for ProjectCacheService { }) }, Ok(project_event) = project_events.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_project_event", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_project_event", { broker.handle_project_event(project_event); }) } // Buffer will not dequeue the envelopes from the spool if there is not enough // permits in `BufferGuard` available. Currently this is 50%. - Some(UnspooledEnvelope { managed_envelope, key }) = buffer_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_processing", { + Some(UnspooledEnvelope { managed_envelope, .. }) = buffer_rx.recv() => { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_processing", { broker.handle_processing(managed_envelope) }) }, () = &mut broker.spool_v1_unspool_handle => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "periodic_unspool", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "periodic_unspool", { broker.handle_periodic_unspool() }) } Some(message) = rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_message", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_message", { broker.handle_message(message) }) } Some(message) = envelopes_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_envelope", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_envelope", { broker.handle_envelope(message) }) } @@ -829,19 +824,6 @@ impl Service for ProjectCacheService { } } -/// Sum type for all objects which need to be discareded through the [`GarbageDisposal`]. -#[derive(Debug)] -#[allow(dead_code)] // Fields are never read, only used for discarding/dropping data. 
-enum ProjectGarbage { - Metrics(Vec), -} - -impl From> for ProjectGarbage { - fn from(value: Vec) -> Self { - Self::Metrics(value) - } -} - #[cfg(test)] mod tests { use relay_test::mock_service; @@ -912,7 +894,6 @@ mod tests { config: config.clone(), memory_checker, projects: todo!(), - garbage_disposal: GarbageDisposal::new(), services, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1: Some(SpoolV1 { diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs index cdac543789..239829e4d2 100644 --- a/relay-server/src/services/projects/cache/project.rs +++ b/relay-server/src/services/projects/cache/project.rs @@ -50,7 +50,7 @@ impl<'a> Project<'a> { mut envelope: ManagedEnvelope, ) -> Result { let state = match self.project_state() { - ProjectState::Enabled(state) => Some(Arc::clone(&state)), + ProjectState::Enabled(state) => Some(Arc::clone(state)), ProjectState::Disabled => { // TODO(jjbayer): We should refactor this function to either return a Result or // handle envelope rejections internally, but not both. @@ -66,7 +66,7 @@ impl<'a> Project<'a> { scoping = state.scope_request(envelope.envelope().meta()); envelope.scope(scoping); - if let Err(reason) = state.check_envelope(envelope.envelope(), &self.config) { + if let Err(reason) = state.check_envelope(envelope.envelope(), self.config) { envelope.reject(Outcome::Invalid(reason)); return Err(reason); } diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index 4487803500..149fbac3ff 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -2,13 +2,15 @@ use std::sync::Arc; use relay_base_schema::project::ProjectKey; use relay_config::Config; +use relay_statsd::metric; use relay_system::Service; use tokio::sync::{broadcast, mpsc}; use crate::services::projects::cache::handle::ProjectCacheHandle; use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; use crate::services::projects::project::ProjectState; -use crate::services::projects::source::{ProjectSource, SourceProjectState}; +use crate::services::projects::source::ProjectSource; +use crate::statsd::RelayTimers; /// Size of the broadcast channel for project events. /// @@ -31,6 +33,14 @@ pub enum ProjectCache { Fetch(ProjectKey), } +impl ProjectCache { + fn variant(&self) -> &'static str { + match self { + Self::Fetch(_) => "fetch", + } + } +} + impl relay_system::Interface for ProjectCache {} impl relay_system::FromMessage for ProjectCache { @@ -77,21 +87,20 @@ impl ProjectCacheService { /// Consumes and starts a [`ProjectCacheService`]. /// - /// Returns a [`relay_system::Addr`] to communicate with the cache and a [`ProjectCacheHandle`] - /// to access the cache concurrently. - pub fn start(self) -> (relay_system::Addr, ProjectCacheHandle) { + /// Returns a [`ProjectCacheHandle`] to access the cache concurrently. + pub fn start(self) -> ProjectCacheHandle { let (addr, addr_rx) = relay_system::channel(Self::name()); let handle = ProjectCacheHandle { shared: self.store.shared(), config: Arc::clone(&self.config), - service: addr.clone(), + service: addr, project_events: self.project_events_tx.clone(), }; self.spawn_handler(addr_rx); - (addr, handle) + handle } /// Schedules a new [`Fetch`] and delivers the result to the [`Self::project_update_tx`] channel. 
@@ -169,6 +178,17 @@ impl relay_system::Service for ProjectCacheService { type Interface = ProjectCache; fn spawn_handler(mut self, mut rx: relay_system::Receiver) { + macro_rules! timed { + ($task:expr, $body:expr) => {{ + let task_name = $task; + metric!( + timer(RelayTimers::ProjectCacheTaskDuration), + task = task_name, + { $body } + ) + }}; + } + tokio::spawn(async move { let mut eviction_ticker = tokio::time::interval(self.config.cache_eviction_interval()); @@ -176,15 +196,18 @@ impl relay_system::Service for ProjectCacheService { tokio::select! { biased; - Some(update) = self.project_update_rx.recv() => { + Some(update) = self.project_update_rx.recv() => timed!( + "project_update", self.handle_project_update(update) - }, - Some(message) = rx.recv() => { - self.handle_message(message); - }, - _ = eviction_ticker.tick() => { + ), + Some(message) = rx.recv() => timed!( + message.variant(), + self.handle_message(message) + ), + _ = eviction_ticker.tick() => timed!( + "evict_stale_projects", self.handle_evict_stale_projects() - } + ), } } }); diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index 74c5bee467..41c013a7e9 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -2,15 +2,16 @@ use std::fmt; use std::sync::Arc; use std::time::Instant; -use arc_swap::access::Access; use arc_swap::ArcSwap; use relay_base_schema::project::ProjectKey; use relay_config::Config; use relay_quotas::CachedRateLimits; use relay_sampling::evaluation::ReservoirCounters; +use relay_statsd::metric; use crate::services::projects::project::{ProjectState, Revision}; use crate::services::projects::source::SourceProjectState; +use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; use crate::utils::RetryBackoff; /// The backing storage for a project cache. @@ -105,6 +106,18 @@ impl ProjectStore { } drop(shared); + metric!( + histogram(RelayHistograms::ProjectStateCacheSize) = self.shared.projects.len() as u64, + storage = "shared" + ); + metric!( + histogram(RelayHistograms::ProjectStateCacheSize) = self.private.len() as u64, + storage = "private" + ); + // TODO: this can be replaced with a service level timing metric + metric!(timer(RelayTimers::ProjectStateEvictionDuration) = eviction_start.elapsed()); + metric!(counter(RelayCounters::EvictingStaleProjectCaches) += evicted as u64); + evicted } @@ -125,7 +138,13 @@ impl ProjectStore { let private = self .private .entry(project_key) - .or_insert_with(|| PrivateProjectState::new(project_key, config)); + .and_modify(|_| { + metric!(counter(RelayCounters::ProjectCacheHit) += 1); + }) + .or_insert_with(|| { + metric!(counter(RelayCounters::ProjectCacheMiss) += 1); + PrivateProjectState::new(project_key, config) + }); let shared = self .shared @@ -282,14 +301,6 @@ impl CompletedFetch { self.project_key } - /// Returns a reference to the fetched [`ProjectState`]. - pub fn project_state(&self) -> Option<&ProjectState> { - match &self.state { - SourceProjectState::New(state) => Some(state), - SourceProjectState::NotModified => None, - } - } - /// Returns `true` if the fetch completed with a pending status. 
fn is_pending(&self) -> bool { match &self.state { diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index f09338071e..17fda43ef3 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -8,7 +8,7 @@ use relay_quotas::Scoping; mod info; -pub use self::info::{LimitedProjectInfo, ProjectInfo, PublicKeyConfig, Revision}; +pub use self::info::{LimitedProjectInfo, ProjectInfo, Revision}; /// Representation of a project's current state. #[derive(Clone, Debug, Default)] diff --git a/relay-server/src/services/projects/source/mod.rs b/relay-server/src/services/projects/source/mod.rs index 2dc78a7d46..1311acfc40 100644 --- a/relay-server/src/services/projects/source/mod.rs +++ b/relay-server/src/services/projects/source/mod.rs @@ -90,6 +90,9 @@ impl ProjectSource { } } + /// Fetches a project with `project_key` from the configured sources. + /// + /// Returns a fully sanitized project. pub async fn fetch( self, project_key: ProjectKey, diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index 7fc489ebbf..be48956c5a 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -157,7 +157,6 @@ impl QueueKey { /// It's sent in response to [`DequeueMany`] message from the [`ProjectCache`]. #[derive(Debug)] pub struct UnspooledEnvelope { - pub key: QueueKey, pub managed_envelope: ManagedEnvelope, } @@ -376,7 +375,6 @@ impl InMemory { self.envelope_count = self.envelope_count.saturating_sub(1); sender .send(UnspooledEnvelope { - key, managed_envelope: envelope, }) .ok(); @@ -586,14 +584,9 @@ impl OnDisk { }; match self.extract_envelope(envelope, services) { - Ok((key, managed_envelopes)) => { + Ok((_, managed_envelopes)) => { for managed_envelope in managed_envelopes { - sender - .send(UnspooledEnvelope { - key, - managed_envelope, - }) - .ok(); + sender.send(UnspooledEnvelope { managed_envelope }).ok(); } } Err(err) => relay_log::error!( @@ -1436,10 +1429,7 @@ mod tests { sender: tx.clone(), }); - let UnspooledEnvelope { - key: _, - managed_envelope, - } = rx.recv().await.unwrap(); + let UnspooledEnvelope { managed_envelope } = rx.recv().await.unwrap(); let start_time_received = managed_envelope.envelope().meta().start_time(); // Check if the original start time elapsed to the same second as the restored one. diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 644beb882f..18f4c1a190 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -7,8 +7,6 @@ pub enum RelayGauges { /// The state of Relay with respect to the upstream connection. /// Possible values are `0` for normal operations and `1` for a network outage. NetworkOutage, - /// The number of items currently in the garbage disposal queue. - ProjectCacheGarbageQueueSize, /// The number of envelopes waiting for project states in memory. /// /// This number is always <= `EnvelopeQueueSize`. 
@@ -52,7 +50,6 @@ impl GaugeMetric for RelayGauges { fn name(&self) -> &'static str { match self { RelayGauges::NetworkOutage => "upstream.network_outage", - RelayGauges::ProjectCacheGarbageQueueSize => "project_cache.garbage.queue_size", RelayGauges::BufferEnvelopesMemoryCount => "buffer.envelopes_mem_count", RelayGauges::BufferEnvelopesDiskCount => "buffer.envelopes_disk_count", RelayGauges::BufferPeriodicUnspool => "buffer.unspool.periodic", @@ -481,25 +478,30 @@ pub enum RelayTimers { /// /// - `message`: The type of message that was processed. ProcessMessageDuration, - /// Timing in milliseconds for handling a project cache message. + /// Timing in milliseconds for processing a task in the project cache service. /// /// This metric is tagged with: - /// - `message`: The type of message that was processed. - ProjectCacheMessageDuration, - /// Timing in milliseconds for processing a message in the buffer service. + /// - `task`: The type of the task the project cache does. + ProjectCacheTaskDuration, + /// Timing in milliseconds for handling a legacy project cache message. /// /// This metric is tagged with: - /// /// - `message`: The type of message that was processed. - BufferMessageProcessDuration, - /// Timing in milliseconds for processing a task in the project cache service. + LegacyProjectCacheMessageDuration, + /// Timing in milliseconds for processing a task in the legacy project cache service. /// /// A task is a unit of work the service does. Each branch of the /// `tokio::select` is a different task type. /// /// This metric is tagged with: - /// - `task`: The type of the task the processor does. - ProjectCacheTaskDuration, + /// - `task`: The type of the task the project cache does. + LegacyProjectCacheTaskDuration, + /// Timing in milliseconds for processing a message in the buffer service. + /// + /// This metric is tagged with: + /// + /// - `message`: The type of message that was processed. + BufferMessageProcessDuration, /// Timing in milliseconds for handling and responding to a health check request. /// /// This metric is tagged with: @@ -577,9 +579,12 @@ impl TimerMetric for RelayTimers { RelayTimers::ReplayRecordingProcessing => "replay.recording.process", RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration", RelayTimers::ProcessMessageDuration => "processor.message.duration", - RelayTimers::ProjectCacheMessageDuration => "project_cache.message.duration", RelayTimers::BufferMessageProcessDuration => "buffer.message.duration", RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration", + RelayTimers::LegacyProjectCacheMessageDuration => { + "legacy_project_cache.message.duration" + } + RelayTimers::LegacyProjectCacheTaskDuration => "legacy_project_cache.task.duration", RelayTimers::HealthCheckDuration => "health.message.duration", #[cfg(feature = "processing")] RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets", @@ -674,15 +679,6 @@ pub enum RelayCounters { /// - `invalid`: Data was considered invalid and could not be recovered. The reason indicates /// the validation that failed. Outcomes, - /// Number of times a project state is looked up from the cache. - /// - /// This includes lookups for both cached and new projects. As part of this, updates for - /// outdated or expired project caches are triggered. - /// - /// Related metrics: - /// - `project_cache.hit`: For successful cache lookups, even for outdated projects. - /// - `project_cache.miss`: For failed lookups resulting in an update. 
- ProjectStateGet, /// Number of project state HTTP requests. /// /// Relay updates projects in batches. Every update cycle, Relay requests @@ -692,15 +688,6 @@ pub enum RelayCounters { /// Note that after an update loop has completed, there may be more projects pending updates. /// This is indicated by `project_state.pending`. ProjectStateRequest, - /// Number of times a project config was requested with `.no-cache`. - /// - /// This effectively counts the number of envelopes or events that have been sent with a - /// corresponding DSN. Actual queries to the upstream may still be deduplicated for these - /// project state requests. - /// - /// A maximum of 1 such requests per second is allowed per project key. This metric counts only - /// permitted requests. - ProjectStateNoCache, /// Number of times a project state is requested from the central Redis cache. /// /// This metric is tagged with: @@ -872,9 +859,7 @@ impl CounterMetric for RelayCounters { RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes", RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes", RelayCounters::Outcomes => "events.outcomes", - RelayCounters::ProjectStateGet => "project_state.get", RelayCounters::ProjectStateRequest => "project_state.request", - RelayCounters::ProjectStateNoCache => "project_state.no_cache", #[cfg(feature = "processing")] RelayCounters::ProjectStateRedis => "project_state.redis.requests", RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed", diff --git a/relay-server/src/utils/garbage.rs b/relay-server/src/utils/garbage.rs deleted file mode 100644 index 5b86cc80cd..0000000000 --- a/relay-server/src/utils/garbage.rs +++ /dev/null @@ -1,113 +0,0 @@ -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::{mpsc, Arc}; -use std::thread::JoinHandle; - -/// Garbage disposal agent. -/// -/// Spawns a background thread which drops items sent to it via [`GarbageDisposal::dispose`]. -#[derive(Debug)] -pub struct GarbageDisposal { - tx: mpsc::Sender, - queue_size: Arc, -} - -impl GarbageDisposal { - /// Returns a new instance plus a handle to join on the background thread. - /// Currently only used in tests. - fn new_joinable() -> (Self, JoinHandle<()>) { - let (tx, rx) = mpsc::channel(); - - let queue_size = Arc::new(AtomicUsize::new(0)); - let queue_size_clone = queue_size.clone(); - let join_handle = std::thread::spawn(move || { - relay_log::debug!("Start garbage collection thread"); - while let Ok(object) = rx.recv() { - queue_size_clone.fetch_sub(1, Ordering::Relaxed); // Wraps around on overflow - drop(object); - } - relay_log::debug!("Stop garbage collection thread"); - }); - - (Self { tx, queue_size }, join_handle) - } - - /// Spawns a new garbage disposal instance. - /// Every instance has its own background thread that received items to be dropped via - /// [`Self::dispose`]. - /// When the instance is dropped, the background thread stops automatically. - pub fn new() -> Self { - let (instance, _) = Self::new_joinable(); - instance - } - - /// Defers dropping an object by sending it to the background thread. - pub fn dispose(&self, object: U) - where - T: From, - { - self.queue_size.fetch_add(1, Ordering::Relaxed); - self.tx - .send(T::from(object)) - .map_err(|e| { - relay_log::error!("failed to send object to garbage disposal thread, drop here"); - drop(e.0); - }) - .ok(); - } - - /// Get current queue size. 
- pub fn queue_size(&self) -> usize { - self.queue_size.load(Ordering::Relaxed) - } -} - -impl Default for GarbageDisposal { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - use std::thread::ThreadId; - - use super::GarbageDisposal; - - struct SomeStruct { - thread_ids: Arc>>, - } - - impl Drop for SomeStruct { - fn drop(&mut self) { - self.thread_ids - .lock() - .unwrap() - .push(std::thread::current().id()) - } - } - - #[test] - fn test_garbage_disposal() { - let thread_ids = Arc::new(Mutex::new(Vec::::new())); - - let x1 = SomeStruct { - thread_ids: thread_ids.clone(), - }; - drop(x1); - - let x2 = SomeStruct { - thread_ids: thread_ids.clone(), - }; - - let (garbage, join_handle) = GarbageDisposal::::new_joinable(); - garbage.dispose(x2); - drop(garbage); // breaks the while loop by dropping rx - join_handle.join().ok(); // wait for thread to finish its work - - let thread_ids = thread_ids.lock().unwrap(); - assert_eq!(thread_ids.len(), 2); - assert_eq!(thread_ids[0], std::thread::current().id()); - assert!(thread_ids[0] != thread_ids[1]); - } -} diff --git a/relay-server/src/utils/mod.rs b/relay-server/src/utils/mod.rs index 8d12ee347e..9c428d3b5c 100644 --- a/relay-server/src/utils/mod.rs +++ b/relay-server/src/utils/mod.rs @@ -1,6 +1,5 @@ mod api; mod dynamic_sampling; -mod garbage; mod managed_envelope; mod multipart; mod param_parser; @@ -22,7 +21,6 @@ mod unreal; pub use self::api::*; pub use self::dynamic_sampling::*; -pub use self::garbage::*; pub use self::managed_envelope::*; pub use self::memory::*; pub use self::multipart::*; From 1e9472376d7743791b543dec48208a885f83a78d Mon Sep 17 00:00:00 2001 From: David Herberth Date: Wed, 23 Oct 2024 15:22:08 +0200 Subject: [PATCH 07/15] disable project on error, tune config, drop v2 project config endpoint --- relay-config/src/config.rs | 14 +- relay-server/src/endpoints/project_configs.rs | 34 ++-- .../src/services/projects/cache/service.rs | 9 +- .../src/services/projects/cache/state.rs | 33 ++-- tests/integration/test_projectconfigs.py | 162 ++++++------------ tests/integration/test_query.py | 7 +- 6 files changed, 114 insertions(+), 145 deletions(-) diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 2a98bc63a3..6f9ac83e2e 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -1017,7 +1017,9 @@ struct Cache { /// The cache timeout for project configurations in seconds. project_expiry: u32, /// Continue using project state this many seconds after cache expiry while a new state is - /// being fetched. This is added on top of `project_expiry` and `miss_expiry`. Default is 0. + /// being fetched. This is added on top of `project_expiry`. + /// + /// Default is 2 minutes. project_grace_period: u32, /// The cache timeout for downstream relay info (public keys) in seconds. 
relay_expiry: u32, @@ -1053,17 +1055,17 @@ impl Default for Cache { fn default() -> Self { Cache { project_request_full_config: false, - project_expiry: 300, // 5 minutes - project_grace_period: 0, - relay_expiry: 3600, // 1 hour - envelope_expiry: 600, // 10 minutes + project_expiry: 300, // 5 minutes + project_grace_period: 120, // 2 minutes + relay_expiry: 3600, // 1 hour + envelope_expiry: 600, // 10 minutes envelope_buffer_size: 1000, miss_expiry: 60, // 1 minute batch_interval: 100, // 100ms downstream_relays_batch_interval: 100, // 100ms batch_size: 500, file_interval: 10, // 10 seconds - eviction_interval: 60, // 60 seconds + eviction_interval: 15, // 15 seconds global_config_fetch_interval: 10, // 10 seconds } } diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 75d3f10c8b..5408008837 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use axum::extract::{Query, Request}; use axum::handler::Handler; +use axum::http::StatusCode; use axum::response::{IntoResponse, Result}; use axum::{Json, RequestExt}; use relay_base_schema::project::ProjectKey; @@ -17,11 +18,7 @@ use crate::services::global_config::{self, StatusResponse}; use crate::services::projects::project::{ LimitedParsedProjectState, ParsedProjectState, ProjectState, Revision, }; - -/// V2 version of this endpoint. -/// -/// The request is a list of [`ProjectKey`]s and the response a list of [`ProjectStateWrapper`]s -const ENDPOINT_V2: u16 = 2; +use crate::utils::ApiErrorResponse; /// V3 version of this endpoint. /// @@ -31,6 +28,16 @@ const ENDPOINT_V2: u16 = 2; /// returned, or a further poll ensues. const ENDPOINT_V3: u16 = 3; +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("This API version is no longer supported, upgrade your Relay or Client")] +struct VersionOutdatedError; + +impl IntoResponse for VersionOutdatedError { + fn into_response(self) -> axum::response::Response { + (StatusCode::BAD_REQUEST, ApiErrorResponse::from_error(&self)).into_response() + } +} + /// Helper to deserialize the `version` query parameter. #[derive(Clone, Copy, Debug, Deserialize)] struct VersionQuery { @@ -216,9 +223,14 @@ async fn inner( })) } +/// Returns `true` for all `?version` query parameters that are no longer supported by Relay and Sentry. +fn is_outdated(Query(query): Query) -> bool { + query.version < ENDPOINT_V3 +} + /// Returns `true` if the `?version` query parameter is compatible with this implementation. fn is_compatible(Query(query): Query) -> bool { - query.version >= ENDPOINT_V2 && query.version <= ENDPOINT_V3 + query.version >= ENDPOINT_V3 && query.version <= ENDPOINT_V3 } /// Endpoint handler for the project configs endpoint. @@ -234,9 +246,11 @@ fn is_compatible(Query(query): Query) -> bool { /// support old Relay versions. 
pub async fn handle(state: ServiceState, mut req: Request) -> Result { let data = req.extract_parts().await?; - Ok(if is_compatible(data) { - inner.call(req, state).await + if is_outdated(data) { + Err(VersionOutdatedError.into()) + } else if is_compatible(data) { + Ok(inner.call(req, state).await) } else { - forward::forward(state, req).await - }) + Ok(forward::forward(state, req).await) + } } diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index 149fbac3ff..270f78a7a7 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -119,11 +119,14 @@ impl ProjectCacheService { Ok(result) => result, Err(err) => { relay_log::error!( + tags.project_key = fetch.project_key().as_str(), error = &err as &dyn std::error::Error, - "failed to fetch project state for {fetch:?}" + "failed to fetch project from source: {fetch:?}" ); - // Fallback to a pending project on error, we will retry. - ProjectState::Pending.into() + + // TODO: change this to ProjectState::Pending once we consider it safe to do so. + // see https://github.com/getsentry/relay/pull/4140. + ProjectState::Disabled.into() } }; diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index 41c013a7e9..401578a967 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -96,12 +96,10 @@ impl ProjectStore { _removed.is_some(), "an expired project must exist in the shared state" ); + relay_log::trace!(tags.project_key = project_key.as_str(), "project evicted"); evicted += 1; - // TODO: garbage disposal? Do we still need that, we shouldn't have a problem with - // timings anymore. - on_evict(project_key); } drop(shared); @@ -169,10 +167,6 @@ impl Shared { /// The caller must ensure that the project cache is instructed to /// [`super::ProjectCache::Fetch`] the retrieved project. pub fn get_or_create(&self, project_key: ProjectKey) -> SharedProject { - // TODO: do we need to check for expiry here? - // TODO: if yes, we need to include the timestamp in the shared project state. - // TODO: grace periods? - // The fast path, we expect the project to exist. let projects = self.projects.pin(); if let Some(project) = projects.get(&project_key) { @@ -235,7 +229,8 @@ impl ProjectRef<'_> { } fn complete_fetch(&mut self, fetch: CompletedFetch) { - self.private.complete_fetch(&fetch); + let now = Instant::now(); + self.private.complete_fetch(&fetch, now); // Keep the old state around if the current fetch is pending. // It may still be useful to callers. @@ -427,7 +422,7 @@ impl PrivateProjectState { let when = match &self.state { FetchState::InProgress {} => { relay_log::trace!( - project_key = self.project_key.as_str(), + tags.project_key = self.project_key.as_str(), "project fetch skipped, fetch in progress" ); return None; @@ -440,7 +435,7 @@ impl PrivateProjectState { if last_fetch.check_expiry(now, config).is_updated() { // The current state is up to date, no need to start another fetch. relay_log::trace!( - project_key = self.project_key.as_str(), + tags.project_key = self.project_key.as_str(), "project fetch skipped, already up to date" ); return None; @@ -452,8 +447,8 @@ impl PrivateProjectState { // Mark a current fetch in progress. 
self.state = FetchState::InProgress {}; - relay_log::debug!( - project_key = &self.project_key.as_str(), + relay_log::trace!( + tags.project_key = &self.project_key.as_str(), attempts = self.backoff.attempt() + 1, "project state fetch scheduled in {:?}", when.saturating_duration_since(Instant::now()), @@ -466,7 +461,7 @@ impl PrivateProjectState { }) } - fn complete_fetch(&mut self, fetch: &CompletedFetch) { + fn complete_fetch(&mut self, fetch: &CompletedFetch, now: Instant) { debug_assert!( matches!(self.state, FetchState::InProgress), "fetch completed while there was no current fetch registered" @@ -474,12 +469,20 @@ impl PrivateProjectState { if fetch.is_pending() { self.state = FetchState::Pending { - next_fetch_attempt: Instant::now().checked_add(self.backoff.next_backoff()), + next_fetch_attempt: now.checked_add(self.backoff.next_backoff()), }; + relay_log::trace!( + tags.project_key = &self.project_key.as_str(), + "project state fetch completed but still pending" + ); } else { + relay_log::trace!( + tags.project_key = &self.project_key.as_str(), + "project state fetch completed with non-pending config" + ); self.backoff.reset(); self.state = FetchState::NonPending { - last_fetch: LastFetch(Instant::now()), + last_fetch: LastFetch(now), }; } } diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index 0b54189bd9..5d9bd7adf1 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -12,6 +12,42 @@ RelayInfo = namedtuple("RelayInfo", ["id", "public_key", "secret_key", "internal"]) +def request_config( + relay, packed, signature, *, version="3", relay_id=None, headers=None +): + return relay.post( + f"/api/0/relays/projectconfigs/?version={version}", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay_id or relay.relay_id, + "X-Sentry-Relay-Signature": signature, + } + | (headers or {}), + ) + + +def get_response(relay, packed, signature, *, version="3", relay_id=None, headers=None): + data = None + deadline = time.monotonic() + 15 + while time.monotonic() <= deadline: + response = request_config( + relay, + packed, + signature, + version=version, + relay_id=relay_id, + headers=headers, + ) + assert response.ok + data = response.json() + print(data) + if data["configs"] or data.get("global") or data.get("unchanged"): + return data, response + time.sleep(0.01) + + assert False, "Relay did still not receive a project config from minisentry" + + @pytest.mark.parametrize( "caller, projects", [ @@ -84,16 +120,9 @@ def test_dynamic_relays(mini_sentry, relay, caller, projects): packed, signature = caller.secret_key.pack(request) - resp = relay1.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={"X-Sentry-Relay-Id": caller.id, "X-Sentry-Relay-Signature": signature}, - ) - - assert resp.ok + data, _ = get_response(relay1, packed, signature, relay_id=caller.id) # test that it returns valid data - data = resp.json() for p in public_keys: assert data["configs"][p] is not None @@ -104,14 +133,7 @@ def test_invalid_json(mini_sentry, relay): body = {} # missing the required `publicKeys` field packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) + response = request_config(relay, packed, signature) assert response.status_code == 400 # Bad Request assert "JSON" in response.text @@ -121,7 
+143,7 @@ def test_invalid_signature(mini_sentry, relay): relay = relay(mini_sentry) response = relay.post( - "/api/0/relays/projectconfigs/?version=2", + "/api/0/relays/projectconfigs/?version=3", data='{"publicKeys":[]}', headers={ "X-Sentry-Relay-Id": relay.relay_id, @@ -148,17 +170,9 @@ def test_broken_projectkey(mini_sentry, relay): } packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) + data, _ = get_response(relay, packed, signature) - assert response.ok - assert public_key in response.json()["configs"] + assert public_key in data["configs"] def test_pending_projects(mini_sentry, relay): @@ -172,63 +186,17 @@ def test_pending_projects(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - def request_config(): - return relay.post( - "/api/0/relays/projectconfigs/?version=3", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - response = request_config() - + response = request_config(relay, packed, signature) assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data["configs"] - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = request_config() - assert response.ok - data = response.json() - if data["configs"]: - break - else: - assert False, "Relay did still not receive a project config from minisentry" + data, _ = get_response(relay, packed, signature) assert public_key in data["configs"] assert data.get("pending") is None -def request_config(relay, packed, signature, version: str): - return relay.post( - f"/api/0/relays/projectconfigs/?version={version}", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - -def get_response(relay, packed, signature, version="3"): - data = None - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - # send 1 r/s - time.sleep(1) - response = request_config(relay, packed, signature, version) - assert response.ok - data = response.json() - if data["configs"]: - break - else: - print("Relay did still not receive a project config from minisentry") - return data - - def test_unparsable_project_config(mini_sentry, relay): project_key = 42 relay_config = { @@ -284,7 +252,7 @@ def assert_clear_test_failures(): assert_clear_test_failures() # The state should be fixed and updated by now, since we keep re-trying to fetch new one all the time. - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] time.sleep(1) @@ -323,7 +291,7 @@ def test_cached_project_config(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -333,7 +301,7 @@ def test_cached_project_config(mini_sentry, relay): # Give it a bit time for update to go through. 
time.sleep(1) - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -370,7 +338,7 @@ def test_get_global_config(mini_sentry, relay): body = {"publicKeys": [], "global": True} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data = get_response(relay, packed, signature, version="3") + data, _ = get_response(relay, packed, signature, version="3") global_extraction_config = data["global"].pop("metricExtraction") assert "span_metrics_common" in global_extraction_config["groups"] @@ -386,19 +354,11 @@ def test_compression(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "Accept-Encoding": "gzip", - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, + data, response = get_response( + relay, packed, signature, headers={"Accept-Encoding": "gzip"} ) - - assert response.ok assert response.headers["content-encoding"] == "gzip" - assert public_key in response.json()["configs"] + assert public_key in data["configs"] def test_unchanged_projects(mini_sentry, relay): @@ -410,32 +370,14 @@ def test_unchanged_projects(mini_sentry, relay): body = {"publicKeys": [public_key], "revisions": ["123"]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - def request_config(): - return relay.post( - "/api/0/relays/projectconfigs/?version=3", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - response = request_config() + response = request_config(relay, packed, signature) assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data.get("unchanged", []) - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = request_config() - assert response.ok - data = response.json() - if data.get("unchanged"): - break - else: - assert False, "Relay did still not receive a project config from minisentry" + data, response = get_response(relay, packed, signature) assert public_key in data["unchanged"] assert public_key not in data["configs"] diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index a934319c2c..9c3a40014a 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -16,7 +16,12 @@ def test_local_project_config(mini_sentry, relay): project_id = 42 config = mini_sentry.basic_project_config(project_id) relay_config = { - "cache": {"file_interval": 1, "project_expiry": 1, "project_grace_period": 0} + "cache": { + "file_interval": 1, + "project_expiry": 1, + "project_grace_period": 0, + "eviction_interval": 1, + } } relay = relay(mini_sentry, relay_config, wait_health_check=False) relay.config_dir.mkdir("projects").join("42.json").write( From 8a41e10bc096992b214cc2c3bf3134075585a7c9 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Wed, 23 Oct 2024 16:32:21 +0200 Subject: [PATCH 08/15] fix all tests --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 1 + relay-quotas/src/rate_limit.rs | 2 +- relay-server/src/endpoints/project_configs.rs | 2 +- relay-server/src/services/buffer/mod.rs | 50 ++---- relay-server/src/services/processor.rs | 103 ++++++----- .../src/services/processor/span/processing.rs | 2 +- 
.../src/services/projects/cache/handle.rs | 35 ++++ .../src/services/projects/cache/legacy.rs | 97 ++--------- .../src/services/projects/cache/project.rs | 162 +++++------------- .../src/services/projects/cache/service.rs | 2 +- .../src/services/projects/cache/state.rs | 40 ++++- .../src/services/projects/project/info.rs | 9 +- .../src/services/projects/project/mod.rs | 2 +- .../src/services/projects/source/upstream.rs | 4 +- relay-server/src/testutils.rs | 5 +- tests/integration/test_projectconfigs.py | 2 + tests/integration/test_query.py | 21 ++- 18 files changed, 230 insertions(+), 311 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28548848d3..8287d3842e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -553,7 +553,7 @@ jobs: - run: make test-integration env: PYTEST_N: 6 - RELAY_VERSION_CHAIN: "20.6.0,latest" + RELAY_VERSION_CHAIN: "23.12.0,latest" sentry-relay-integration-tests: name: Sentry-Relay Integration Tests diff --git a/CHANGELOG.md b/CHANGELOG.md index c75570ec2f..9084b6ecf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ **Breaking Changes**: - Removes support for metric meta envelope items. ([#4152](https://github.com/getsentry/relay/pull/4152)) +- Removes support for the project cache endpoint version 2 and before. ([#4147](https://github.com/getsentry/relay/pull/4147)) ## 24.10.0 diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index 0c2a80de54..a77baaf173 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -1149,7 +1149,7 @@ mod tests { #[test] fn test_cached_rate_limits_expired() { - let mut cached = CachedRateLimits::new(); + let cached = CachedRateLimits::new(); // Active error limit cached.add(RateLimit { diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 5408008837..38876be44c 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -230,7 +230,7 @@ fn is_outdated(Query(query): Query) -> bool { /// Returns `true` if the `?version` query parameter is compatible with this implementation. fn is_compatible(Query(query): Query) -> bool { - query.version >= ENDPOINT_V3 && query.version <= ENDPOINT_V3 + query.version == ENDPOINT_V3 } /// Endpoint handler for the project configs endpoint. 
diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 80c90b04c0..5546889283 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -488,7 +488,7 @@ mod tests { service: EnvelopeBufferService, global_tx: watch::Sender, envelopes_rx: mpsc::Receiver, - project_cache_rx: mpsc::UnboundedReceiver, + project_cache_handle: ProjectCacheHandle, outcome_aggregator_rx: mpsc::UnboundedReceiver, } @@ -496,6 +496,8 @@ mod tests { config_json: Option, global_config_status: global_config::Status, ) -> EnvelopeBufferServiceResult { + relay_log::init_test!(); + let config_json = config_json.unwrap_or(serde_json::json!({ "spool": { "envelopes": { @@ -508,8 +510,8 @@ mod tests { let memory_stat = MemoryStat::default(); let (global_tx, global_rx) = watch::channel(global_config_status); let (envelopes_tx, envelopes_rx) = mpsc::channel(5); - let (project_cache, project_cache_rx) = Addr::custom(); let (outcome_aggregator, outcome_aggregator_rx) = Addr::custom(); + let project_cache_handle = ProjectCacheHandle::for_test(); let envelope_buffer_service = EnvelopeBufferService::new( config, @@ -517,7 +519,7 @@ mod tests { global_rx, Services { envelopes_tx, - project_cache, + project_cache_handle: project_cache_handle.clone(), outcome_aggregator, test_store: Addr::dummy(), }, @@ -528,7 +530,7 @@ mod tests { service: envelope_buffer_service, global_tx, envelopes_rx, - project_cache_rx, + project_cache_handle, outcome_aggregator_rx, } } @@ -541,7 +543,7 @@ mod tests { service, global_tx: _global_tx, envelopes_rx: _envelopes_rx, - project_cache_rx: _project_cache_rx, + project_cache_handle: _project_cache_handle, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service(None, global_config::Status::Pending); @@ -566,8 +568,8 @@ mod tests { service, global_tx, envelopes_rx, - project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, + .. } = envelope_buffer_service(None, global_config::Status::Pending); let addr = service.start(); @@ -580,7 +582,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); global_tx.send_replace(global_config::Status::Ready(Arc::new( GlobalConfig::default(), @@ -589,7 +590,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 1); - assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -599,9 +599,9 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, global_tx: _global_tx, + .. 
} = envelope_buffer_service( Some(serde_json::json!({ "spool": { @@ -627,7 +627,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -637,7 +636,7 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_rx, + project_cache_handle: _project_cache_handle, mut outcome_aggregator_rx, global_tx: _global_tx, } = envelope_buffer_service( @@ -664,7 +663,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); let outcome = outcome_aggregator_rx.try_recv().unwrap(); assert_eq!(outcome.category, DataCategory::TransactionIndexed); @@ -678,7 +676,7 @@ mod tests { let EnvelopeBufferServiceResult { service, mut envelopes_rx, - mut project_cache_rx, + project_cache_handle, global_tx: _global_tx, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service( @@ -693,9 +691,8 @@ mod tests { addr.send(EnvelopeBuffer::Push(envelope.clone())); - tokio::time::sleep(Duration::from_secs(1)).await; - - let Some(DequeuedEnvelope(envelope)) = envelopes_rx.recv().await else { + let message = tokio::time::timeout(Duration::from_secs(3), envelopes_rx.recv()); + let Some(legacy::DequeuedEnvelope(envelope)) = message.await.unwrap() else { panic!(); }; @@ -703,20 +700,11 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; - assert_eq!(project_cache_rx.len(), 1); - let message = project_cache_rx.recv().await; - assert!(matches!( - message, - Some(ProjectCache::UpdateProject(key)) if key == project_key - )); + assert_eq!(project_cache_handle.test_num_fetches(), 1); - tokio::time::sleep(Duration::from_secs(1)).await; + tokio::time::sleep(Duration::from_millis(1300)).await; - assert_eq!(project_cache_rx.len(), 1); - assert!(matches!( - message, - Some(ProjectCache::UpdateProject(key)) if key == project_key - )) + assert_eq!(project_cache_handle.test_num_fetches(), 2); } #[tokio::test] @@ -727,8 +715,8 @@ mod tests { service, mut envelopes_rx, global_tx: _global_tx, - project_cache_rx: _project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, + .. 
} = envelope_buffer_service( None, global_config::Status::Ready(Arc::new(GlobalConfig::default())), @@ -751,7 +739,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, DequeuedEnvelope(..))) + .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) .count(), 5 ); @@ -764,7 +752,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, DequeuedEnvelope(..))) + .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) .count(), 5 ); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 994f278b81..35bd47ef9e 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -3702,8 +3702,6 @@ mod tests { ] { let message = ProcessMetrics { data: MetricData::Raw(vec![item.clone()]), - project_state: ProjectState::Pending, - rate_limits: Default::default(), project_key, source, start_time, @@ -3727,10 +3725,11 @@ mod tests { let start_time = Instant::now(); let config = Config::default(); - let (project_cache, mut project_cache_rx) = Addr::custom(); + let (aggregator, mut aggregator_rx) = Addr::custom(); let processor = create_test_processor_with_addrs( config, Addrs { + aggregator, ..Default::default() }, ); @@ -3773,78 +3772,72 @@ mod tests { }; processor.handle_process_batched_metrics(&mut token, message); - let value = project_cache_rx.recv().await.unwrap(); - let legacy::ProjectCache::ProcessMetrics(pm1) = value else { + let value = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb1) = value else { panic!() }; - let value = project_cache_rx.recv().await.unwrap(); - let legacy::ProjectCache::ProcessMetrics(pm2) = value else { + let value = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb2) = value else { panic!() }; - let mut messages = vec![pm1, pm2]; + let mut messages = vec![mb1, mb2]; messages.sort_by_key(|pm| pm.project_key); let actual = messages .into_iter() - .map(|pm| (pm.project_key, pm.data, pm.source)) + .map(|pm| (pm.project_key, pm.buckets)) .collect::>(); assert_debug_snapshot!(actual, @r###" [ ( ProjectKey("11111111111111111111111111111111"), - Parsed( - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.response_time@millisecond", - ), - value: Distribution( - [ - 68.0, - ], - ), - tags: { - "route": "user_index", - }, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, - }, + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.response_time@millisecond", + ), + value: Distribution( + [ + 68.0, + ], + ), + tags: { + "route": "user_index", }, - ], - ), - Internal, + metadata: BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, + }, + }, + ], ), ( ProjectKey("22222222222222222222222222222222"), - Parsed( - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.cache_rate@none", - ), - value: Distribution( - [ - 36.0, - ], - ), - tags: {}, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, - }, + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.cache_rate@none", + ), + value: Distribution( + [ + 36.0, + ], + ), + tags: {}, + metadata: BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, }, - ], - ), - Internal, + }, + ], 
), ] "###); diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index a9801e45f8..10d44f6c17 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -822,7 +822,7 @@ mod tests { extracted_metrics: ProcessingExtractedMetrics::new(), config: Arc::new(Config::default()), project_info, - rate_limits: RateLimits::default(), + rate_limits: Arc::new(RateLimits::default()), sampling_project_info: None, project_id: ProjectId::new(42), managed_envelope: managed_envelope.try_into().unwrap(), diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs index 6c77558c66..7d203252ea 100644 --- a/relay-server/src/services/projects/cache/handle.rs +++ b/relay-server/src/services/projects/cache/handle.rs @@ -49,3 +49,38 @@ impl fmt::Debug for ProjectCacheHandle { .finish() } } + +#[cfg(test)] +mod test { + use super::*; + use crate::services::projects::project::ProjectState; + + impl ProjectCacheHandle { + pub fn for_test() -> Self { + Self { + shared: Default::default(), + config: Default::default(), + service: Addr::dummy(), + project_events: broadcast::channel(999_999).0, + } + } + + pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { + let is_pending = state.is_pending(); + self.shared.test_set_project_state(project_key, state); + if is_pending { + let _ = self.project_events.send(ProjectEvent::Evicted(project_key)); + } else { + let _ = self.project_events.send(ProjectEvent::Ready(project_key)); + } + } + + pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { + self.shared.test_has_project_created(project_key) + } + + pub fn test_num_fetches(&self) -> u64 { + self.service.len() + } + } +} diff --git a/relay-server/src/services/projects/cache/legacy.rs b/relay-server/src/services/projects/cache/legacy.rs index c8175edf51..33850df77c 100644 --- a/relay-server/src/services/projects/cache/legacy.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -31,15 +31,12 @@ use crate::utils::{ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; /// Validates the envelope against project configuration and rate limits. /// -/// This ensures internally that the project state is up to date and then runs the same checks as -/// [`CheckEnvelope`]. Once the envelope has been validated, remaining items are forwarded to the -/// next stage: +/// This ensures internally that the project state is up to date . +/// Once the envelope has been validated, remaining items are forwarded to the next stage: /// /// - If the envelope needs dynamic sampling, and the project state is not cached or out of the /// date, the envelopes is spooled and we continue when the state is fetched. /// - Otherwise, the envelope is directly submitted to the [`EnvelopeProcessor`]. -/// -/// [`EnvelopeProcessor`]: crate::services::processor::EnvelopeProcessor #[derive(Debug)] pub struct ValidateEnvelope { envelope: ManagedEnvelope, @@ -76,21 +73,9 @@ pub struct RefreshIndexCache(pub HashSet); #[derive(Debug)] pub struct DequeuedEnvelope(pub Box); -/// A cache for [`ProjectState`]s. -/// -/// The project maintains information about organizations, projects, and project keys along with -/// settings required to ingest traffic to Sentry. Internally, it tries to keep information -/// up-to-date and automatically retires unused old data. 
-/// -/// To retrieve information from the cache, use [`GetProjectState`] for guaranteed up-to-date -/// information, or [`GetCachedProjectState`] for immediately available but potentially older -/// information. +/// The legacy project cache. /// -/// There are also higher-level operations, such as [`CheckEnvelope`] and [`ValidateEnvelope`] that -/// inspect contents of envelopes for ingestion, as well as [`ProcessMetrics`] to aggregate metrics -/// associated with a project. -/// -/// See the enumerated variants for a full list of available messages for this service. +/// It manages spool v1 and some remaining messages which handle project state. #[derive(Debug)] pub enum ProjectCache { ValidateEnvelope(ValidateEnvelope), @@ -893,7 +878,7 @@ mod tests { ProjectCacheBroker { config: config.clone(), memory_checker, - projects: todo!(), + projects: ProjectCacheHandle::for_test(), services, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1: Some(SpoolV1 { @@ -916,9 +901,10 @@ mod tests { let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); let (mut broker, _buffer_svc) = project_cache_broker_setup(services.clone(), buffer_tx).await; + let projects = broker.projects.clone(); + let mut project_events = projects.events(); broker.global_config = GlobalConfigStatus::Ready; - let (tx_update, mut rx_update) = mpsc::unbounded_channel(); let (tx_assert, mut rx_assert) = mpsc::unbounded_channel(); let dsn1 = "111d836b15bb49d7bbf99e64295d995b"; @@ -945,11 +931,12 @@ mod tests { tokio::task::spawn(async move { loop { select! { - + Ok(project_event) = project_events.recv() => { + broker.handle_project_event(project_event); + } Some(assert) = rx_assert.recv() => { assert_eq!(broker.spool_v1.as_ref().unwrap().index.len(), assert); }, - Some(update) = rx_update.recv() => broker.merge_state(update), () = &mut broker.spool_v1_unspool_handle => broker.handle_periodic_unspool(), } } @@ -958,13 +945,11 @@ mod tests { // Before updating any project states. tx_assert.send(2).unwrap(); - let update_dsn1_project_state = UpdateProjectState { - project_key: ProjectKey::parse(dsn1).unwrap(), - state: ProjectFetchState::allowed(), - no_cache: false, - }; + projects.test_set_project_state( + ProjectKey::parse(dsn1).unwrap(), + ProjectState::new_allowed(), + ); - tx_update.send(update_dsn1_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // One of the project should be unspooled. tx_assert.send(1).unwrap(); @@ -972,61 +957,15 @@ mod tests { // Schedule some work... tokio::time::sleep(Duration::from_secs(2)).await; - let update_dsn2_project_state = UpdateProjectState { - project_key: ProjectKey::parse(dsn2).unwrap(), - state: ProjectFetchState::allowed(), - no_cache: false, - }; + projects.test_set_project_state( + ProjectKey::parse(dsn2).unwrap(), + ProjectState::new_allowed(), + ); - tx_update.send(update_dsn2_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // The last project should be unspooled. tx_assert.send(0).unwrap(); // Make sure the last assert is tested. 
tokio::time::sleep(Duration::from_millis(100)).await; } - - #[tokio::test] - async fn handle_processing_without_project() { - let services = mocked_services(); - let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); - let (mut broker, buffer_svc) = - project_cache_broker_setup(services.clone(), buffer_tx.clone()).await; - - let dsn = "111d836b15bb49d7bbf99e64295d995b"; - let project_key = ProjectKey::parse(dsn).unwrap(); - let key = QueueKey { - own_key: project_key, - sampling_key: project_key, - }; - let envelope = ManagedEnvelope::new( - empty_envelope_with_dsn(dsn), - services.outcome_aggregator.clone(), - services.test_store.clone(), - ProcessingGroup::Ungrouped, - ); - - // Index and projects are empty. - assert!(broker.spool_v1.as_mut().unwrap().index.is_empty()); - - // Since there is no project we should not process anything but create a project and spool - // the envelope. - broker.handle_processing(envelope); - - // Assert that we have a new project and also added an index. - assert!(!broker - .projects - .get(project_key) - .project_state() - .is_pending()); - assert!(broker.spool_v1.as_mut().unwrap().index.contains(&key)); - - // Check is we actually spooled anything. - buffer_svc.send(DequeueMany::new([key].into(), buffer_tx.clone())); - let UnspooledEnvelope { - managed_envelope, .. - } = buffer_rx.recv().await.unwrap(); - - assert_eq!(key, QueueKey::from_envelope(managed_envelope.envelope())); - } } diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs index 239829e4d2..93a50159c2 100644 --- a/relay-server/src/services/projects/cache/project.rs +++ b/relay-server/src/services/projects/cache/project.rs @@ -34,17 +34,15 @@ impl<'a> Project<'a> { self.shared.reservoir_counters() } - /// Runs the checks on incoming envelopes. + /// Checks the envelope against project configuration and rate limits. /// - /// See, [`crate::services::projects::cache::CheckEnvelope`] for more information + /// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated + /// project state may be used, or otherwise the envelope is passed through unaltered. /// - /// * checks the rate limits - /// * validates the envelope meta in `check_request` - determines whether the given request - /// should be accepted or discarded - /// - /// IMPORTANT: If the [`ProjectState`] is invalid, the `check_request` will be skipped and only - /// rate limits will be validated. This function **must not** be called in the main processing - /// pipeline. 
+ /// To check the envelope, this runs: + /// - Validate origins and public keys + /// - Quotas with a limit of `0` + /// - Cached rate limits pub fn check_envelope( &self, mut envelope: ManagedEnvelope, @@ -178,114 +176,32 @@ mod tests { use crate::envelope::{ContentType, Envelope, Item}; use crate::extractors::RequestMeta; use crate::services::processor::ProcessingGroup; - use relay_base_schema::project::ProjectId; + use crate::services::projects::project::{ProjectInfo, PublicKeyConfig}; + use relay_base_schema::project::{ProjectId, ProjectKey}; use relay_event_schema::protocol::EventId; - use relay_test::mock_service; use serde_json::json; - use smallvec::SmallVec; + use smallvec::smallvec; use super::*; - #[test] - fn get_state_expired() { - for expiry in [9999, 0] { - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": expiry, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - // Initialize project with a state - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let project_info = ProjectInfo { - project_id: Some(ProjectId::new(123)), - ..Default::default() - }; - let mut project = Project::new(project_key, config.clone()); - project.state = ProjectFetchState::enabled(project_info); - - if expiry > 0 { - // With long expiry, should get a state - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - } else { - // With 0 expiry, project should expire immediately. No state can be set. - assert!(matches!(project.current_state(), ProjectState::Pending)); - } - } - } - - #[tokio::test] - async fn test_stale_cache() { - let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); - - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": 100, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - let channel = StateChannel::new(); - - // Initialize project with a state. - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut project = Project::new(project_key, config); - project.state_channel = Some(channel); - project.state = ProjectFetchState::allowed(); - - assert!(project.next_fetch_attempt.is_none()); - // Try to update project with errored project state. - project.update_state(&addr, ProjectFetchState::pending(), false); - // Since we got invalid project state we still keep the old one meaning there - // still must be the project id set. - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - assert!(project.next_fetch_attempt.is_some()); - - // This tests that we actually initiate the backoff and the backoff mechanism works: - // * first call to `update_state` with invalid ProjectState starts the backoff, but since - // it's the first attemt, we get Duration of 0. 
- // * second call to `update_state` here will bumpt the `next_backoff` Duration to somehing - // like ~ 1s - // * and now, by calling `fetch_state` we test that it's a noop, since if backoff is active - // we should never fetch - // * without backoff it would just panic, not able to call the ProjectCache service - let channel = StateChannel::new(); - project.state_channel = Some(channel); - project.update_state(&addr, ProjectFetchState::pending(), false); - project.fetch_state(addr, false); - } - - fn create_project(config: Option) -> Project { - let project_key = ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(); - let mut project = Project::new(project_key, Arc::new(Config::default())); + fn create_project(config: &Config, data: Option) -> Project<'_> { let mut project_info = ProjectInfo { project_id: Some(ProjectId::new(42)), ..Default::default() }; - let mut public_keys = SmallVec::new(); - public_keys.push(PublicKeyConfig { - public_key: project_key, + project_info.public_keys = smallvec![PublicKeyConfig { + public_key: ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), numeric_id: None, - }); - project_info.public_keys = public_keys; - if let Some(config) = config { - project_info.config = serde_json::from_value(config).unwrap(); + }]; + + if let Some(data) = data { + project_info.config = serde_json::from_value(data).unwrap(); } - project.state = ProjectFetchState::enabled(project_info); - project + + Project::new( + SharedProject::for_test(ProjectState::Enabled(project_info.into())), + config, + ) } fn request_meta() -> RequestMeta { @@ -298,18 +214,22 @@ mod tests { #[test] fn test_track_nested_spans_outcomes() { - let mut project = create_project(Some(json!({ - "features": [ - "organizations:indexed-spans-extraction" - ], - "quotas": [{ - "id": "foo", - "categories": ["transaction"], - "window": 3600, - "limit": 0, - "reasonCode": "foo", - }] - }))); + let config = Default::default(); + let project = create_project( + &config, + Some(json!({ + "features": [ + "organizations:indexed-spans-extraction" + ], + "quotas": [{ + "id": "foo", + "categories": ["transaction"], + "window": 3600, + "limit": 0, + "reasonCode": "foo", + }] + })), + ); let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); @@ -347,8 +267,8 @@ mod tests { envelope.add_item(transaction); - let (outcome_aggregator, mut outcome_aggregator_rx) = Addr::custom(); - let (test_store, _) = Addr::custom(); + let (outcome_aggregator, mut outcome_aggregator_rx) = relay_system::Addr::custom(); + let (test_store, _) = relay_system::Addr::custom(); let managed_envelope = ManagedEnvelope::new( envelope, @@ -357,7 +277,7 @@ mod tests { ProcessingGroup::Transaction, ); - let _ = project.check_envelope(managed_envelope); + project.check_envelope(managed_envelope).unwrap(); drop(outcome_aggregator); let expected = [ diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index 270f78a7a7..e61583de39 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -113,7 +113,7 @@ impl ProjectCacheService { // TODO: cached state for delta fetches, maybe this should just be a revision? 
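A short illustrative note on the revision plumbing used in the fetch call right below (not part of the patch): `Fetch::revision()` hands the currently cached revision to the source, so the upstream can answer "not modified" instead of resending the full config. Per the doc comment on `Revision` in `info.rs`, a missing revision never compares equal to any other revision, so a project that was never fetched can never be mistaken for an up-to-date one. A minimal sketch, assuming the `From<&str>` impl and `Revision` export added in this commit; the revision string is hypothetical:

    use crate::services::projects::project::Revision;

    // Two present, identical revisions compare equal ...
    let cached: Revision = "8d5a2b3".into();
    assert_eq!(cached, Revision::from("8d5a2b3"));

    // ... but a missing revision never compares equal to a present one,
    // so an unfetched project always triggers a full fetch.
    let missing = Revision::default();
    assert_ne!(missing, cached);
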
let state = match source - .fetch(fetch.project_key(), false, Default::default()) + .fetch(fetch.project_key(), false, fetch.revision()) .await { Ok(result) => result, diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index 401578a967..c6752689c0 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -21,7 +21,7 @@ use crate::utils::RetryBackoff; /// /// [`Shared`] can be extended through [`Shared::get_or_create`], in which case /// the private state is missing. Users of [`Shared::get_or_create`] *must* trigger -/// a fetch to create the private state when [`Missing`] is returned. +/// a fetch to create the private state and keep it updated. /// This gurnatuees that eventually the project state is populated, but for a undetermined, /// time it is possible that shared state exists without the respective private state. #[derive(Default)] @@ -181,6 +181,24 @@ impl Shared { } } +/// TEST ONLY bypass to make the project cache mockable. +#[cfg(test)] +impl Shared { + /// Updates the project state for a project. + /// + /// TEST ONLY! + pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { + self.projects + .pin() + .get_or_insert_with(project_key, Default::default) + .set_project_state(state); + } + + pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { + self.projects.pin().contains_key(&project_key) + } +} + impl fmt::Debug for Shared { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Shared") @@ -214,6 +232,18 @@ impl SharedProject { } } +/// TEST ONLY bypass to make the project cache mockable. +#[cfg(test)] +impl SharedProject { + /// Creates a new [`SharedProject`] for testing only. + pub fn for_test(state: ProjectState) -> Self { + Self(Arc::new(SharedProjectStateInner { + state, + ..Default::default() + })) + } +} + /// Reference to a full project wrapping shared and private state. struct ProjectRef<'a> { shared: SharedProjectState, @@ -268,6 +298,14 @@ impl Fetch { self.when } + /// Returns the revisions of the currently cached project. + /// + /// If the upstream indicates it does not have a different version of this project + /// we do not need to update the local state. + pub fn revision(&self) -> Revision { + self.revision.clone() + } + /// Completes the fetch with a result and returns a [`CompletedFetch`]. pub fn complete(self, state: SourceProjectState) -> CompletedFetch { CompletedFetch { diff --git a/relay-server/src/services/projects/project/info.rs b/relay-server/src/services/projects/project/info.rs index aa37e46431..ae477849ad 100644 --- a/relay-server/src/services/projects/project/info.rs +++ b/relay-server/src/services/projects/project/info.rs @@ -188,9 +188,6 @@ impl ProjectInfo { /// This scoping amends `RequestMeta::get_partial_scoping` by adding organization and key info. /// The processor must fetch the full scoping before attempting to rate limit with partial /// scoping. - /// - /// To get the own scoping of this ProjectKey without amending request information, use - /// [`Project::scoping`](crate::services::projects::project::Project::scoping) instead. 
pub fn scope_request(&self, meta: &RequestMeta) -> Scoping { let mut scoping = meta.get_partial_scoping(); @@ -291,3 +288,9 @@ impl PartialEq for Revision { } } } + +impl From<&str> for Revision { + fn from(value: &str) -> Self { + Self(Some(value.into())) + } +} diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index 17fda43ef3..5e23327370 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -8,7 +8,7 @@ use relay_quotas::Scoping; mod info; -pub use self::info::{LimitedProjectInfo, ProjectInfo, Revision}; +pub use self::info::*; /// Representation of a project's current state. #[derive(Clone, Debug, Default)] diff --git a/relay-server/src/services/projects/source/upstream.rs b/relay-server/src/services/projects/source/upstream.rs index cab39d4c19..6f051c5ec4 100644 --- a/relay-server/src/services/projects/source/upstream.rs +++ b/relay-server/src/services/projects/source/upstream.rs @@ -692,7 +692,7 @@ mod tests { let mut response1 = service.send(FetchProjectState { project_key, - current_revision: Some("123".to_owned()), + current_revision: "123".into(), no_cache: false, }); @@ -703,7 +703,7 @@ mod tests { // request, after responding to the first inflight request. let mut response2 = service.send(FetchProjectState { project_key, - current_revision: None, + current_revision: Revision::default(), no_cache: false, }); diff --git a/relay-server/src/testutils.rs b/relay-server/src/testutils.rs index 358a2a929d..56e9742925 100644 --- a/relay-server/src/testutils.rs +++ b/relay-server/src/testutils.rs @@ -20,6 +20,7 @@ use crate::service::create_redis_pools; use crate::services::global_config::GlobalConfigHandle; use crate::services::outcome::TrackOutcome; use crate::services::processor::{self, EnvelopeProcessorService}; +use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::ProjectInfo; use crate::services::test_store::TestStore; use crate::utils::{ThreadPool, ThreadPoolBuilder}; @@ -117,7 +118,6 @@ pub fn empty_envelope_with_dsn(dsn: &str) -> Box { pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); - let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (aggregator, _) = mock_service("aggregator", (), |&mut (), _| {}); let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); @@ -132,12 +132,12 @@ pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), + ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, processor::Addrs { outcome_aggregator, - legacy_project_cache: project_cache, upstream_relay, test_store, #[cfg(feature = "processing")] @@ -162,6 +162,7 @@ pub fn create_test_processor_with_addrs( create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), + ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index 5d9bd7adf1..f3e8c4ccef 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -238,6 +238,8 @@ def 
assert_clear_test_failures(): relay.send_event(project_key) assert mini_sentry.captured_events.empty() + time.sleep(1) + # Fix the config. config = mini_sentry.project_configs[project_key] config["slug"] = "some-slug" diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index 9c3a40014a..d7ab5cc2e3 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -50,17 +50,14 @@ def test_local_project_config(mini_sentry, relay): assert event["logentry"] == {"formatted": "Hello, World!"} relay.config_dir.join("projects").join("42.json").write( - json.dumps({"disabled": True}) + json.dumps({"disabled": True, "publicKeys": config["publicKeys"]}) ) - time.sleep(2) - try: - # This may or may not respond with 403, depending on how quickly the future to fetch project - # states executes. - relay.send_event(project_id, dsn_key=dsn_key) - except HTTPError: - pass + time.sleep(2) + relay.send_event(project_id, dsn_key=dsn_key) + time.sleep(0.3) + pytest.raises(HTTPError, lambda: relay.send_event(project_id, dsn_key=dsn_key)) pytest.raises(queue.Empty, lambda: mini_sentry.captured_events.get(timeout=1)) @@ -85,6 +82,7 @@ def get_project_config(): "miss_expiry": 1, "project_expiry": 1, "project_grace_period": grace_period, + "eviction_interval": 1, } }, ) @@ -329,7 +327,8 @@ def get_project_config(): "cache": { "miss_expiry": 1, "project_expiry": 1, - "project_grace_period": 0, + "project_grace_period": 5, + "eviction_interval": 1, } }, ) @@ -348,8 +347,8 @@ def get_project_config(): relay.send_event(42) - assert project_config_fetch.wait(timeout=1) - assert with_rev.wait(timeout=1) + assert project_config_fetch.wait(timeout=2) + assert with_rev.wait(timeout=2) event = mini_sentry.captured_events.get(timeout=1).get_event() assert event["logentry"] == {"formatted": "Hello, World!"} From 2c8aeb4233e164a06842fc6cc748503b1b64ec14 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Thu, 24 Oct 2024 13:36:16 +0200 Subject: [PATCH 09/15] unit tests --- .../src/services/projects/cache/service.rs | 2 +- .../src/services/projects/cache/state.rs | 209 +++++++++++++++++- 2 files changed, 206 insertions(+), 5 deletions(-) diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index e61583de39..7dc801f99e 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -109,7 +109,7 @@ impl ProjectCacheService { let project_updates = self.project_update_tx.clone(); tokio::spawn(async move { - tokio::time::sleep_until(fetch.when().into()).await; + tokio::time::sleep_until(fetch.when()).await; // TODO: cached state for delta fetches, maybe this should just be a revision? let state = match source diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index c6752689c0..e61387613d 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -1,6 +1,6 @@ use std::fmt; use std::sync::Arc; -use std::time::Instant; +use tokio::time::Instant; use arc_swap::ArcSwap; use relay_base_schema::project::ProjectKey; @@ -54,9 +54,8 @@ impl ProjectStore { /// [`ProjectState`] is still pending or already deemed expired. #[must_use = "an incomplete fetch must be retried"] pub fn complete_fetch(&mut self, fetch: CompletedFetch, config: &Config) -> Option { - // TODO: what if in the meantime the state expired and was evicted? 
- // Should eviction be possible for states that are currently in progress, I don't think so. - // Maybe need to discard outdated fetches (for evicted projects)? + // Eviction is not possible for projects which are currently being fetched. + // Hence if there was a started fetch, the project state must always exist at this stage. debug_assert!(self.shared.projects.pin().get(&fetch.project_key).is_some()); debug_assert!(self.private.get(&fetch.project_key).is_some()); @@ -570,3 +569,205 @@ impl Expiry { matches!(self, Self::Expired) } } + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + fn collect_evicted(store: &mut ProjectStore, config: &Config) -> Vec { + let mut evicted = Vec::new(); + let num_evicted = store.evict_stale_projects(config, |pk| evicted.push(pk)); + assert_eq!(evicted.len(), num_evicted); + evicted + } + + macro_rules! assert_state { + ($store:ident, $project_key:ident, $state:pat) => { + assert!(matches!( + $store.shared().get_or_create($project_key).project_state(), + $state + )); + }; + } + + #[test] + fn test_store_fetch() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Default::default(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + assert!(fetch.when() < Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Fetch already in progress, nothing to do. + assert!(store.try_begin_fetch(project_key, &config).is_none()); + + // A pending fetch should trigger a new fetch immediately. + let fetch = fetch.complete(ProjectState::Pending.into()); + let fetch = store.complete_fetch(fetch, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + // First backoff is still immediately. + assert!(fetch.when() < Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Pending again. + let fetch = fetch.complete(ProjectState::Pending.into()); + let fetch = store.complete_fetch(fetch, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + // This time it needs to be in the future (backoff). + assert!(fetch.when() > Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Now complete with disabled. + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Disabled); + + // A new fetch is not yet necessary. 
+ assert!(store.try_begin_fetch(project_key, &config).is_none()); + } + + #[tokio::test(start_paused = true)] + async fn test_store_fetch_pending_does_not_replace_state() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 5, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Disabled); + + tokio::time::advance(Duration::from_secs(6)).await; + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Pending.into()); + // We're returned a new fetch, because the current one completed pending. + let fetch = store.complete_fetch(fetch, &config).unwrap(); + // The old cached state is still available and not replaced. + assert_state!(store, project_key, ProjectState::Disabled); + + let fetch = fetch.complete(ProjectState::new_allowed().into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Enabled(_)); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects() { + let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 0, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + // 3 seconds is not enough to expire any project. + tokio::time::advance(Duration::from_secs(3)).await; + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + let fetch = store.try_begin_fetch(project_key2, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // A total of 6 seconds should expire the first project. + tokio::time::advance(Duration::from_secs(3)).await; + + assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); + assert_state!(store, project_key1, ProjectState::Pending); + assert_state!(store, project_key2, ProjectState::Disabled); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects_pending_not_expired() { + let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 0, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); + // Create a new project in a pending state, but never fetch it, this should also never expire. 
+ store.shared().get_or_create(project_key2); + + tokio::time::advance(Duration::from_secs(6)).await; + + // No evictions, project is pending. + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + + // Complete the project. + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // Still should not be evicted, because we do have 5 seconds to expire since completion. + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + tokio::time::advance(Duration::from_secs(4)).await; + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + // Just enough to expire the project. + tokio::time::advance(Duration::from_millis(1001)).await; + assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); + assert_state!(store, project_key1, ProjectState::Pending); + assert_state!(store, project_key2, ProjectState::Pending); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects_stale() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 5, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // This is in the grace period, but not yet expired. + tokio::time::advance(Duration::from_millis(9500)).await; + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key, ProjectState::Disabled); + + // Now it's expired. 
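An illustrative aside on the testing machinery these eviction tests rely on (not part of the patch): switching `state.rs` to `tokio::time::Instant` is what makes `#[tokio::test(start_paused = true)]` together with `tokio::time::advance` work, so expiry can be tested deterministically without real sleeps. A standalone sketch, assuming tokio's `test-util` feature is enabled as it is for these tests:

    use std::time::Duration;
    use tokio::time::Instant;

    #[tokio::test(start_paused = true)]
    async fn paused_clock_is_deterministic() {
        let start = Instant::now();

        // With a paused runtime the clock only moves when the test advances it,
        // so this 6 second jump takes no wall-clock time.
        tokio::time::advance(Duration::from_secs(6)).await;

        assert!(start.elapsed() >= Duration::from_secs(6));
    }
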
+ tokio::time::advance(Duration::from_secs(1)).await; + + assert_eq!(collect_evicted(&mut store, &config), vec![project_key]); + assert_state!(store, project_key, ProjectState::Pending); + } +} From 1310d9db73d7e2c274b9f569c91a6eef39de2501 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Thu, 24 Oct 2024 13:42:16 +0200 Subject: [PATCH 10/15] flaky spooler test --- relay-server/src/services/buffer/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 5546889283..0dff761dcd 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -690,6 +690,7 @@ mod tests { let project_key = envelope.meta().public_key(); addr.send(EnvelopeBuffer::Push(envelope.clone())); + tokio::time::sleep(Duration::from_secs(2)).await; let message = tokio::time::timeout(Duration::from_secs(3), envelopes_rx.recv()); let Some(legacy::DequeuedEnvelope(envelope)) = message.await.unwrap() else { From 2d417445e7b674a13a4626ab30206214ce39c189 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Thu, 24 Oct 2024 13:50:12 +0200 Subject: [PATCH 11/15] more rustdoc --- .../src/services/projects/cache/handle.rs | 16 ++++++++++++++++ .../src/services/projects/cache/project.rs | 3 +++ .../src/services/projects/cache/state.rs | 1 + 3 files changed, 20 insertions(+) diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs index 7d203252ea..68feeb9e9b 100644 --- a/relay-server/src/services/projects/cache/handle.rs +++ b/relay-server/src/services/projects/cache/handle.rs @@ -10,6 +10,10 @@ use super::state::Shared; use crate::services::projects::cache::service::ProjectEvent; use crate::services::projects::cache::{Project, ProjectCache}; +/// A synchronous handle to the [`ProjectCache`]. +/// +/// The handle allows lock free access to cached projects. It also acts as an interface +/// to the [`ProjectCacheService`](super::ProjectCacheService). #[derive(Clone)] pub struct ProjectCacheHandle { pub(super) shared: Arc, @@ -56,6 +60,9 @@ mod test { use crate::services::projects::project::ProjectState; impl ProjectCacheHandle { + /// Creates a new [`ProjectCacheHandle`] for testing only. + /// + /// A project cache handle created this way does not require a service to function. pub fn for_test() -> Self { Self { shared: Default::default(), @@ -65,6 +72,9 @@ mod test { } } + /// Sets the project state for a project. + /// + /// This can be used to emulate a project cache update in tests. pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { let is_pending = state.is_pending(); self.shared.test_set_project_state(project_key, state); @@ -75,10 +85,16 @@ mod test { } } + /// Returns `true` if there is a project created for this `project_key`. + /// + /// A project is automatically created on access via [`Self::get`]. pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { self.shared.test_has_project_created(project_key) } + /// The amount of fetches triggered for projects. + /// + /// A fetch is triggered for both [`Self::get`] and [`Self::fetch`]. 
pub fn test_num_fetches(&self) -> u64 { self.service.len() } diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs index 93a50159c2..3cbe02ba6e 100644 --- a/relay-server/src/services/projects/cache/project.rs +++ b/relay-server/src/services/projects/cache/project.rs @@ -22,14 +22,17 @@ impl<'a> Project<'a> { Self { shared, config } } + /// Returns a reference to the currently cached project state. pub fn project_state(&self) -> &ProjectState { self.shared.project_state() } + /// Returns a reference to the currently cached rate limits. pub fn rate_limits(&self) -> &CachedRateLimits { self.shared.cached_rate_limits() } + /// Returns a reference to the currently reservoir counters. pub fn reservoir_counters(&self) -> &ReservoirCounters { self.shared.reservoir_counters() } diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index e61387613d..22df147b11 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -193,6 +193,7 @@ impl Shared { .set_project_state(state); } + /// Returns `true` if there exists a shared state for the passed `project_key`. pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { self.projects.pin().contains_key(&project_key) } From dcad925d706853984401a93822c5921026655d5d Mon Sep 17 00:00:00 2001 From: David Herberth Date: Mon, 28 Oct 2024 08:20:05 +0100 Subject: [PATCH 12/15] review changes, spelling, ahash, rm redundant metric --- Cargo.lock | 9 +++-- Cargo.toml | 1 + relay-server/Cargo.toml | 1 + relay-server/src/endpoints/common.rs | 4 +- relay-server/src/services/processor.rs | 12 +++--- .../src/services/projects/cache/service.rs | 5 ++- .../src/services/projects/cache/state.rs | 38 +++++++++---------- .../src/services/projects/project/info.rs | 2 +- relay-server/src/statsd.rs | 3 -- 9 files changed, 38 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 79da6b4549..1ca067f14f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" -version = "0.8.8" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", @@ -407,7 +407,7 @@ dependencies = [ "bitflags 2.4.1", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -3004,7 +3004,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.77", @@ -3736,6 +3736,7 @@ dependencies = [ name = "relay-server" version = "24.10.0" dependencies = [ + "ahash", "anyhow", "arc-swap", "axum", diff --git a/Cargo.toml b/Cargo.toml index 3fd9e67567..7b41485040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ relay-test = { path = "relay-test" } relay-protocol-derive = { path = "relay-protocol-derive" } relay-event-derive = { path = "relay-event-derive" } +ahash = "0.8.11" android_trace_log = { version = "0.3.0", features = ["serde"] } anyhow = "1.0.66" axum = "0.7.5" diff --git a/relay-server/Cargo.toml 
b/relay-server/Cargo.toml index 4ae8cbe9e6..34862cd152 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -29,6 +29,7 @@ processing = [ workspace = true [dependencies] +ahash = { workspace = true } anyhow = { workspace = true } serde_path_to_error = { workspace = true } axum = { workspace = true, features = ["macros", "matched-path", "tracing"] } diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 13e8aaffe2..1b9c1814e1 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -12,7 +12,7 @@ use crate::envelope::{AttachmentType, Envelope, EnvelopeError, Item, ItemType, I use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{MetricData, ProcessMetrics, ProcessingGroup}; +use crate::services::processor::{BucketSource, MetricData, ProcessMetrics, ProcessingGroup}; use crate::services::projects::cache::legacy::ValidateEnvelope; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -276,7 +276,7 @@ fn queue_envelope( start_time: envelope.meta().start_time(), sent_at: envelope.sent_at(), project_key: envelope.meta().public_key(), - source: envelope.meta().into(), + source: BucketSource::from_meta(envelope.meta()), }); } } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 35bd47ef9e..40e79009d7 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -996,12 +996,12 @@ pub enum BucketSource { External, } -impl From<&RequestMeta> for BucketSource { - fn from(value: &RequestMeta) -> Self { - if value.is_from_internal_relay() { - Self::Internal - } else { - Self::External +impl BucketSource { + /// Infers the bucket source from [`RequestMeta::is_from_internal_relay`]. + pub fn from_meta(meta: &RequestMeta) -> Self { + match meta.is_from_internal_relay() { + true => Self::Internal, + false => Self::External, } } } diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index 7dc801f99e..9f9d821058 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -19,7 +19,7 @@ use crate::statsd::RelayTimers; /// receiving events. /// /// It is set to such a large amount because receivers of events currently -/// do not deal with lags in the channel gracefuly. +/// do not deal with lags in the channel gracefully. const PROJECT_EVENTS_CHANNEL_SIZE: usize = 512_000; /// A cache for projects, which allows concurrent access to the cached projects. @@ -51,9 +51,12 @@ impl relay_system::FromMessage for ProjectCache { } } +/// Project life-cycle events produced by the project cache. #[derive(Debug, Copy, Clone)] pub enum ProjectEvent { + /// A project was successfully fetched and is now ready to use. Ready(ProjectKey), + /// A project expired from the cache and was evicted. 
Evicted(ProjectKey), } diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index 22df147b11..091be28a54 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -11,24 +11,24 @@ use relay_statsd::metric; use crate::services::projects::project::{ProjectState, Revision}; use crate::services::projects::source::SourceProjectState; -use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; +use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::RetryBackoff; /// The backing storage for a project cache. /// -/// Exposes the only interface to delete from [`Shared`], gurnatueed by +/// Exposes the only interface to delete from [`Shared`], guaranteed by /// requiring exclusive/mutable access to [`ProjectStore`]. /// /// [`Shared`] can be extended through [`Shared::get_or_create`], in which case /// the private state is missing. Users of [`Shared::get_or_create`] *must* trigger /// a fetch to create the private state and keep it updated. -/// This gurnatuees that eventually the project state is populated, but for a undetermined, +/// This guarantees that eventually the project state is populated, but for a undetermined, /// time it is possible that shared state exists without the respective private state. #[derive(Default)] pub struct ProjectStore { /// The shared state, which can be accessed concurrently. shared: Arc, - /// The private, mutably exclusve state, used to maintain the project state. + /// The private, mutably exclusive state, used to maintain the project state. private: hashbrown::HashMap, } @@ -111,8 +111,6 @@ impl ProjectStore { histogram(RelayHistograms::ProjectStateCacheSize) = self.private.len() as u64, storage = "private" ); - // TODO: this can be replaced with a service level timing metric - metric!(timer(RelayTimers::ProjectStateEvictionDuration) = eviction_start.elapsed()); metric!(counter(RelayCounters::EvictingStaleProjectCaches) += evicted as u64); evicted @@ -120,12 +118,12 @@ impl ProjectStore { /// Get a reference to the current project or create a new project. /// - /// For internal use only, a created project must always be fetched immeditately. + /// For internal use only, a created project must always be fetched immediately. fn get_or_create(&mut self, project_key: ProjectKey, config: &Config) -> ProjectRef<'_> { #[cfg(debug_assertions)] if self.private.contains_key(&project_key) { // We have exclusive access to the private part, there are no concurrent deletions - // hence when if we have a private state there must always be a shared state as well. + // hence if we have a private state there must always be a shared state as well. // // The opposite is not true, the shared state may have been created concurrently // through the shared access. @@ -157,7 +155,7 @@ impl ProjectStore { /// The shared and concurrently accessible handle to the project cache. #[derive(Default)] pub struct Shared { - projects: papaya::HashMap, + projects: papaya::HashMap, } impl Shared { @@ -290,9 +288,9 @@ impl Fetch { self.project_key } - /// Returns when the the fetch for the project should be scheduled. + /// Returns when the fetch for the project should be scheduled. /// - /// This can be now (as soon as possible) or a alter point in time, if the project is currently + /// This can be now (as soon as possible) or a later point in time, if the project is currently /// in a backoff. 
pub fn when(&self) -> Instant { self.when @@ -416,8 +414,8 @@ enum FetchState { /// `None` means soon as possible. next_fetch_attempt: Option, }, - /// There was a successful non-pending fetch, - NonPending { + /// There was a successful non-pending fetch. + Complete { /// Time when the fetch was completed. last_fetch: LastFetch, }, @@ -451,7 +449,7 @@ impl PrivateProjectState { /// was fetched successfully before. fn last_fetch(&self) -> Option { match &self.state { - FetchState::NonPending { last_fetch } => Some(*last_fetch), + FetchState::Complete { last_fetch } => Some(*last_fetch), _ => None, } } @@ -469,7 +467,7 @@ impl PrivateProjectState { // Schedule a new fetch, even if there is a backoff, it will just be sleeping for a while. next_fetch_attempt.unwrap_or(now) } - FetchState::NonPending { last_fetch } => { + FetchState::Complete { last_fetch } => { if last_fetch.check_expiry(now, config).is_updated() { // The current state is up to date, no need to start another fetch. relay_log::trace!( @@ -519,7 +517,7 @@ impl PrivateProjectState { "project state fetch completed with non-pending config" ); self.backoff.reset(); - self.state = FetchState::NonPending { + self.state = FetchState::Complete { last_fetch: LastFetch(now), }; } @@ -541,7 +539,7 @@ impl LastFetch { } else if elapsed >= expiry { Expiry::Stale } else { - Expiry::Updated + Expiry::Fresh } } } @@ -550,7 +548,7 @@ impl LastFetch { #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] enum Expiry { /// The project state is perfectly up to date. - Updated, + Fresh, /// The project state is outdated but events depending on this project state can still be /// processed. The state should be refreshed in the background though. Stale, @@ -560,9 +558,9 @@ enum Expiry { } impl Expiry { - /// Returns `true` if the project is uo to date and does not need to be fetched. + /// Returns `true` if the project is up-to-date and does not need to be fetched. fn is_updated(&self) -> bool { - matches!(self, Self::Updated) + matches!(self, Self::Fresh) } /// Returns `true` if the project is expired and can be evicted. diff --git a/relay-server/src/services/projects/project/info.rs b/relay-server/src/services/projects/project/info.rs index ae477849ad..d6fb7cfaab 100644 --- a/relay-server/src/services/projects/project/info.rs +++ b/relay-server/src/services/projects/project/info.rs @@ -264,7 +264,7 @@ pub struct PublicKeyConfig { /// A revision can be missing, a missing revision never compares equal /// to any other revision. /// -/// Revisions are internally reference counted and cheap the create. +/// Revisions are internally reference counted and cheap to create. #[derive(Debug, Default, Clone, Serialize, Deserialize)] #[serde(transparent)] pub struct Revision(Option>); diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 18f4c1a190..dca6421e01 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -386,8 +386,6 @@ pub enum RelayTimers { /// Total time in milliseconds an envelope spends in Relay from the time it is received until it /// finishes processing and has been submitted to the upstream. EnvelopeTotalTime, - /// Total time in milliseconds spent evicting outdated and unused projects happens. - ProjectStateEvictionDuration, /// Total time in milliseconds spent fetching queued project configuration updates requests to /// resolve. 
/// @@ -566,7 +564,6 @@ impl TimerMetric for RelayTimers { RelayTimers::EnvelopeWaitTime => "event.wait_time", RelayTimers::EnvelopeProcessingTime => "event.processing_time", RelayTimers::EnvelopeTotalTime => "event.total_time", - RelayTimers::ProjectStateEvictionDuration => "project_state.eviction.duration", RelayTimers::ProjectStateRequestDuration => "project_state.request.duration", #[cfg(feature = "processing")] RelayTimers::ProjectStateDecompression => "project_state.decompression", From e459a3093953af6d72a96553ff3d5b659613506f Mon Sep 17 00:00:00 2001 From: David Herberth Date: Wed, 30 Oct 2024 07:53:18 +0100 Subject: [PATCH 13/15] small review changes --- relay-server/src/endpoints/project_configs.rs | 2 +- relay-server/src/services/buffer/mod.rs | 6 ++-- relay-server/src/services/processor.rs | 2 +- .../src/services/projects/cache/handle.rs | 18 ++++++------ .../src/services/projects/cache/legacy.rs | 28 +++++++++---------- .../src/services/projects/cache/mod.rs | 2 +- .../src/services/projects/cache/project.rs | 4 +-- .../src/services/projects/cache/service.rs | 17 ++++++----- .../src/services/projects/cache/state.rs | 4 +-- 9 files changed, 42 insertions(+), 41 deletions(-) diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 38876be44c..ff59384618 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -169,7 +169,7 @@ async fn inner( for (project_key, revision) in into_valid_keys(inner.public_keys, inner.revisions) { let project = state.project_cache_handle().get(project_key); - let project_info = match project.project_state() { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { // Don't insert project config. Downstream Relay will consider it disabled. 
diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 0dff761dcd..10a4319319 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -21,7 +21,7 @@ use crate::services::outcome::DiscardReason; use crate::services::outcome::Outcome; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectEvent}; +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectChange}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::ManagedEnvelope; @@ -397,7 +397,7 @@ impl Service for EnvelopeBufferService { buffer.initialize().await; let mut shutdown = Controller::shutdown_handle(); - let mut project_events = self.services.project_cache_handle.events(); + let mut project_events = self.services.project_cache_handle.changes(); relay_log::info!("EnvelopeBufferService: starting"); loop { @@ -427,7 +427,7 @@ impl Service for EnvelopeBufferService { } } } - Ok(ProjectEvent::Ready(project_key)) = project_events.recv() => { + Ok(ProjectChange::Ready(project_key)) = project_events.recv() => { Self::handle_message(&mut buffer, EnvelopeBuffer::Ready(project_key)).await; sleep = Duration::ZERO; } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 40e79009d7..0b5441b444 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -2126,7 +2126,7 @@ impl EnvelopeProcessorService { // Best effort check to filter and rate limit buckets, if there is no project state // available at the current time, we will check again after flushing. - let buckets = match project.project_state() { + let buckets = match project.state() { ProjectState::Enabled(project_info) => { let rate_limits = project.rate_limits().current_limits(); self.check_buckets(project_key, project_info, &rate_limits, buckets) diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs index 68feeb9e9b..415bad5b75 100644 --- a/relay-server/src/services/projects/cache/handle.rs +++ b/relay-server/src/services/projects/cache/handle.rs @@ -7,7 +7,7 @@ use relay_system::Addr; use tokio::sync::broadcast; use super::state::Shared; -use crate::services::projects::cache::service::ProjectEvent; +use crate::services::projects::cache::service::ProjectChange; use crate::services::projects::cache::{Project, ProjectCache}; /// A synchronous handle to the [`ProjectCache`]. @@ -19,7 +19,7 @@ pub struct ProjectCacheHandle { pub(super) shared: Arc, pub(super) config: Arc, pub(super) service: Addr, - pub(super) project_events: broadcast::Sender, + pub(super) project_changes: broadcast::Sender, } impl ProjectCacheHandle { @@ -37,12 +37,12 @@ impl ProjectCacheHandle { self.service.send(ProjectCache::Fetch(project_key)); } - /// Returns a subscription to all [`ProjectEvent`]'s. + /// Returns a subscription to all [`ProjectChange`]'s. /// /// This stream notifies the subscriber about project state changes in the project cache. /// Events may arrive in arbitrary order and be delivered multiple times. 
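For illustration only (not part of the patch), a lag-tolerant consumer of this subscription could look like the sketch below. `watch_projects` is a hypothetical function; the types and the `Lagged` error it tolerates follow from the tokio broadcast channel and the comment on `PROJECT_EVENTS_CHANNEL_SIZE` above, which notes that current receivers do not yet handle lag gracefully:

    use tokio::sync::broadcast::error::RecvError;

    use crate::services::projects::cache::{ProjectCacheHandle, ProjectChange};

    async fn watch_projects(handle: ProjectCacheHandle) {
        let mut changes = handle.changes();
        loop {
            match changes.recv().await {
                Ok(ProjectChange::Ready(project_key)) => {
                    relay_log::trace!("project {project_key} is ready");
                }
                Ok(ProjectChange::Evicted(project_key)) => {
                    relay_log::trace!("project {project_key} was evicted");
                }
                // A slow subscriber only loses notifications, not correctness:
                // it can always re-check a project via `handle.get(..)`.
                Err(RecvError::Lagged(skipped)) => {
                    relay_log::warn!("missed {skipped} project change notifications");
                }
                Err(RecvError::Closed) => break,
            }
        }
    }
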
- pub fn events(&self) -> broadcast::Receiver { - self.project_events.subscribe() + pub fn changes(&self) -> broadcast::Receiver { + self.project_changes.subscribe() } } @@ -68,7 +68,7 @@ mod test { shared: Default::default(), config: Default::default(), service: Addr::dummy(), - project_events: broadcast::channel(999_999).0, + project_changes: broadcast::channel(999_999).0, } } @@ -79,9 +79,11 @@ mod test { let is_pending = state.is_pending(); self.shared.test_set_project_state(project_key, state); if is_pending { - let _ = self.project_events.send(ProjectEvent::Evicted(project_key)); + let _ = self + .project_changes + .send(ProjectChange::Evicted(project_key)); } else { - let _ = self.project_events.send(ProjectEvent::Ready(project_key)); + let _ = self.project_changes.send(ProjectChange::Ready(project_key)); } } diff --git a/relay-server/src/services/projects/cache/legacy.rs b/relay-server/src/services/projects/cache/legacy.rs index 33850df77c..0dc95464c1 100644 --- a/relay-server/src/services/projects/cache/legacy.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -8,7 +8,7 @@ use crate::services::global_config; use crate::services::processor::{ EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; -use crate::services::projects::cache::{CheckedEnvelope, ProjectCacheHandle, ProjectEvent}; +use crate::services::projects::cache::{CheckedEnvelope, ProjectCacheHandle, ProjectChange}; use crate::Envelope; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; @@ -227,10 +227,10 @@ impl ProjectCacheBroker { } } - fn handle_project_event(&mut self, event: ProjectEvent) { + fn handle_project_event(&mut self, event: ProjectChange) { match event { - ProjectEvent::Ready(_) => self.schedule_unspool(), - ProjectEvent::Evicted(project_key) => self.evict_project(project_key), + ProjectChange::Ready(_) => self.schedule_unspool(), + ProjectChange::Evicted(project_key) => self.evict_project(project_key), } } @@ -240,7 +240,7 @@ impl ProjectCacheBroker { let project = self.projects.get(project_key); - let project_info = match project.project_state() { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -274,7 +274,7 @@ impl ProjectCacheBroker { .envelope() .sampling_key() .map(|key| self.projects.get(key)) - .and_then(|p| p.project_state().clone().enabled()) + .and_then(|p| p.state().clone().enabled()) .filter(|info| info.organization_id == project_info.organization_id); let process = ProcessEnvelope { @@ -313,7 +313,7 @@ impl ProjectCacheBroker { let own_key = envelope.meta().public_key(); let project = self.projects.get(own_key); - let project_state = match project.project_state() { + let project_state = match project.state() { ProjectState::Enabled(state) => Some(state), ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -327,7 +327,7 @@ impl ProjectCacheBroker { let mut requires_sampling_state = sampling_key.is_some(); let sampling_info = if let Some(sampling_key) = sampling_key { let sampling_project = self.projects.get(sampling_key); - match sampling_project.project_state() { + match sampling_project.state() { ProjectState::Enabled(info) => Some(Arc::clone(info)), ProjectState::Disabled => { relay_log::trace!("Sampling state is disabled ({sampling_key})"); @@ -370,7 +370,7 @@ impl ProjectCacheBroker { for (project_key, buckets) in message.buckets { let project = 
self.projects.get(project_key); - let project_info = match project.project_state() { + let project_info = match project.state() { ProjectState::Pending => { no_project += 1; @@ -454,7 +454,7 @@ impl ProjectCacheBroker { let project = self.projects.get(own_key); // Check if project config is enabled. - let project_info = match project.project_state() { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( @@ -476,7 +476,7 @@ impl ProjectCacheBroker { let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, - self.projects.get(sampling_key).project_state().clone(), + self.projects.get(sampling_key).state().clone(), ) }) { Some((_, ProjectState::Enabled(info))) => { @@ -551,7 +551,7 @@ impl ProjectCacheBroker { fn is_state_cached(&mut self, key: &QueueKey) -> bool { key.unique_keys() .iter() - .all(|key| !self.projects.get(*key).project_state().is_pending()) + .all(|key| !self.projects.get(*key).state().is_pending()) } /// Iterates the buffer index and tries to unspool the envelopes for projects with a valid @@ -689,7 +689,7 @@ impl Service for ProjectCacheService { mut global_config_rx, mut envelopes_rx, } = self; - let mut project_events = project_cache_handle.events(); + let mut project_events = project_cache_handle.changes(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); @@ -902,7 +902,7 @@ mod tests { let (mut broker, _buffer_svc) = project_cache_broker_setup(services.clone(), buffer_tx).await; let projects = broker.projects.clone(); - let mut project_events = projects.events(); + let mut project_events = projects.changes(); broker.global_config = GlobalConfigStatus::Ready; let (tx_assert, mut rx_assert) = mpsc::unbounded_channel(); diff --git a/relay-server/src/services/projects/cache/mod.rs b/relay-server/src/services/projects/cache/mod.rs index 7d2b1d1afa..35ee749ecc 100644 --- a/relay-server/src/services/projects/cache/mod.rs +++ b/relay-server/src/services/projects/cache/mod.rs @@ -7,4 +7,4 @@ pub mod legacy; pub use self::handle::ProjectCacheHandle; pub use self::project::{CheckedEnvelope, Project}; -pub use self::service::{ProjectCache, ProjectCacheService, ProjectEvent}; +pub use self::service::{ProjectCache, ProjectCacheService, ProjectChange}; diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs index 3cbe02ba6e..4b9e95a2be 100644 --- a/relay-server/src/services/projects/cache/project.rs +++ b/relay-server/src/services/projects/cache/project.rs @@ -23,7 +23,7 @@ impl<'a> Project<'a> { } /// Returns a reference to the currently cached project state. 
- pub fn project_state(&self) -> &ProjectState { + pub fn state(&self) -> &ProjectState { self.shared.project_state() } @@ -50,7 +50,7 @@ impl<'a> Project<'a> { &self, mut envelope: ManagedEnvelope, ) -> Result { - let state = match self.project_state() { + let state = match self.state() { ProjectState::Enabled(state) => Some(Arc::clone(state)), ProjectState::Disabled => { // TODO(jjbayer): We should refactor this function to either return a Result or diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index 9f9d821058..f668512663 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -51,9 +51,9 @@ impl relay_system::FromMessage for ProjectCache { } } -/// Project life-cycle events produced by the project cache. +/// Project life-cycle changes produced by the project cache. #[derive(Debug, Copy, Clone)] -pub enum ProjectEvent { +pub enum ProjectChange { /// A project was successfully fetched and is now ready to use. Ready(ProjectKey), /// A project expired from the cache and was evicted. @@ -69,7 +69,7 @@ pub struct ProjectCacheService { project_update_rx: mpsc::UnboundedReceiver, project_update_tx: mpsc::UnboundedSender, - project_events_tx: broadcast::Sender, + project_events_tx: broadcast::Sender, } impl ProjectCacheService { @@ -98,7 +98,7 @@ impl ProjectCacheService { shared: self.store.shared(), config: Arc::clone(&self.config), service: addr, - project_events: self.project_events_tx.clone(), + project_changes: self.project_events_tx.clone(), }; self.spawn_handler(addr_rx); @@ -114,7 +114,6 @@ impl ProjectCacheService { tokio::spawn(async move { tokio::time::sleep_until(fetch.when()).await; - // TODO: cached state for delta fetches, maybe this should just be a revision? let state = match source .fetch(fetch.project_key(), false, fetch.revision()) .await @@ -146,7 +145,7 @@ impl ProjectCacheService { } } - fn handle_project_update(&mut self, fetch: CompletedFetch) { + fn handle_completed_fetch(&mut self, fetch: CompletedFetch) { let project_key = fetch.project_key(); if let Some(fetch) = self.store.complete_fetch(fetch, &self.config) { @@ -160,14 +159,14 @@ impl ProjectCacheService { let _ = self .project_events_tx - .send(ProjectEvent::Ready(project_key)); + .send(ProjectChange::Ready(project_key)); } fn handle_evict_stale_projects(&mut self) { let on_evict = |project_key| { let _ = self .project_events_tx - .send(ProjectEvent::Evicted(project_key)); + .send(ProjectChange::Evicted(project_key)); }; self.store.evict_stale_projects(&self.config, on_evict); @@ -204,7 +203,7 @@ impl relay_system::Service for ProjectCacheService { Some(update) = self.project_update_rx.recv() => timed!( "project_update", - self.handle_project_update(update) + self.handle_completed_fetch(update) ), Some(message) = rx.recv() => timed!( message.variant(), diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index 091be28a54..e6d670c0d3 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -468,7 +468,7 @@ impl PrivateProjectState { next_fetch_attempt.unwrap_or(now) } FetchState::Complete { last_fetch } => { - if last_fetch.check_expiry(now, config).is_updated() { + if last_fetch.check_expiry(now, config).is_fresh() { // The current state is up to date, no need to start another fetch. 
relay_log::trace!( tags.project_key = self.project_key.as_str(), @@ -559,7 +559,7 @@ enum Expiry { impl Expiry { /// Returns `true` if the project is up-to-date and does not need to be fetched. - fn is_updated(&self) -> bool { + fn is_fresh(&self) -> bool { matches!(self, Self::Fresh) } From ebf1c52a4c7c39c51fc76bbba63f94b0e442acf5 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Wed, 30 Oct 2024 09:51:21 +0100 Subject: [PATCH 14/15] replace cached rate limits inner with mutex --- Cargo.lock | 1 - relay-quotas/Cargo.toml | 1 - relay-quotas/src/rate_limit.rs | 61 ++++++------------- relay-sampling/src/evaluation.rs | 2 +- relay-server/src/services/buffer/mod.rs | 4 +- .../src/services/projects/cache/state.rs | 3 - 6 files changed, 22 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ca067f14f..ec8d671f82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3669,7 +3669,6 @@ dependencies = [ name = "relay-quotas" version = "24.10.0" dependencies = [ - "arc-swap", "hashbrown", "insta", "itertools 0.13.0", diff --git a/relay-quotas/Cargo.toml b/relay-quotas/Cargo.toml index f5a3d03d21..4ac6cdb070 100644 --- a/relay-quotas/Cargo.toml +++ b/relay-quotas/Cargo.toml @@ -17,7 +17,6 @@ redis = ["dep:thiserror", "dep:relay-log", "relay-redis/impl"] workspace = true [dependencies] -arc-swap = { workspace = true } hashbrown = { workspace = true } relay-base-schema = { workspace = true } relay-common = { workspace = true } diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index a77baaf173..ba90bacaca 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -1,9 +1,8 @@ use std::fmt; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, Mutex, PoisonError}; use std::time::{Duration, Instant}; -use arc_swap::ArcSwap; use relay_base_schema::metrics::MetricNamespace; use relay_base_schema::project::{ProjectId, ProjectKey}; use smallvec::SmallVec; @@ -281,15 +280,14 @@ impl RateLimits { } /// Removes expired rate limits from this instance. - pub fn clean_expired(&mut self) { - let now = Instant::now(); + pub fn clean_expired(&mut self, now: Instant) { self.limits .retain(|limit| !limit.retry_after.expired_at(now)); } /// Checks whether any rate limits apply to the given scoping. /// - /// If no limits match, then the returned `RateLimits` instance evalutes `is_ok`. Otherwise, it + /// If no limits match, then the returned `RateLimits` instance evaluates `is_ok`. Otherwise, it /// contains rate limits that match the given scoping. pub fn check(&self, scoping: ItemScoping<'_>) -> Self { self.check_with_quotas(&[], scoping) @@ -300,7 +298,7 @@ impl RateLimits { /// This is similar to `check`. Additionally, it checks for quotas with a static limit `0`, and /// rejects items even if there is no active rate limit in this instance. /// - /// If no limits or quotas match, then the returned `RateLimits` instance evalutes `is_ok`. + /// If no limits or quotas match, then the returned `RateLimits` instance evaluates `is_ok`. /// Otherwise, it contains rate limits that match the given scoping. pub fn check_with_quotas<'a>( &self, @@ -341,7 +339,7 @@ impl RateLimits { /// Returns `true` if there are any limits contained. /// - /// This is equavalent to checking whether [`Self::longest`] returns `Some` + /// This is equivalent to checking whether [`Self::longest`] returns `Some` /// or [`Self::iter`] returns an iterator with at least one item. 
    pub fn is_empty(&self) -> bool {
         self.limits.is_empty()
@@ -404,7 +402,7 @@ impl<'a> IntoIterator for &'a RateLimits {
 ///
 /// The data structure makes sure no expired rate limits are enforced.
 #[derive(Debug, Default)]
-pub struct CachedRateLimits(ArcSwap<RateLimits>);
+pub struct CachedRateLimits(Mutex<Arc<RateLimits>>);
 
 impl CachedRateLimits {
     /// Creates a new, empty instance without any rate limits enforced.
@@ -416,51 +414,30 @@ impl CachedRateLimits {
     ///
     /// See also: [`RateLimits::add`].
     pub fn add(&self, limit: RateLimit) {
-        self.0.rcu(|current| {
-            let mut current = current.as_ref().clone();
-            current.add(limit.clone());
-            current
-        });
+        let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
+        let current = Arc::make_mut(&mut inner);
+        current.add(limit);
     }
 
     /// Merges more rate limits into this instance.
     ///
     /// See also: [`RateLimits::merge`].
     pub fn merge(&self, limits: RateLimits) {
-        self.0.rcu(|current| {
-            let mut current = current.as_ref().clone();
-            for limit in limits.clone() {
-                current.add(limit)
-            }
-            current
-        });
+        let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
+        let current = Arc::make_mut(&mut inner);
+        for limit in limits {
+            current.add(limit)
+        }
     }
 
     /// Returns a reference to the contained rate limits.
     ///
-    /// This call gurantuess that at the time of call no returned rate limit is expired.
+    /// This call guarantees that at the time of call no returned rate limit is expired.
     pub fn current_limits(&self) -> Arc<RateLimits> {
         let now = Instant::now();
-
-        let mut current = self.0.load();
-        while current.iter().any(|rl| rl.retry_after.expired_at(now)) {
-            let new = {
-                let mut new = current.as_ref().clone();
-                new.clean_expired();
-                Arc::new(new)
-            };
-
-            let prev = self.0.compare_and_swap(&*current, Arc::clone(&new));
-
-            // If there was a swap, we know `new` is now stored and the most recent value.
-            if Arc::ptr_eq(&current, &prev) {
-                return new;
-            }
-
-            current = prev;
-        }
-
-        arc_swap::Guard::into_inner(current)
+        let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
+        Arc::make_mut(&mut inner).clean_expired(now);
+        Arc::clone(&inner)
     }
 }
 
@@ -956,7 +933,7 @@ mod tests {
 
         // Sanity check before running `clean_expired`
         assert_eq!(rate_limits.iter().count(), 2);
 
-        rate_limits.clean_expired();
+        rate_limits.clean_expired(Instant::now());
 
         // Check that the expired limit has been removed
         insta::assert_ron_snapshot!(rate_limits, @r###"
diff --git a/relay-sampling/src/evaluation.rs b/relay-sampling/src/evaluation.rs
index 7f08dc8fa8..c3b3944dd7 100644
--- a/relay-sampling/src/evaluation.rs
+++ b/relay-sampling/src/evaluation.rs
@@ -70,7 +70,7 @@ impl<'a> ReservoirEvaluator<'a> {
         Arc::clone(&self.counters)
     }
 
-    /// Sets the Redis pool and organiation ID for the [`ReservoirEvaluator`].
+    /// Sets the Redis pool and organization ID for the [`ReservoirEvaluator`].
    ///
    /// These values are needed to synchronize with Redis.
#[cfg(feature = "redis")] diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index 10a4319319..29ec9f77f0 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -690,9 +690,9 @@ mod tests { let project_key = envelope.meta().public_key(); addr.send(EnvelopeBuffer::Push(envelope.clone())); - tokio::time::sleep(Duration::from_secs(2)).await; + tokio::time::sleep(Duration::from_secs(3)).await; - let message = tokio::time::timeout(Duration::from_secs(3), envelopes_rx.recv()); + let message = tokio::time::timeout(Duration::from_secs(5), envelopes_rx.recv()); let Some(legacy::DequeuedEnvelope(envelope)) = message.await.unwrap() else { panic!(); }; diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs index e6d670c0d3..ada692cef0 100644 --- a/relay-server/src/services/projects/cache/state.rs +++ b/relay-server/src/services/projects/cache/state.rs @@ -366,9 +366,6 @@ impl SharedProjectState { // reservoir counters did not change. // // `try_lock` to not potentially block, it's a best effort cleanup. - // - // TODO: Remove the lock, we already have interior mutability with the `ArcSwap` - // and the counters themselves can be atomics. if let Ok(mut counters) = prev.reservoir_counters.try_lock() { counters.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); } From 4fd48394064aff146b36d2d04dac08dcf667efa8 Mon Sep 17 00:00:00 2001 From: David Herberth Date: Wed, 30 Oct 2024 13:58:05 +0100 Subject: [PATCH 15/15] metric for project cache notification channel --- relay-server/src/services/projects/cache/service.rs | 7 ++++++- relay-server/src/statsd.rs | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs index f668512663..23a21e553c 100644 --- a/relay-server/src/services/projects/cache/service.rs +++ b/relay-server/src/services/projects/cache/service.rs @@ -10,7 +10,7 @@ use crate::services::projects::cache::handle::ProjectCacheHandle; use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; use crate::services::projects::project::ProjectState; use crate::services::projects::source::ProjectSource; -use crate::statsd::RelayTimers; +use crate::statsd::{RelayGauges, RelayTimers}; /// Size of the broadcast channel for project events. /// @@ -160,6 +160,11 @@ impl ProjectCacheService { let _ = self .project_events_tx .send(ProjectChange::Ready(project_key)); + + metric!( + gauge(RelayGauges::ProjectCacheNotificationChannel) = + self.project_events_tx.len() as u64 + ); } fn handle_evict_stale_projects(&mut self) { diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index dca6421e01..007dd025b8 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -44,6 +44,8 @@ pub enum RelayGauges { /// The number of idle connections in the Redis Pool. #[cfg(feature = "processing")] RedisPoolIdleConnections, + /// The number of notifications in the broadcast channel of the project cache. + ProjectCacheNotificationChannel, } impl GaugeMetric for RelayGauges { @@ -61,6 +63,9 @@ impl GaugeMetric for RelayGauges { RelayGauges::RedisPoolConnections => "redis.pool.connections", #[cfg(feature = "processing")] RelayGauges::RedisPoolIdleConnections => "redis.pool.idle_connections", + RelayGauges::ProjectCacheNotificationChannel => { + "project_cache.notification_channel.size" + } } } }
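
Note on the locking pattern introduced in PATCH 14/15: `CachedRateLimits` now wraps a `Mutex<Arc<RateLimits>>` and mutates through `Arc::make_mut`, so a writer copies the inner value only while a reader still holds an older snapshot, and readers receive a cheap `Arc` clone. The following is a minimal, self-contained sketch of that copy-on-write-under-mutex idiom; the `CachedCounters` type and its methods are invented for illustration and are not part of Relay.

    use std::sync::{Arc, Mutex, PoisonError};

    /// Hypothetical cache of counters, mirroring the `Mutex<Arc<_>>` idiom.
    #[derive(Debug, Default)]
    struct CachedCounters(Mutex<Arc<Vec<u64>>>);

    impl CachedCounters {
        /// Appends a value; clones the inner vector only if a reader still
        /// holds an `Arc` to an older snapshot (copy-on-write).
        fn push(&self, value: u64) {
            // Ignore lock poisoning, as the patch does via `PoisonError::into_inner`.
            let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
            Arc::make_mut(&mut inner).push(value);
        }

        /// Returns a cheap, immutable snapshot of the current values.
        fn snapshot(&self) -> Arc<Vec<u64>> {
            let inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
            Arc::clone(&inner)
        }
    }

    fn main() {
        let cache = CachedCounters::default();
        cache.push(1);

        let before = cache.snapshot();
        cache.push(2); // copy-on-write: does not mutate `before`
        let after = cache.snapshot();

        assert_eq!(*before, vec![1]);
        assert_eq!(*after, vec![1, 2]);
    }

Compared to the previous `ArcSwap` retry loop, this trades lock-free reads for a short critical section, but it makes expiry cleanup in `current_limits` a single in-place mutation instead of a compare-and-swap loop.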