diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8287d3842e..28548848d3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -553,7 +553,7 @@ jobs: - run: make test-integration env: PYTEST_N: 6 - RELAY_VERSION_CHAIN: "23.12.0,latest" + RELAY_VERSION_CHAIN: "20.6.0,latest" sentry-relay-integration-tests: name: Sentry-Relay Integration Tests diff --git a/CHANGELOG.md b/CHANGELOG.md index 4abf6d0f4b..8f8721a92c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,6 @@ ## Unreleased -**Breaking Changes**: - -- Removes support for metric meta envelope items. ([#4152](https://github.com/getsentry/relay/pull/4152)) -- Removes support for the project cache endpoint version 2 and before. ([#4147](https://github.com/getsentry/relay/pull/4147)) - **Bug Fixes** - Allow profile chunks without release. ([#4155](https://github.com/getsentry/relay/pull/4155)) @@ -26,7 +21,7 @@ ## 24.10.0 -**Breaking Changes**: +**Breaking Changes:** - Only allow processing enabled in managed mode. ([#4087](https://github.com/getsentry/relay/pull/4087)) @@ -36,7 +31,7 @@ - Use the duration reported by the profiler instead of the transaction. ([#4058](https://github.com/getsentry/relay/pull/4058)) - Incorrect pattern matches involving adjacent any and wildcard matchers. ([#4072](https://github.com/getsentry/relay/pull/4072)) -**Features**: +**Features:** - Add a config option to add default tags to all Relay Sentry events. ([#3944](https://github.com/getsentry/relay/pull/3944)) - Automatically derive `client.address` and `user.geo` for standalone spans. ([#4047](https://github.com/getsentry/relay/pull/4047)) @@ -48,7 +43,7 @@ - Add support for creating User from LoginId in Unreal Crash Context. ([#4093](https://github.com/getsentry/relay/pull/4093)) - Add multi-write Redis client. ([#4064](https://github.com/getsentry/relay/pull/4064)) -**Internal**: +**Internal:** - Remove unused `cogs.enabled` configuration option. ([#4060](https://github.com/getsentry/relay/pull/4060)) - Add the dynamic sampling rate to standalone spans as a measurement so that it can be stored, queried, and used for extrapolation. 
([#4063](https://github.com/getsentry/relay/pull/4063)) diff --git a/Cargo.lock b/Cargo.lock index 8e9e350e0d..f844e78d09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" dependencies = [ "cfg-if", "getrandom", @@ -217,16 +217,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "atomic-wait" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55b94919229f2c42292fd71ffa4b75e83193bffdd77b1e858cd55fd2d0b0ea8" -dependencies = [ - "libc", - "windows-sys 0.42.0", -] - [[package]] name = "atomic-waker" version = "1.1.2" @@ -422,7 +412,7 @@ dependencies = [ "bitflags 2.4.1", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools 0.10.5", "log", "prettyplease", "proc-macro2", @@ -2705,16 +2695,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" -[[package]] -name = "papaya" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "674d893d27388726e4020eaa60257b3df760392003c14f46455e3277af72cd04" -dependencies = [ - "atomic-wait", - "seize", -] - [[package]] name = "parking" version = "2.2.0" @@ -3019,7 +2999,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.77", @@ -3753,7 +3733,6 @@ dependencies = [ name = "relay-server" version = "24.10.0" dependencies = [ - "ahash", "anyhow", "arc-swap", "axum", @@ -3780,7 +3759,6 @@ dependencies = [ "minidump", "multer", "once_cell", - "papaya", "pin-project-lite", "priority-queue", "rand", @@ -4194,12 +4172,6 @@ dependencies = [ "libc", ] -[[package]] -name = "seize" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d659fa6f19e82a52ab8d3fff3c380bd8cc16462eaea411395618a38760eb85bc" - [[package]] name = "semver" version = "1.0.23" @@ -5815,21 +5787,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -5888,12 +5845,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -5906,12 +5857,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = 
"windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5924,12 +5869,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5948,12 +5887,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5966,12 +5899,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5984,12 +5911,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -6002,12 +5923,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index 7b41485040..2865dccce0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,6 @@ relay-test = { path = "relay-test" } relay-protocol-derive = { path = "relay-protocol-derive" } relay-event-derive = { path = "relay-event-derive" } -ahash = "0.8.11" android_trace_log = { version = "0.3.0", features = ["serde"] } anyhow = "1.0.66" axum = "0.7.5" @@ -120,7 +119,6 @@ num-traits = "0.2.18" num_cpus = "1.13.0" once_cell = "1.13.1" opentelemetry-proto = { version = "0.7.0", default-features = false } -papaya = "0.1.4" parking_lot = "0.12.1" path-slash = "0.2.1" pest = "2.1.3" diff --git a/relay-cogs/src/lib.rs b/relay-cogs/src/lib.rs index 68401d11eb..7c5a08fbfe 100644 --- a/relay-cogs/src/lib.rs +++ b/relay-cogs/src/lib.rs @@ -132,6 +132,9 @@ pub enum AppFeature { /// This app feature is for continuous profiling. Profiles, + /// Metric metadata. + MetricMeta, + /// Metrics in the transactions namespace. MetricsTransactions, /// Metrics in the spans namespace. 
@@ -165,6 +168,7 @@ impl AppFeature { Self::ClientReports => "client_reports", Self::CheckIns => "check_ins", Self::Replays => "replays", + Self::MetricMeta => "metric_meta", Self::MetricsTransactions => "metrics_transactions", Self::MetricsSpans => "metrics_spans", Self::MetricsProfiles => "metrics_profiles", diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 6f9ac83e2e..54a6b35b60 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -559,9 +559,17 @@ impl Default for Metrics { } /// Controls processing of Sentry metrics and metric metadata. -#[derive(Serialize, Deserialize, Debug, Default)] +#[derive(Serialize, Deserialize, Debug)] #[serde(default)] struct SentryMetrics { + /// Code locations expiry in seconds. + /// + /// Defaults to 15 days. + pub meta_locations_expiry: u64, + /// Maximum amount of code locations to store per metric. + /// + /// Defaults to 5. + pub meta_locations_max: usize, /// Whether metric stats are collected and emitted. /// /// Metric stats are always collected and emitted when processing @@ -575,6 +583,16 @@ struct SentryMetrics { pub metric_stats_enabled: bool, } +impl Default for SentryMetrics { + fn default() -> Self { + Self { + meta_locations_expiry: 15 * 24 * 60 * 60, + meta_locations_max: 5, + metric_stats_enabled: false, + } + } +} + /// Controls various limits #[derive(Serialize, Deserialize, Debug)] #[serde(default)] @@ -615,6 +633,8 @@ struct Limits { max_statsd_size: ByteSize, /// The maximum payload size for metric buckets. max_metric_buckets_size: ByteSize, + /// The maximum payload size for metric metadata. + max_metric_meta_size: ByteSize, /// The maximum payload size for a compressed replay. max_replay_compressed_size: ByteSize, /// The maximum payload size for an uncompressed replay. @@ -666,6 +686,7 @@ impl Default for Limits { max_span_size: ByteSize::mebibytes(1), max_statsd_size: ByteSize::mebibytes(1), max_metric_buckets_size: ByteSize::mebibytes(1), + max_metric_meta_size: ByteSize::mebibytes(1), max_replay_compressed_size: ByteSize::mebibytes(10), max_replay_uncompressed_size: ByteSize::mebibytes(100), max_replay_message_size: ByteSize::mebibytes(15), @@ -1017,9 +1038,7 @@ struct Cache { /// The cache timeout for project configurations in seconds. project_expiry: u32, /// Continue using project state this many seconds after cache expiry while a new state is - /// being fetched. This is added on top of `project_expiry`. - /// - /// Default is 2 minutes. + /// being fetched. This is added on top of `project_expiry` and `miss_expiry`. Default is 0. project_grace_period: u32, /// The cache timeout for downstream relay info (public keys) in seconds. 
relay_expiry: u32, @@ -1055,17 +1074,17 @@ impl Default for Cache { fn default() -> Self { Cache { project_request_full_config: false, - project_expiry: 300, // 5 minutes - project_grace_period: 120, // 2 minutes - relay_expiry: 3600, // 1 hour - envelope_expiry: 600, // 10 minutes + project_expiry: 300, // 5 minutes + project_grace_period: 0, + relay_expiry: 3600, // 1 hour + envelope_expiry: 600, // 10 minutes envelope_buffer_size: 1000, miss_expiry: 60, // 1 minute batch_interval: 100, // 100ms downstream_relays_batch_interval: 100, // 100ms batch_size: 500, file_interval: 10, // 10 seconds - eviction_interval: 15, // 15 seconds + eviction_interval: 60, // 60 seconds global_config_fetch_interval: 10, // 10 seconds } } @@ -2052,6 +2071,16 @@ impl Config { self.values.metrics.sample_rate } + /// Returns the maximum amount of code locations per metric. + pub fn metrics_meta_locations_max(&self) -> usize { + self.values.sentry_metrics.meta_locations_max + } + + /// Returns the expiry for code locations. + pub fn metrics_meta_locations_expiry(&self) -> Duration { + Duration::from_secs(self.values.sentry_metrics.meta_locations_expiry) + } + /// Returns the interval for periodic metrics emitted from Relay. /// /// `None` if periodic metrics are disabled. @@ -2267,6 +2296,11 @@ impl Config { self.values.limits.max_metric_buckets_size.as_bytes() } + /// Returns the maximum payload size of metric metadata in bytes. + pub fn max_metric_meta_size(&self) -> usize { + self.values.limits.max_metric_meta_size.as_bytes() + } + /// Whether metric stats are collected and emitted. /// /// Metric stats are always collected and emitted when processing diff --git a/relay-config/src/redis.rs b/relay-config/src/redis.rs index f1749bdf83..90d29617c9 100644 --- a/relay-config/src/redis.rs +++ b/relay-config/src/redis.rs @@ -178,6 +178,8 @@ pub enum RedisConfigs { cardinality: Box, /// Configuration for the `quotas` pool. quotas: Box, + /// Configuration for the `misc` pool. + misc: Box, }, } @@ -219,6 +221,8 @@ pub enum RedisPoolConfigs<'a> { cardinality: RedisConfigRef<'a>, /// Configuration for the `quotas` pool. quotas: RedisConfigRef<'a>, + /// Configuration for the `misc` pool. 
+ misc: RedisConfigRef<'a>, }, } @@ -288,15 +292,18 @@ pub(super) fn create_redis_pools(configs: &RedisConfigs, cpu_concurrency: u32) - project_configs, cardinality, quotas, + misc, } => { let project_configs = create_redis_pool(project_configs, project_configs_default_connections); let cardinality = create_redis_pool(cardinality, cpu_concurrency); let quotas = create_redis_pool(quotas, cpu_concurrency); + let misc = create_redis_pool(misc, cpu_concurrency); RedisPoolConfigs::Individual { project_configs, cardinality, quotas, + misc, } } } @@ -371,6 +378,16 @@ quotas: - "redis://127.0.0.2:6379" max_connections: 17 connection_timeout: 5 +misc: + configs: + - cluster_nodes: + - "redis://127.0.0.1:6379" + - "redis://127.0.0.2:6379" + max_connections: 42 + connection_timeout: 5 + - server: "redis://127.0.0.1:6379" + max_connections: 84 + connection_timeout: 10 "#; let configs: RedisConfigs = serde_yaml::from_str(yaml) @@ -400,6 +417,29 @@ quotas: ..Default::default() }, }), + misc: Box::new(RedisConfig::MultiWrite { + configs: vec![ + RedisConfig::Cluster { + cluster_nodes: vec![ + "redis://127.0.0.1:6379".to_owned(), + "redis://127.0.0.2:6379".to_owned(), + ], + options: PartialRedisConfigOptions { + max_connections: Some(42), + connection_timeout: 5, + ..Default::default() + }, + }, + RedisConfig::Single(SingleRedisConfig::Detailed { + server: "redis://127.0.0.1:6379".to_owned(), + options: PartialRedisConfigOptions { + max_connections: Some(84), + connection_timeout: 10, + ..Default::default() + }, + }), + ], + }), }; assert_eq!(configs, expected); @@ -693,6 +733,17 @@ read_timeout: 10 }), ], }), + misc: Box::new(RedisConfig::Cluster { + cluster_nodes: vec![ + "redis://127.0.0.1:6379".to_owned(), + "redis://127.0.0.2:6379".to_owned(), + ], + options: PartialRedisConfigOptions { + max_connections: Some(84), + connection_timeout: 10, + ..Default::default() + }, + }), }; assert_json_snapshot!(configs, @r###" @@ -738,6 +789,18 @@ read_timeout: 10 "write_timeout": 3 } ] + }, + "misc": { + "cluster_nodes": [ + "redis://127.0.0.1:6379", + "redis://127.0.0.2:6379" + ], + "max_connections": 84, + "connection_timeout": 10, + "max_lifetime": 300, + "idle_timeout": 60, + "read_timeout": 3, + "write_timeout": 3 } } "###); diff --git a/relay-dynamic-config/src/error_boundary.rs b/relay-dynamic-config/src/error_boundary.rs index dc4948a3b0..c80d2ab8b7 100644 --- a/relay-dynamic-config/src/error_boundary.rs +++ b/relay-dynamic-config/src/error_boundary.rs @@ -42,14 +42,6 @@ impl ErrorBoundary { } } - /// Converts from `ErrorBoundary` to `ErrorBoundary<&T>`. - pub fn as_ref(&self) -> ErrorBoundary<&T> { - match self { - Self::Ok(t) => ErrorBoundary::Ok(t), - Self::Err(e) => ErrorBoundary::Err(Arc::clone(e)), - } - } - /// Returns the contained [`Ok`] value or computes it from a closure. 
     #[inline]
     pub fn unwrap_or_else<F>(self, op: F) -> T
diff --git a/relay-metrics/src/lib.rs b/relay-metrics/src/lib.rs
index 4897553525..396236c2c4 100644
--- a/relay-metrics/src/lib.rs
+++ b/relay-metrics/src/lib.rs
@@ -70,6 +70,7 @@ pub mod aggregator;
 pub mod cogs;
+pub mod meta;
 
 mod bucket;
 mod finite;
@@ -79,5 +80,8 @@ mod view;
 
 pub use bucket::*;
 pub use finite::*;
+#[cfg(feature = "redis")]
+pub use meta::RedisMetricMetaStore;
+pub use meta::{MetaAggregator, MetricMeta};
 pub use protocol::*;
 pub use view::*;
diff --git a/relay-metrics/src/meta/aggregator.rs b/relay-metrics/src/meta/aggregator.rs
new file mode 100644
index 0000000000..db7c051efb
--- /dev/null
+++ b/relay-metrics/src/meta/aggregator.rs
@@ -0,0 +1,152 @@
+use std::{
+    collections::{HashMap, HashSet},
+    hash::Hash,
+};
+
+use relay_base_schema::project::ProjectKey;
+
+use super::{Item, Location, MetricMeta, StartOfDayUnixTimestamp};
+use crate::{statsd::MetricCounters, MetricResourceIdentifier};
+
+/// A metrics meta aggregator.
+///
+/// Aggregates metric metadata based on their scope (project, mri, timestamp) and
+/// only keeps the most relevant entries.
+///
+/// Currently we track the first N unique metric meta elements we get.
+///
+/// This should represent the actual adoption rate of different code versions.
+///
+/// This aggregator is purely in memory and will lose its state on restart,
+/// which may cause multiple different items to be emitted after restarts.
+/// For this we have de-duplication in the storage, and the overall volume of
+/// this happening is small enough to just add it to the storage in the worst case.
+#[derive(Debug)]
+pub struct MetaAggregator {
+    /// All tracked code locations.
+    locations: hashbrown::HashMap<Scope, HashSet<Location>>,
+
+    /// Maximum tracked locations.
+    max_locations: usize,
+}
+
+impl MetaAggregator {
+    /// Creates a new metrics meta aggregator.
+    pub fn new(max_locations: usize) -> Self {
+        Self {
+            locations: hashbrown::HashMap::new(),
+            max_locations,
+        }
+    }
+
+    /// Adds a new meta item to the aggregator.
+    ///
+    /// Returns a new [`MetricMeta`] element when the element should be stored
+    /// or sent upstream for storage.
+    ///
+    /// Returns `None` when the meta item was already seen or is not considered relevant.
+    pub fn add(&mut self, project_key: ProjectKey, meta: MetricMeta) -> Option<MetricMeta> {
+        let mut send_upstream = HashMap::new();
+
+        let mut total = 0;
+        for (mri, items) in meta.mapping {
+            let scope = Scope {
+                timestamp: meta.timestamp,
+                project_key,
+                mri,
+            };
+
+            total += items.len();
+            if let Some(items) = self.add_scoped(&scope, items) {
+                send_upstream.insert(scope.mri, items);
+            }
+        }
+
+        relay_statsd::metric!(counter(MetricCounters::MetaAggregatorItems) += total as i64);
+
+        if send_upstream.is_empty() {
+            return None;
+        }
+
+        relay_statsd::metric!(counter(MetricCounters::MetaAggregatorUpdate) += 1);
+        Some(MetricMeta {
+            timestamp: meta.timestamp,
+            mapping: send_upstream,
+        })
+    }
+
+    /// Retrieves all currently relevant metric meta for a project.
+ pub fn get_all_relevant(&self, project_key: ProjectKey) -> impl Iterator { + let locations = self + .locations + .iter() + .filter(|(scope, _)| scope.project_key == project_key); + + let mut result = HashMap::new(); + + for (scope, locations) in locations { + result + .entry(scope.timestamp) + .or_insert_with(|| MetricMeta { + timestamp: scope.timestamp, + mapping: HashMap::new(), + }) + .mapping + .entry(scope.mri.clone()) // This clone sucks + .or_insert_with(Vec::new) + .extend(locations.iter().cloned().map(Item::Location)); + } + + result.into_values() + } + + /// Remove all contained state related to a project. + pub fn clear(&mut self, project_key: ProjectKey) { + self.locations + .retain(|scope, _| scope.project_key != project_key); + } + + fn add_scoped(&mut self, scope: &Scope, items: Vec) -> Option> { + // Entry ref needs hashbrown, we would have to clone the scope without or do a separate lookup. + let locations = self.locations.entry_ref(scope).or_default(); + let mut send_upstream = Vec::new(); + + for item in items { + match item { + Item::Location(location) => { + if locations.len() > self.max_locations { + break; + } + + if !locations.contains(&location) { + locations.insert(location.clone()); + send_upstream.push(Item::Location(location)); + } + } + Item::Unknown => {} + } + } + + (!send_upstream.is_empty()).then_some(send_upstream) + } +} + +/// The metadata scope. +/// +/// We scope metadata by project, mri and day, +/// represented as a unix timestamp at the beginning of the day. +/// +/// The technical scope (e.g. redis key) also includes the organization id, but this +/// can be inferred from the project. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +struct Scope { + pub timestamp: StartOfDayUnixTimestamp, + pub project_key: ProjectKey, + pub mri: MetricResourceIdentifier<'static>, +} + +impl From<&Scope> for Scope { + fn from(value: &Scope) -> Self { + value.clone() + } +} diff --git a/relay-metrics/src/meta/mod.rs b/relay-metrics/src/meta/mod.rs new file mode 100644 index 0000000000..5504e0dc5a --- /dev/null +++ b/relay-metrics/src/meta/mod.rs @@ -0,0 +1,11 @@ +//! Functionality for aggregating and storing of metrics metadata. + +mod aggregator; +mod protocol; +#[cfg(feature = "redis")] +mod redis; + +pub use self::aggregator::*; +pub use self::protocol::*; +#[cfg(feature = "redis")] +pub use self::redis::*; diff --git a/relay-metrics/src/meta/protocol.rs b/relay-metrics/src/meta/protocol.rs new file mode 100644 index 0000000000..7a1223cf8e --- /dev/null +++ b/relay-metrics/src/meta/protocol.rs @@ -0,0 +1,165 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use relay_common::time::UnixTimestamp; +use serde::{Deserialize, Serialize}; + +use crate::MetricResourceIdentifier; + +/// A metric metadata item. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct MetricMeta { + /// Timestamp scope for the contained metadata. + /// + /// Metric metadata is collected in daily intervals, so this may be truncated + /// to the start of the day (UTC) already. + pub timestamp: StartOfDayUnixTimestamp, + + /// The contained metadata mapped by MRI. + pub mapping: HashMap, Vec>, +} + +/// A metadata item. +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum Item { + /// A location metadata pointing to the code location where the metric originates from. + Location(Location), + /// Unknown item. + #[serde(other)] + Unknown, +} + +/// A code location. 
+#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)] +pub struct Location { + /// The relative file path. + #[serde(skip_serializing_if = "Option::is_none")] + pub filename: Option, + /// The absolute file path. + #[serde(skip_serializing_if = "Option::is_none")] + pub abs_path: Option, + /// The containing module name or path. + #[serde(skip_serializing_if = "Option::is_none")] + pub module: Option, + /// The containing function name. + #[serde(skip_serializing_if = "Option::is_none")] + pub function: Option, + /// The line number. + #[serde(skip_serializing_if = "Option::is_none")] + pub lineno: Option, + /// Source code leading up to `lineno`. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub pre_context: Vec>, + /// Source code of the current line (`lineno`). + #[serde(skip_serializing_if = "Option::is_none")] + pub context_line: Option, + /// Source code of the lines after `lineno`. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub post_context: Vec>, +} + +/// A Unix timestamp that is truncated to the start of the day. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct StartOfDayUnixTimestamp(UnixTimestamp); + +impl StartOfDayUnixTimestamp { + /// Creates a new `StartOfDayUnixTimestamp` from a timestamp by truncating it. + /// + /// May return none when passed an invalid date, but in practice this never fails + /// since the [`UnixTimestamp`] is already sufficiently validated. + pub fn new(ts: UnixTimestamp) -> Option { + let dt: DateTime = DateTime::from_timestamp(ts.as_secs().try_into().ok()?, 0)?; + let beginning_of_day = dt.date_naive().and_hms_opt(0, 0, 0)?.and_utc(); + Some(Self(UnixTimestamp::from_datetime(beginning_of_day)?)) + } + + /// Returns the underlying unix timestamp, truncated to the start of the day. + pub fn as_timestamp(&self) -> UnixTimestamp { + self.0 + } +} + +impl std::ops::Deref for StartOfDayUnixTimestamp { + type Target = UnixTimestamp; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Serialize for StartOfDayUnixTimestamp { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.0.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for StartOfDayUnixTimestamp { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let ts = UnixTimestamp::deserialize(deserializer)?; + StartOfDayUnixTimestamp::new(ts) + .ok_or_else(|| serde::de::Error::custom("invalid timestamp")) + } +} + +#[cfg(test)] +mod tests { + use insta::assert_json_snapshot; + + use super::*; + + #[test] + fn test_deserialize_null_context() { + let json = r#"{ + "timestamp": 1715904000, + "mapping": { + "d:memory.allocations@allocations": [{ + "abs_path": "/rails/config/initializers/active_job.rb", + "function": "block in
", + "lineno": 5, + "filename": "config/initializers/active_job.rb", + "pre_context": [null, " allocations = event.allocations\n", " allocations=#{allocations}\"\n"], + "context_line": " Sentry::Metrics.distribution('memory.allocations')\n", + "post_context": ["end\n",null,null], + "type":"location" + }] + } + }"#; + + let r: MetricMeta = serde_json::from_str(json).unwrap(); + + assert_json_snapshot!(r, @r###" + { + "timestamp": 1715904000, + "mapping": { + "d:custom/memory.allocations@allocations": [ + { + "type": "location", + "filename": "config/initializers/active_job.rb", + "abs_path": "/rails/config/initializers/active_job.rb", + "function": "block in
", + "lineno": 5, + "pre_context": [ + null, + " allocations = event.allocations\n", + " allocations=#{allocations}\"\n" + ], + "context_line": " Sentry::Metrics.distribution('memory.allocations')\n", + "post_context": [ + "end\n", + null, + null + ] + } + ] + } + } + "###); + } +} diff --git a/relay-metrics/src/meta/redis.rs b/relay-metrics/src/meta/redis.rs new file mode 100644 index 0000000000..60cda8d50f --- /dev/null +++ b/relay-metrics/src/meta/redis.rs @@ -0,0 +1,170 @@ +use std::time::Duration; + +use hash32::{FnvHasher, Hasher as _}; +use relay_base_schema::project::ProjectId; +use relay_common::time::UnixTimestamp; +use relay_redis::{RedisError, RedisPool}; + +use super::{Item, MetricMeta}; +use crate::{statsd::MetricCounters, MetricResourceIdentifier}; + +/// Redis metric meta +pub struct RedisMetricMetaStore { + redis: RedisPool, + expiry: Duration, +} + +impl RedisMetricMetaStore { + /// Creates a new Redis metrics meta store. + pub fn new(redis: RedisPool, expiry: Duration) -> Self { + Self { redis, expiry } + } + + /// Stores metric metadata in Redis. + pub fn store( + &self, + organization_id: u64, + project_id: ProjectId, + meta: MetricMeta, + ) -> Result<(), RedisError> { + let mut client = self.redis.client()?; + let mut connection = client.connection()?; + + let mut redis_updates = 0; + + let mut pipe = relay_redis::redis::pipe(); + for (mri, items) in meta.mapping { + let key = self.build_redis_key(organization_id, project_id, *meta.timestamp, &mri); + + // Should be fine if we don't batch here, we expect a very small amount of locations + // from the aggregator. + let location_cmd = pipe.cmd("SADD").arg(&key); + for item in items { + match item { + Item::Location(location) => { + let member = serde_json::to_string(&location).unwrap(); + location_cmd.arg(member); + } + Item::Unknown => {} + } + } + location_cmd.ignore(); + + redis_updates += 1; + relay_log::trace!("storing metric meta for project {organization_id}:{project_id}"); + + // use original timestamp to not bump expiry + let expire_at = meta.timestamp.as_secs() + self.expiry.as_secs(); + pipe.cmd("EXPIREAT").arg(key).arg(expire_at).ignore(); + } + pipe.query::<()>(&mut connection) + .map_err(RedisError::Redis)?; + + relay_statsd::metric!(counter(MetricCounters::MetaRedisUpdate) += redis_updates); + + Ok(()) + } + + fn build_redis_key( + &self, + organization_id: u64, + project_id: ProjectId, + timestamp: UnixTimestamp, + mri: &MetricResourceIdentifier<'_>, + ) -> String { + let mri_hash = mri_to_fnv1a32(mri); + + format!("mm:l:{{{organization_id}}}:{project_id}:{mri_hash}:{timestamp}") + } +} + +/// Converts an MRI to a fnv1a32 hash. +/// +/// Sentry also uses the exact same algorithm to hash MRIs. 
+fn mri_to_fnv1a32(mri: &MetricResourceIdentifier<'_>) -> u32 {
+    let mut hasher = FnvHasher::default();
+
+    let s = mri.to_string();
+    // hash the bytes directly, `write_str` on the hasher adds a 0xff byte at the end
+    std::hash::Hasher::write(&mut hasher, s.as_bytes());
+    hasher.finish32()
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use relay_redis::RedisConfigOptions;
+
+    use crate::meta::{Location, StartOfDayUnixTimestamp};
+
+    use super::*;
+
+    fn build_store() -> RedisMetricMetaStore {
+        let url = std::env::var("RELAY_REDIS_URL")
+            .unwrap_or_else(|_| "redis://127.0.0.1:6379".to_owned());
+
+        let redis = RedisPool::single(&url, RedisConfigOptions::default()).unwrap();
+
+        RedisMetricMetaStore::new(redis, Duration::from_secs(86400))
+    }
+
+    #[test]
+    fn test_store() {
+        let store = build_store();
+
+        let organization_id = 1000;
+        let project_id = ProjectId::new(2);
+        let mri = MetricResourceIdentifier::parse("c:foo").unwrap();
+
+        let timestamp = StartOfDayUnixTimestamp::new(UnixTimestamp::now()).unwrap();
+
+        let location = Location {
+            filename: Some("foobar".to_owned()),
+            abs_path: None,
+            module: None,
+            function: None,
+            lineno: Some(42),
+            pre_context: Vec::new(),
+            context_line: None,
+            post_context: Vec::new(),
+        };
+
+        store
+            .store(
+                organization_id,
+                project_id,
+                MetricMeta {
+                    timestamp,
+                    mapping: HashMap::from([(mri.clone(), vec![Item::Location(location.clone())])]),
+                },
+            )
+            .unwrap();
+
+        let mut client = store.redis.client().unwrap();
+        let mut connection = client.connection().unwrap();
+        let key = store.build_redis_key(organization_id, project_id, *timestamp, &mri);
+        let locations: Vec<String> = relay_redis::redis::cmd("SMEMBERS")
+            .arg(key)
+            .query(&mut connection)
+            .unwrap();
+
+        assert_eq!(locations, vec![serde_json::to_string(&location).unwrap()]);
+    }
+
+    #[test]
+    fn test_mri_hash() {
+        fn test_mri(s: &str, expected_hash: u32) {
+            let mri = MetricResourceIdentifier::parse(s).unwrap();
+            assert_eq!(mri_to_fnv1a32(&mri), expected_hash);
+        }
+
+        // Sentry has the same tests.
+        test_mri("c:transactions/count_per_root_project@none", 2684394786);
+        test_mri("d:transactions/duration@millisecond", 1147819254);
+        test_mri("s:transactions/user@none", 1739810785);
+        test_mri("c:custom/user.click@none", 1248146441);
+        test_mri("d:custom/page.load@millisecond", 2103554973);
+        test_mri("s:custom/username@none", 670706478);
+    }
+}
diff --git a/relay-metrics/src/statsd.rs b/relay-metrics/src/statsd.rs
index f77ff107a2..ae7976a1fa 100644
--- a/relay-metrics/src/statsd.rs
+++ b/relay-metrics/src/statsd.rs
@@ -40,6 +40,15 @@ pub enum MetricCounters {
     /// - `aggregator`: The name of the metrics aggregator (usually `"default"`).
     /// - `namespace`: The namespace of the metric.
     MergeMiss,
+
+    /// Incremented every time the meta aggregator emitted an update that needs to be stored or
+    /// sent upstream.
+    MetaAggregatorUpdate,
+    /// Incremented for every metric meta item added to the metric meta aggregator.
+    MetaAggregatorItems,
+    /// Incremented every time a redis key is updated to store or update metadata.
+ #[cfg(feature = "redis")] + MetaRedisUpdate, } impl CounterMetric for MetricCounters { @@ -47,6 +56,10 @@ impl CounterMetric for MetricCounters { match *self { Self::MergeHit => "metrics.buckets.merge.hit", Self::MergeMiss => "metrics.buckets.merge.miss", + Self::MetaAggregatorUpdate => "metrics.meta.agg.update", + Self::MetaAggregatorItems => "metrics.meta.agg.items", + #[cfg(feature = "redis")] + Self::MetaRedisUpdate => "metrics.meta.redis.update", } } } diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index ba90bacaca..c1162d340e 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -1,6 +1,5 @@ use std::fmt; use std::str::FromStr; -use std::sync::{Arc, Mutex, PoisonError}; use std::time::{Duration, Instant}; use relay_base_schema::metrics::MetricNamespace; @@ -280,14 +279,15 @@ impl RateLimits { } /// Removes expired rate limits from this instance. - pub fn clean_expired(&mut self, now: Instant) { + pub fn clean_expired(&mut self) { + let now = Instant::now(); self.limits .retain(|limit| !limit.retry_after.expired_at(now)); } /// Checks whether any rate limits apply to the given scoping. /// - /// If no limits match, then the returned `RateLimits` instance evaluates `is_ok`. Otherwise, it + /// If no limits match, then the returned `RateLimits` instance evalutes `is_ok`. Otherwise, it /// contains rate limits that match the given scoping. pub fn check(&self, scoping: ItemScoping<'_>) -> Self { self.check_with_quotas(&[], scoping) @@ -298,7 +298,7 @@ impl RateLimits { /// This is similar to `check`. Additionally, it checks for quotas with a static limit `0`, and /// rejects items even if there is no active rate limit in this instance. /// - /// If no limits or quotas match, then the returned `RateLimits` instance evaluates `is_ok`. + /// If no limits or quotas match, then the returned `RateLimits` instance evalutes `is_ok`. /// Otherwise, it contains rate limits that match the given scoping. pub fn check_with_quotas<'a>( &self, @@ -339,7 +339,7 @@ impl RateLimits { /// Returns `true` if there are any limits contained. /// - /// This is equivalent to checking whether [`Self::longest`] returns `Some` + /// This is equavalent to checking whether [`Self::longest`] returns `Some` /// or [`Self::iter`] returns an iterator with at least one item. pub fn is_empty(&self) -> bool { self.limits.is_empty() @@ -402,7 +402,7 @@ impl<'a> IntoIterator for &'a RateLimits { /// /// The data structure makes sure no expired rate limits are enforced. #[derive(Debug, Default)] -pub struct CachedRateLimits(Mutex>); +pub struct CachedRateLimits(RateLimits); impl CachedRateLimits { /// Creates a new, empty instance without any rate limits enforced. @@ -413,31 +413,25 @@ impl CachedRateLimits { /// Adds a limit to this collection. /// /// See also: [`RateLimits::add`]. - pub fn add(&self, limit: RateLimit) { - let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); - let current = Arc::make_mut(&mut inner); - current.add(limit); + pub fn add(&mut self, limit: RateLimit) { + self.0.add(limit); } /// Merges more rate limits into this instance. /// /// See also: [`RateLimits::merge`]. - pub fn merge(&self, limits: RateLimits) { - let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); - let current = Arc::make_mut(&mut inner); - for limit in limits { - current.add(limit) + pub fn merge(&mut self, rate_limits: RateLimits) { + for limit in rate_limits { + self.add(limit) } } /// Returns a reference to the contained rate limits. 
/// - /// This call guarantees that at the time of call no returned rate limit is expired. - pub fn current_limits(&self) -> Arc { - let now = Instant::now(); - let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); - Arc::make_mut(&mut inner).clean_expired(now); - Arc::clone(&inner) + /// This call gurantuess that at the time of call no returned rate limit is expired. + pub fn current_limits(&mut self) -> &RateLimits { + self.0.clean_expired(); + &self.0 } } @@ -933,7 +927,7 @@ mod tests { // Sanity check before running `clean_expired` assert_eq!(rate_limits.iter().count(), 2); - rate_limits.clean_expired(Instant::now()); + rate_limits.clean_expired(); // Check that the expired limit has been removed insta::assert_ron_snapshot!(rate_limits, @r###" @@ -1126,7 +1120,7 @@ mod tests { #[test] fn test_cached_rate_limits_expired() { - let cached = CachedRateLimits::new(); + let mut cached = CachedRateLimits::new(); // Active error limit cached.add(RateLimit { diff --git a/relay-redis/src/noop.rs b/relay-redis/src/noop.rs index 944c9929c5..452fed960a 100644 --- a/relay-redis/src/noop.rs +++ b/relay-redis/src/noop.rs @@ -52,4 +52,6 @@ pub struct RedisPools { pub cardinality: RedisPool, /// The pool used for rate limiting/quotas. pub quotas: RedisPool, + /// The pool used for metrics metadata. + pub misc: RedisPool, } diff --git a/relay-redis/src/real.rs b/relay-redis/src/real.rs index 30fa44f773..4f6e3cfa8c 100644 --- a/relay-redis/src/real.rs +++ b/relay-redis/src/real.rs @@ -373,6 +373,8 @@ pub struct RedisPools { pub cardinality: RedisPool, /// The pool used for rate limiting/quotas. pub quotas: RedisPool, + /// The pool used for metrics metadata. + pub misc: RedisPool, } /// Stats about how the [`RedisPool`] is performing. diff --git a/relay-sampling/src/evaluation.rs b/relay-sampling/src/evaluation.rs index c3b3944dd7..7f08dc8fa8 100644 --- a/relay-sampling/src/evaluation.rs +++ b/relay-sampling/src/evaluation.rs @@ -70,7 +70,7 @@ impl<'a> ReservoirEvaluator<'a> { Arc::clone(&self.counters) } - /// Sets the Redis pool and organization ID for the [`ReservoirEvaluator`]. + /// Sets the Redis pool and organiation ID for the [`ReservoirEvaluator`]. /// /// These values are needed to synchronize with Redis. 
#[cfg(feature = "redis")] diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 34862cd152..dbec10dc95 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -29,7 +29,6 @@ processing = [ workspace = true [dependencies] -ahash = { workspace = true } anyhow = { workspace = true } serde_path_to_error = { workspace = true } axum = { workspace = true, features = ["macros", "matched-path", "tracing"] } @@ -54,7 +53,6 @@ mime = { workspace = true } minidump = { workspace = true, optional = true } multer = { workspace = true } once_cell = { workspace = true } -papaya = { workspace = true } pin-project-lite = { workspace = true } priority-queue = { workspace = true } rand = { workspace = true } diff --git a/relay-server/src/endpoints/batch_metrics.rs b/relay-server/src/endpoints/batch_metrics.rs index 19d1395168..4d108df90f 100644 --- a/relay-server/src/endpoints/batch_metrics.rs +++ b/relay-server/src/endpoints/batch_metrics.rs @@ -4,7 +4,8 @@ use serde::{Deserialize, Serialize}; use crate::extractors::{ReceivedAt, SignedBytes}; use crate::service::ServiceState; -use crate::services::processor::{BucketSource, ProcessBatchedMetrics}; +use crate::services::processor::ProcessBatchedMetrics; +use crate::services::projects::cache::BucketSource; #[derive(Debug, Serialize, Deserialize)] struct SendMetricsResponse {} diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 7430beb6dc..fe3f7c3446 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -12,8 +12,8 @@ use crate::envelope::{AttachmentType, Envelope, EnvelopeError, Item, ItemType, I use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{BucketSource, MetricData, ProcessMetrics, ProcessingGroup}; -use crate::services::projects::cache::legacy::ValidateEnvelope; +use crate::services::processor::{MetricData, ProcessMetricMeta, ProcessingGroup}; +use crate::services::projects::cache::{CheckEnvelope, ProcessMetrics, ValidateEnvelope}; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -40,6 +40,9 @@ impl IntoResponse for ServiceUnavailable { /// Error type for all store-like requests. #[derive(Debug, thiserror::Error)] pub enum BadStoreRequest { + #[error("could not schedule event processing")] + ScheduleFailed, + #[error("empty request body")] EmptyBody, @@ -110,7 +113,7 @@ impl IntoResponse for BadStoreRequest { (StatusCode::TOO_MANY_REQUESTS, headers, body).into_response() } - BadStoreRequest::QueueFailed => { + BadStoreRequest::ScheduleFailed | BadStoreRequest::QueueFailed => { // These errors indicate that something's wrong with our service system, most likely // mailbox congestion or a faulty shutdown. Indicate an unavailable service to the // client. It might retry event submission at a later time. @@ -271,14 +274,24 @@ fn queue_envelope( if !metric_items.is_empty() { relay_log::trace!("sending metrics into processing queue"); - state.processor().send(ProcessMetrics { + state.project_cache().send(ProcessMetrics { data: MetricData::Raw(metric_items.into_vec()), received_at: envelope.received_at(), sent_at: envelope.sent_at(), project_key: envelope.meta().public_key(), - source: BucketSource::from_meta(envelope.meta()), + source: envelope.meta().into(), }); } + + // Remove metric meta from the envelope and send them directly to processing. 
+ let metric_meta = envelope.take_items_by(|item| matches!(item.ty(), ItemType::MetricMeta)); + if !metric_meta.is_empty() { + relay_log::trace!("sending metric meta into processing queue"); + state.processor().send(ProcessMetricMeta { + items: metric_meta.into_vec(), + project_key: envelope.meta().public_key(), + }) + } } // Split off the envelopes by item type. @@ -309,9 +322,7 @@ fn queue_envelope( } None => { relay_log::trace!("Sending envelope to project cache for V1 buffer"); - state - .legacy_project_cache() - .send(ValidateEnvelope::new(envelope)); + state.project_cache().send(ValidateEnvelope::new(envelope)); } } } @@ -368,9 +379,10 @@ pub async fn handle_envelope( } let checked = state - .project_cache_handle() - .get(managed_envelope.scoping().project_key) - .check_envelope(managed_envelope) + .project_cache() + .send(CheckEnvelope::new(managed_envelope)) + .await + .map_err(|_| BadStoreRequest::ScheduleFailed)? .map_err(BadStoreRequest::EventRejected)?; let Some(mut managed_envelope) = checked.envelope else { diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index ff59384618..391687a31f 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -3,9 +3,10 @@ use std::sync::Arc; use axum::extract::{Query, Request}; use axum::handler::Handler; -use axum::http::StatusCode; use axum::response::{IntoResponse, Result}; use axum::{Json, RequestExt}; +use futures::stream::FuturesUnordered; +use futures::StreamExt; use relay_base_schema::project::ProjectKey; use relay_dynamic_config::{ErrorBoundary, GlobalConfig}; use serde::{Deserialize, Serialize}; @@ -15,10 +16,15 @@ use crate::endpoints::forward; use crate::extractors::SignedJson; use crate::service::ServiceState; use crate::services::global_config::{self, StatusResponse}; +use crate::services::projects::cache::{GetCachedProjectState, GetProjectState}; use crate::services::projects::project::{ - LimitedParsedProjectState, ParsedProjectState, ProjectState, Revision, + LimitedParsedProjectState, ParsedProjectState, ProjectState, }; -use crate::utils::ApiErrorResponse; + +/// V2 version of this endpoint. +/// +/// The request is a list of [`ProjectKey`]s and the response a list of [`ProjectStateWrapper`]s +const ENDPOINT_V2: u16 = 2; /// V3 version of this endpoint. /// @@ -28,16 +34,6 @@ use crate::utils::ApiErrorResponse; /// returned, or a further poll ensues. const ENDPOINT_V3: u16 = 3; -#[derive(Debug, Clone, Copy, thiserror::Error)] -#[error("This API version is no longer supported, upgrade your Relay or Client")] -struct VersionOutdatedError; - -impl IntoResponse for VersionOutdatedError { - fn into_response(self) -> axum::response::Response { - (StatusCode::BAD_REQUEST, ApiErrorResponse::from_error(&self)).into_response() - } -} - /// Helper to deserialize the `version` query parameter. #[derive(Clone, Copy, Debug, Deserialize)] struct VersionQuery { @@ -106,17 +102,19 @@ struct GetProjectStatesRequest { /// /// This length of this list if specified must be the same length /// as [`Self::public_keys`], the items are asssociated by index. 
- revisions: Option>>, + revisions: Option>>>, #[serde(default)] full_config: bool, #[serde(default)] + no_cache: bool, + #[serde(default)] global: bool, } fn into_valid_keys( public_keys: Vec>, - revisions: Option>>, -) -> impl Iterator { + revisions: Option>>>, +) -> impl Iterator)> { let mut revisions = revisions.and_then(|e| e.ok()).unwrap_or_default(); if !revisions.is_empty() && revisions.len() != public_keys.len() { // The downstream sent us a different amount of revisions than project keys, @@ -129,9 +127,7 @@ fn into_valid_keys( ); revisions.clear(); } - let revisions = revisions - .into_iter() - .chain(std::iter::repeat_with(Revision::default)); + let revisions = revisions.into_iter().chain(std::iter::repeat(None)); std::iter::zip(public_keys, revisions).filter_map(|(public_key, revision)| { // Skip unparsable public keys. @@ -143,9 +139,30 @@ fn into_valid_keys( async fn inner( state: ServiceState, + Query(version): Query, body: SignedJson, ) -> Result { let SignedJson { inner, relay } = body; + let project_cache = &state.project_cache().clone(); + + let no_cache = inner.no_cache; + let keys_len = inner.public_keys.len(); + + let mut futures: FuturesUnordered<_> = into_valid_keys(inner.public_keys, inner.revisions) + .map(|(project_key, revision)| async move { + let state_result = if version.version >= ENDPOINT_V3 && !no_cache { + project_cache + .send(GetCachedProjectState::new(project_key)) + .await + } else { + project_cache + .send(GetProjectState::new(project_key).no_cache(no_cache)) + .await + }; + + (project_key, revision, state_result) + }) + .collect(); let (global, global_status) = if inner.global { match state.global_config().send(global_config::Get).await? { @@ -161,15 +178,12 @@ async fn inner( (None, None) }; - let keys_len = inner.public_keys.len(); let mut pending = Vec::with_capacity(keys_len); let mut unchanged = Vec::with_capacity(keys_len); let mut configs = HashMap::with_capacity(keys_len); - for (project_key, revision) in into_valid_keys(inner.public_keys, inner.revisions) { - let project = state.project_cache_handle().get(project_key); - - let project_info = match project.state() { + while let Some((project_key, revision, state_result)) = futures.next().await { + let project_info = match state_result? { ProjectState::Enabled(info) => info, ProjectState::Disabled => { // Don't insert project config. Downstream Relay will consider it disabled. @@ -182,7 +196,7 @@ async fn inner( }; // Only ever omit responses when there was a valid revision in the first place. - if project_info.rev == revision { + if revision.is_some() && project_info.rev == revision { unchanged.push(project_key); continue; } @@ -223,14 +237,9 @@ async fn inner( })) } -/// Returns `true` for all `?version` query parameters that are no longer supported by Relay and Sentry. -fn is_outdated(Query(query): Query) -> bool { - query.version < ENDPOINT_V3 -} - /// Returns `true` if the `?version` query parameter is compatible with this implementation. fn is_compatible(Query(query): Query) -> bool { - query.version == ENDPOINT_V3 + query.version >= ENDPOINT_V2 && query.version <= ENDPOINT_V3 } /// Endpoint handler for the project configs endpoint. @@ -246,11 +255,9 @@ fn is_compatible(Query(query): Query) -> bool { /// support old Relay versions. 
pub async fn handle(state: ServiceState, mut req: Request) -> Result { let data = req.extract_parts().await?; - if is_outdated(data) { - Err(VersionOutdatedError.into()) - } else if is_compatible(data) { - Ok(inner.call(req, state).await) + Ok(if is_compatible(data) { + inner.call(req, state).await } else { - Ok(forward::forward(state, req).await) - } + forward::forward(state, req).await + }) } diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index b823bb9507..dd7c74b330 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -105,6 +105,8 @@ pub enum ItemType { Statsd, /// Buckets of preaggregated metrics encoded as JSON. MetricBuckets, + /// Additional metadata for metrics + MetricMeta, /// Client internal report (eg: outcomes). ClientReport, /// Profile event payload encoded as JSON. @@ -166,6 +168,7 @@ impl ItemType { Self::Sessions => "sessions", Self::Statsd => "statsd", Self::MetricBuckets => "metric_buckets", + Self::MetricMeta => "metric_meta", Self::ClientReport => "client_report", Self::Profile => "profile", Self::ReplayEvent => "replay_event", @@ -189,7 +192,10 @@ impl ItemType { /// Returns `true` if the item is a metric type. pub fn is_metrics(&self) -> bool { - matches!(self, ItemType::Statsd | ItemType::MetricBuckets) + matches!( + self, + ItemType::Statsd | ItemType::MetricBuckets | ItemType::MetricMeta + ) } } @@ -218,6 +224,7 @@ impl std::str::FromStr for ItemType { "sessions" => Self::Sessions, "statsd" => Self::Statsd, "metric_buckets" => Self::MetricBuckets, + "metric_meta" => Self::MetricMeta, "client_report" => Self::ClientReport, "profile" => Self::Profile, "replay_event" => Self::ReplayEvent, @@ -678,7 +685,7 @@ impl Item { ItemType::UnrealReport => Some(DataCategory::Error), ItemType::Attachment => Some(DataCategory::Attachment), ItemType::Session | ItemType::Sessions => None, - ItemType::Statsd | ItemType::MetricBuckets => None, + ItemType::Statsd | ItemType::MetricBuckets | ItemType::MetricMeta => None, ItemType::FormData => None, ItemType::UserReport => None, ItemType::UserReportV2 => Some(DataCategory::UserReportV2), @@ -908,6 +915,7 @@ impl Item { | ItemType::Sessions | ItemType::Statsd | ItemType::MetricBuckets + | ItemType::MetricMeta | ItemType::ClientReport | ItemType::ReplayEvent | ItemType::ReplayRecording @@ -944,6 +952,7 @@ impl Item { ItemType::Sessions => false, ItemType::Statsd => false, ItemType::MetricBuckets => false, + ItemType::MetricMeta => false, ItemType::ClientReport => false, ItemType::ReplayRecording => false, ItemType::ReplayVideo => false, diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index ac953b13ff..d7e954fe86 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -1,4 +1,5 @@ use std::convert::Infallible; +use std::fmt; use std::sync::Arc; use std::time::Duration; @@ -7,12 +8,11 @@ use crate::services::buffer::{self, EnvelopeBufferService, ObservableEnvelopeBuf use crate::services::cogs::{CogsService, CogsServiceRecorder}; use crate::services::global_config::{GlobalConfigManager, GlobalConfigService}; use crate::services::health_check::{HealthCheck, HealthCheckService}; -use crate::services::metrics::RouterService; +use crate::services::metrics::{Aggregator, RouterService}; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use 
crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectCacheService}; -use crate::services::projects::source::ProjectSource; +use crate::services::projects::cache::{ProjectCache, ProjectCacheService, Services}; use crate::services::relays::{RelayCache, RelayCacheService}; use crate::services::stats::RelayStats; #[cfg(feature = "processing")] @@ -52,8 +52,9 @@ pub enum ServiceError { Redis, } -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct Registry { + pub aggregator: Addr, pub health_check: Addr, pub outcome_producer: Addr, pub outcome_aggregator: Addr, @@ -61,11 +62,21 @@ pub struct Registry { pub test_store: Addr, pub relay_cache: Addr, pub global_config: Addr, - pub legacy_project_cache: Addr, + pub project_cache: Addr, pub upstream_relay: Addr, pub envelope_buffer: Option, +} - pub project_cache_handle: ProjectCacheHandle, +impl fmt::Debug for Registry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Registry") + .field("aggregator", &self.aggregator) + .field("health_check", &self.health_check) + .field("outcome_producer", &self.outcome_producer) + .field("outcome_aggregator", &self.outcome_aggregator) + .field("processor", &format_args!("Addr")) + .finish() + } } /// Constructs a tokio [`Runtime`] configured for running [services](relay_system::Service). @@ -133,7 +144,6 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, - registry: Registry, } @@ -188,23 +198,12 @@ impl ServiceState { // service fail if the service is not running. let global_config = global_config.start(); - let (legacy_project_cache, legacy_project_cache_rx) = - channel(legacy::ProjectCacheService::name()); - - let project_source = ProjectSource::start( - Arc::clone(&config), - upstream_relay.clone(), - redis_pools - .as_ref() - .map(|pools| pools.project_configs.clone()), - ); - let project_cache_handle = - ProjectCacheService::new(Arc::clone(&config), project_source).start(); + let (project_cache, project_cache_rx) = channel(ProjectCacheService::name()); let aggregator = RouterService::new( config.default_aggregator_config().clone(), config.secondary_aggregator_configs().clone(), - Some(legacy_project_cache.clone().recipient()), + Some(project_cache.clone().recipient()), ); let aggregator_handle = aggregator.handle(); let aggregator = aggregator.start(); @@ -239,11 +238,11 @@ impl ServiceState { create_processor_pool(&config)?, config.clone(), global_config_handle, - project_cache_handle.clone(), cogs, #[cfg(feature = "processing")] redis_pools.clone(), processor::Addrs { + project_cache: project_cache.clone(), outcome_aggregator: outcome_aggregator.clone(), upstream_relay: upstream_relay.clone(), test_store: test_store.clone(), @@ -262,7 +261,7 @@ impl ServiceState { global_config_rx.clone(), buffer::Services { envelopes_tx, - project_cache_handle: project_cache_handle.clone(), + project_cache: project_cache.clone(), outcome_aggregator: outcome_aggregator.clone(), test_store: test_store.clone(), }, @@ -270,24 +269,27 @@ impl ServiceState { .map(|b| b.start_observable()); // Keep all the services in one context. 
- let project_cache_services = legacy::Services { + let project_cache_services = Services { envelope_buffer: envelope_buffer.as_ref().map(ObservableEnvelopeBuffer::addr), aggregator: aggregator.clone(), envelope_processor: processor.clone(), outcome_aggregator: outcome_aggregator.clone(), - project_cache: legacy_project_cache.clone(), + project_cache: project_cache.clone(), test_store: test_store.clone(), + upstream_relay: upstream_relay.clone(), }; - legacy::ProjectCacheService::new( + ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), - project_cache_handle.clone(), project_cache_services, global_config_rx, envelopes_rx, + redis_pools + .as_ref() + .map(|pools| pools.project_configs.clone()), ) - .spawn_handler(legacy_project_cache_rx); + .spawn_handler(project_cache_rx); let health_check = HealthCheckService::new( config.clone(), @@ -309,6 +311,7 @@ impl ServiceState { let relay_cache = RelayCacheService::new(config.clone(), upstream_relay.clone()).start(); let registry = Registry { + aggregator, processor, health_check, outcome_producer, @@ -316,8 +319,7 @@ impl ServiceState { test_store, relay_cache, global_config, - legacy_project_cache, - project_cache_handle, + project_cache, upstream_relay, envelope_buffer, }; @@ -350,14 +352,9 @@ impl ServiceState { self.inner.registry.envelope_buffer.as_ref() } - /// Returns the address of the [`legacy::ProjectCache`] service. - pub fn legacy_project_cache(&self) -> &Addr { - &self.inner.registry.legacy_project_cache - } - - /// Returns a [`ProjectCacheHandle`]. - pub fn project_cache_handle(&self) -> &ProjectCacheHandle { - &self.inner.registry.project_cache_handle + /// Returns the address of the [`ProjectCache`] service. + pub fn project_cache(&self) -> &Addr { + &self.inner.registry.project_cache } /// Returns the address of the [`RelayCache`] service. 
@@ -434,21 +431,25 @@ pub fn create_redis_pools(configs: RedisPoolConfigs) -> Result { let project_configs = create_redis_pool(project_configs)?; let cardinality = create_redis_pool(cardinality)?; let quotas = create_redis_pool(quotas)?; + let misc = create_redis_pool(misc)?; Ok(RedisPools { project_configs, cardinality, quotas, + misc, }) } } @@ -459,10 +460,11 @@ fn initialize_redis_scripts_for_pools(redis_pools: &RedisPools) -> Result<(), Re let project_configs = redis_pools.project_configs.client()?; let cardinality = redis_pools.cardinality.client()?; let quotas = redis_pools.quotas.client()?; + let misc = redis_pools.misc.client()?; let scripts = RedisScripts::all(); - let pools = [project_configs, cardinality, quotas]; + let pools = [project_configs, cardinality, quotas, misc]; for pool in pools { initialize_redis_scripts(pool, &scripts)?; } diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index ff8d1cfd36..eeb561f16f 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -21,7 +21,8 @@ use crate::services::outcome::DiscardReason; use crate::services::outcome::Outcome; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectChange}; +use crate::services::projects::cache::{DequeuedEnvelope, ProjectCache, UpdateProject}; + use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::ManagedEnvelope; @@ -98,8 +99,8 @@ impl ObservableEnvelopeBuffer { pub struct Services { /// Bounded channel used exclusively to handle backpressure when sending envelopes to the /// project cache. - pub envelopes_tx: mpsc::Sender, - pub project_cache_handle: ProjectCacheHandle, + pub envelopes_tx: mpsc::Sender, + pub project_cache: Addr, pub outcome_aggregator: Addr, pub test_store: Addr, } @@ -156,7 +157,7 @@ impl EnvelopeBufferService { &mut self, buffer: &PolymorphicEnvelopeBuffer, dequeue: bool, - ) -> Option> { + ) -> Option> { relay_statsd::metric!( counter(RelayCounters::BufferReadyToPop) += 1, status = "checking" @@ -217,7 +218,7 @@ impl EnvelopeBufferService { config: &Config, buffer: &mut PolymorphicEnvelopeBuffer, services: &Services, - envelopes_tx_permit: Permit<'a, legacy::DequeuedEnvelope>, + envelopes_tx_permit: Permit<'a, DequeuedEnvelope>, ) -> Result { let sleep = match buffer.peek().await? { Peek::Empty => { @@ -250,7 +251,7 @@ impl EnvelopeBufferService { .pop() .await? .expect("Element disappeared despite exclusive excess"); - envelopes_tx_permit.send(legacy::DequeuedEnvelope(envelope)); + envelopes_tx_permit.send(DequeuedEnvelope(envelope)); Duration::ZERO // try next pop immediately } @@ -268,12 +269,12 @@ impl EnvelopeBufferService { relay_log::trace!("EnvelopeBufferService: requesting project(s) update"); let own_key = envelope.meta().public_key(); - services.project_cache_handle.fetch(own_key); + services.project_cache.send(UpdateProject(own_key)); match envelope.sampling_key() { None => {} Some(sampling_key) if sampling_key == own_key => {} // already sent. 
Some(sampling_key) => { - services.project_cache_handle.fetch(sampling_key); + services.project_cache.send(UpdateProject(sampling_key)); } } @@ -397,7 +398,6 @@ impl Service for EnvelopeBufferService { buffer.initialize().await; let mut shutdown = Controller::shutdown_handle(); - let mut project_events = self.services.project_cache_handle.changes(); relay_log::info!("EnvelopeBufferService: starting"); loop { @@ -427,10 +427,6 @@ impl Service for EnvelopeBufferService { } } } - Ok(ProjectChange::Ready(project_key)) = project_events.recv() => { - Self::handle_message(&mut buffer, EnvelopeBuffer::Ready(project_key)).await; - sleep = Duration::ZERO; - } Some(message) = rx.recv() => { Self::handle_message(&mut buffer, message).await; sleep = Duration::ZERO; @@ -487,8 +483,8 @@ mod tests { struct EnvelopeBufferServiceResult { service: EnvelopeBufferService, global_tx: watch::Sender, - envelopes_rx: mpsc::Receiver, - project_cache_handle: ProjectCacheHandle, + envelopes_rx: mpsc::Receiver, + project_cache_rx: mpsc::UnboundedReceiver, outcome_aggregator_rx: mpsc::UnboundedReceiver, } @@ -496,8 +492,6 @@ mod tests { config_json: Option, global_config_status: global_config::Status, ) -> EnvelopeBufferServiceResult { - relay_log::init_test!(); - let config_json = config_json.unwrap_or(serde_json::json!({ "spool": { "envelopes": { @@ -510,8 +504,8 @@ mod tests { let memory_stat = MemoryStat::default(); let (global_tx, global_rx) = watch::channel(global_config_status); let (envelopes_tx, envelopes_rx) = mpsc::channel(5); + let (project_cache, project_cache_rx) = Addr::custom(); let (outcome_aggregator, outcome_aggregator_rx) = Addr::custom(); - let project_cache_handle = ProjectCacheHandle::for_test(); let envelope_buffer_service = EnvelopeBufferService::new( config, @@ -519,7 +513,7 @@ mod tests { global_rx, Services { envelopes_tx, - project_cache_handle: project_cache_handle.clone(), + project_cache, outcome_aggregator, test_store: Addr::dummy(), }, @@ -530,7 +524,7 @@ mod tests { service: envelope_buffer_service, global_tx, envelopes_rx, - project_cache_handle, + project_cache_rx, outcome_aggregator_rx, } } @@ -543,7 +537,7 @@ mod tests { service, global_tx: _global_tx, envelopes_rx: _envelopes_rx, - project_cache_handle: _project_cache_handle, + project_cache_rx: _project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service(None, global_config::Status::Pending); @@ -568,8 +562,8 @@ mod tests { service, global_tx, envelopes_rx, + project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, - .. } = envelope_buffer_service(None, global_config::Status::Pending); let addr = service.start(); @@ -582,6 +576,7 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); + assert_eq!(project_cache_rx.len(), 0); global_tx.send_replace(global_config::Status::Ready(Arc::new( GlobalConfig::default(), @@ -590,6 +585,7 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 1); + assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -599,9 +595,9 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, + project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, global_tx: _global_tx, - .. 
} = envelope_buffer_service( Some(serde_json::json!({ "spool": { @@ -627,6 +623,7 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); + assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -636,7 +633,7 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_handle: _project_cache_handle, + project_cache_rx, mut outcome_aggregator_rx, global_tx: _global_tx, } = envelope_buffer_service( @@ -664,6 +661,7 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; assert_eq!(envelopes_rx.len(), 0); + assert_eq!(project_cache_rx.len(), 0); let outcome = outcome_aggregator_rx.try_recv().unwrap(); assert_eq!(outcome.category, DataCategory::TransactionIndexed); @@ -677,7 +675,7 @@ mod tests { let EnvelopeBufferServiceResult { service, mut envelopes_rx, - project_cache_handle, + mut project_cache_rx, global_tx: _global_tx, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service( @@ -691,10 +689,10 @@ mod tests { let project_key = envelope.meta().public_key(); addr.send(EnvelopeBuffer::Push(envelope.clone())); - tokio::time::sleep(Duration::from_secs(3)).await; - let message = tokio::time::timeout(Duration::from_secs(5), envelopes_rx.recv()); - let Some(legacy::DequeuedEnvelope(envelope)) = message.await.unwrap() else { + tokio::time::sleep(Duration::from_secs(1)).await; + + let Some(DequeuedEnvelope(envelope)) = envelopes_rx.recv().await else { panic!(); }; @@ -702,11 +700,20 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; - assert_eq!(project_cache_handle.test_num_fetches(), 1); + assert_eq!(project_cache_rx.len(), 1); + let message = project_cache_rx.recv().await; + assert!(matches!( + message, + Some(ProjectCache::UpdateProject(key)) if key == project_key + )); - tokio::time::sleep(Duration::from_millis(1300)).await; + tokio::time::sleep(Duration::from_secs(1)).await; - assert_eq!(project_cache_handle.test_num_fetches(), 2); + assert_eq!(project_cache_rx.len(), 1); + assert!(matches!( + message, + Some(ProjectCache::UpdateProject(key)) if key == project_key + )) } #[tokio::test] @@ -717,8 +724,8 @@ mod tests { service, mut envelopes_rx, global_tx: _global_tx, + project_cache_rx: _project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, - .. 
} = envelope_buffer_service( None, global_config::Status::Ready(Arc::new(GlobalConfig::default())), @@ -741,7 +748,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) + .filter(|message| matches!(message, DequeuedEnvelope(..))) .count(), 5 ); @@ -754,7 +761,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) + .filter(|message| matches!(message, DequeuedEnvelope(..))) .count(), 5 ); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index d5f5ef58d9..af212a5ec0 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -30,7 +30,7 @@ use relay_event_schema::protocol::{ ClientReport, Event, EventId, EventType, IpAddr, Metrics, NetworkReportError, }; use relay_filter::FilterStatKey; -use relay_metrics::{Bucket, BucketMetadata, BucketView, BucketsView, MetricNamespace}; +use relay_metrics::{Bucket, BucketMetadata, BucketView, BucketsView, MetricMeta, MetricNamespace}; use relay_pii::PiiConfigError; use relay_profiling::ProfileId; use relay_protocol::Annotated; @@ -52,6 +52,7 @@ use { RedisSetLimiterOptions, }, relay_dynamic_config::{CardinalityLimiterMode, GlobalConfig, MetricExtractionGroups}, + relay_metrics::RedisMetricMetaStore, relay_quotas::{Quota, RateLimitingError, RedisRateLimiter}, relay_redis::{RedisPool, RedisPools}, std::iter::Chain, @@ -72,7 +73,9 @@ use crate::services::global_config::GlobalConfigHandle; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::event::FiltersStatus; -use crate::services::projects::cache::ProjectCacheHandle; +use crate::services::projects::cache::{ + AddMetricMeta, BucketSource, ProcessMetrics, ProjectCache, UpdateRateLimits, +}; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; use crate::services::upstream::{ @@ -765,7 +768,7 @@ struct ProcessEnvelopeState<'a, Group> { /// Currently active cached rate limits of the project this envelope belongs to. #[cfg_attr(not(feature = "processing"), expect(dead_code))] - rate_limits: Arc, + rate_limits: RateLimits, /// The config of this Relay instance. config: Arc, @@ -882,7 +885,7 @@ pub struct ProcessEnvelope { /// The project info. pub project_info: Arc, /// Currently active cached rate limits for this project. - pub rate_limits: Arc, + pub rate_limits: RateLimits, /// Root sampling project info. pub sampling_project_info: Option>, /// Sampling reservoir counters. @@ -901,7 +904,16 @@ pub struct ProcessEnvelope { /// Additionally, processing applies clock drift correction using the system clock of this Relay, if /// the Envelope specifies the [`sent_at`](Envelope::sent_at) header. #[derive(Debug)] -pub struct ProcessMetrics { +pub struct ProcessProjectMetrics { + /// The project state the metrics belong to. + /// + /// The project state can be pending, in which case cached rate limits + /// and other project specific operations are skipped and executed once + /// the project state becomes available. + pub project_state: ProjectState, + /// Currently active cached rate limits for this project. + pub rate_limits: RateLimits, + /// A list of metric items. pub data: MetricData, /// The target project. 
@@ -989,30 +1001,13 @@ pub struct ProcessBatchedMetrics { pub sent_at: Option>, } -/// Source information where a metric bucket originates from. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum BucketSource { - /// The metric bucket originated from an internal Relay use case. - /// - /// The metric bucket originates either from within the same Relay - /// or was accepted coming from another Relay which is registered as - /// an internal Relay via Relay's configuration. - Internal, - /// The bucket source originated from an untrusted source. - /// - /// Managed Relays sending extracted metrics are considered external, - /// it's a project use case but it comes from an untrusted source. - External, -} - -impl BucketSource { - /// Infers the bucket source from [`RequestMeta::is_from_internal_relay`]. - pub fn from_meta(meta: &RequestMeta) -> Self { - match meta.is_from_internal_relay() { - true => Self::Internal, - false => Self::External, - } - } +/// Parses a list of metric meta items and pushes them to the project cache for aggregation. +#[derive(Debug)] +pub struct ProcessMetricMeta { + /// A list of metric meta items. + pub items: Vec, + /// The target project. + pub project_key: ProjectKey, } /// Metric buckets with additional project. @@ -1023,7 +1018,7 @@ pub struct ProjectMetrics { /// Project info for extracting quotas. pub project_info: Arc, /// Currently cached rate limits. - pub rate_limits: Arc, + pub rate_limits: RateLimits, } /// Encodes metrics into an envelope ready to be sent upstream. @@ -1033,6 +1028,18 @@ pub struct EncodeMetrics { pub scopes: BTreeMap, } +/// Encodes metric meta into an [`Envelope`] and sends it upstream. +/// +/// At the moment, upstream means directly into Redis for processing relays +/// and otherwise submitting the Envelope via HTTP to the [`UpstreamRelay`]. +#[derive(Debug)] +pub struct EncodeMetricMeta { + /// Scoping of the meta. + pub scoping: Scoping, + /// The metric meta. + pub meta: MetricMeta, +} + /// Sends an envelope to the upstream or Kafka. 
#[derive(Debug)] pub struct SubmitEnvelope { @@ -1052,9 +1059,11 @@ pub struct SubmitClientReports { #[derive(Debug)] pub enum EnvelopeProcessor { ProcessEnvelope(Box), - ProcessProjectMetrics(Box), + ProcessProjectMetrics(Box), ProcessBatchedMetrics(Box), + ProcessMetricMeta(Box), EncodeMetrics(Box), + EncodeMetricMeta(Box), SubmitEnvelope(Box), SubmitClientReports(Box), } @@ -1066,7 +1075,9 @@ impl EnvelopeProcessor { EnvelopeProcessor::ProcessEnvelope(_) => "ProcessEnvelope", EnvelopeProcessor::ProcessProjectMetrics(_) => "ProcessProjectMetrics", EnvelopeProcessor::ProcessBatchedMetrics(_) => "ProcessBatchedMetrics", + EnvelopeProcessor::ProcessMetricMeta(_) => "ProcessMetricMeta", EnvelopeProcessor::EncodeMetrics(_) => "EncodeMetrics", + EnvelopeProcessor::EncodeMetricMeta(_) => "EncodeMetricMeta", EnvelopeProcessor::SubmitEnvelope(_) => "SubmitEnvelope", EnvelopeProcessor::SubmitClientReports(_) => "SubmitClientReports", } @@ -1083,10 +1094,10 @@ impl FromMessage for EnvelopeProcessor { } } -impl FromMessage for EnvelopeProcessor { +impl FromMessage for EnvelopeProcessor { type Response = NoResponse; - fn from_message(message: ProcessMetrics, _: ()) -> Self { + fn from_message(message: ProcessProjectMetrics, _: ()) -> Self { Self::ProcessProjectMetrics(Box::new(message)) } } @@ -1099,6 +1110,14 @@ impl FromMessage for EnvelopeProcessor { } } +impl FromMessage for EnvelopeProcessor { + type Response = NoResponse; + + fn from_message(message: ProcessMetricMeta, _: ()) -> Self { + Self::ProcessMetricMeta(Box::new(message)) + } +} + impl FromMessage for EnvelopeProcessor { type Response = NoResponse; @@ -1107,6 +1126,14 @@ impl FromMessage for EnvelopeProcessor { } } +impl FromMessage for EnvelopeProcessor { + type Response = NoResponse; + + fn from_message(message: EncodeMetricMeta, _: ()) -> Self { + Self::EncodeMetricMeta(Box::new(message)) + } +} + impl FromMessage for EnvelopeProcessor { type Response = NoResponse; @@ -1133,6 +1160,7 @@ pub struct EnvelopeProcessorService { /// Contains the addresses of services that the processor publishes to. pub struct Addrs { + pub project_cache: Addr, pub outcome_aggregator: Addr, pub upstream_relay: Addr, pub test_store: Addr, @@ -1144,6 +1172,7 @@ pub struct Addrs { impl Default for Addrs { fn default() -> Self { Addrs { + project_cache: Addr::dummy(), outcome_aggregator: Addr::dummy(), upstream_relay: Addr::dummy(), test_store: Addr::dummy(), @@ -1158,7 +1187,6 @@ struct InnerProcessor { workers: WorkerGroup, config: Arc, global_config: GlobalConfigHandle, - project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] quotas_pool: Option, @@ -1167,18 +1195,18 @@ struct InnerProcessor { rate_limiter: Option, geoip_lookup: Option, #[cfg(feature = "processing")] + metric_meta_store: Option, + #[cfg(feature = "processing")] cardinality_limiter: Option, metric_outcomes: MetricOutcomes, } impl EnvelopeProcessorService { /// Creates a multi-threaded envelope processor. - #[cfg_attr(feature = "processing", expect(clippy::too_many_arguments))] pub fn new( pool: ThreadPool, config: Arc, global_config: GlobalConfigHandle, - project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] redis: Option, addrs: Addrs, @@ -1195,19 +1223,19 @@ impl EnvelopeProcessorService { }); #[cfg(feature = "processing")] - let (cardinality, quotas) = match redis { + let (cardinality, quotas, misc) = match redis { Some(RedisPools { cardinality, quotas, + misc, .. 
- }) => (Some(cardinality), Some(quotas)), - None => (None, None), + }) => (Some(cardinality), Some(quotas), Some(misc)), + None => (None, None, None), }; let inner = InnerProcessor { workers: WorkerGroup::new(pool), global_config, - project_cache, cogs, #[cfg(feature = "processing")] quotas_pool: quotas.clone(), @@ -1217,6 +1245,10 @@ impl EnvelopeProcessorService { addrs, geoip_lookup, #[cfg(feature = "processing")] + metric_meta_store: misc.map(|misc| { + RedisMetricMetaStore::new(misc, config.metrics_meta_locations_expiry()) + }), + #[cfg(feature = "processing")] cardinality_limiter: cardinality .map(|cardinality| { RedisSetLimiter::new( @@ -1273,7 +1305,7 @@ impl EnvelopeProcessorService { mut managed_envelope: TypedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: Arc, + rate_limits: RateLimits, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> ProcessEnvelopeState { @@ -1344,11 +1376,10 @@ impl EnvelopeProcessorService { // Update cached rate limits with the freshly computed ones. if !limits.is_empty() { - self.inner - .project_cache - .get(state.managed_envelope.scoping().project_key) - .rate_limits() - .merge(limits); + self.inner.addrs.project_cache.send(UpdateRateLimits::new( + state.managed_envelope.scoping().project_key, + limits, + )); } Ok(()) @@ -1884,7 +1915,7 @@ impl EnvelopeProcessorService { mut managed_envelope: ManagedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: Arc, + rate_limits: RateLimits, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> Result { @@ -2102,8 +2133,10 @@ impl EnvelopeProcessorService { } } - fn handle_process_metrics(&self, cogs: &mut Token, message: ProcessMetrics) { - let ProcessMetrics { + fn handle_process_project_metrics(&self, cogs: &mut Token, message: ProcessProjectMetrics) { + let ProcessProjectMetrics { + project_state, + rate_limits, data, project_key, received_at, @@ -2142,16 +2175,13 @@ impl EnvelopeProcessorService { true }); - let project = self.inner.project_cache.get(project_key); - // Best effort check to filter and rate limit buckets, if there is no project state // available at the current time, we will check again after flushing. 
- let buckets = match project.state() { - ProjectState::Enabled(project_info) => { - let rate_limits = project.rate_limits().current_limits(); - self.check_buckets(project_key, project_info, &rate_limits, buckets) + let buckets = match project_state.enabled() { + Some(project_info) => { + self.check_buckets(project_key, &project_info, &rate_limits, buckets) } - _ => buckets, + None => buckets, }; relay_log::trace!("merging metric buckets into the aggregator"); @@ -2186,17 +2216,50 @@ impl EnvelopeProcessorService { } }; + let mut feature_weights = FeatureWeights::none(); for (project_key, buckets) in buckets { - self.handle_process_metrics( - cogs, - ProcessMetrics { - data: MetricData::Parsed(buckets), - project_key, - source, - received_at, - sent_at, - }, - ) + feature_weights = feature_weights.merge(relay_metrics::cogs::BySize(&buckets).into()); + + self.inner.addrs.project_cache.send(ProcessMetrics { + data: MetricData::Parsed(buckets), + project_key, + source, + received_at, + sent_at, + }); + } + + if !feature_weights.is_empty() { + cogs.update(feature_weights); + } + } + + fn handle_process_metric_meta(&self, message: ProcessMetricMeta) { + let ProcessMetricMeta { items, project_key } = message; + + for item in items { + if item.ty() != &ItemType::MetricMeta { + relay_log::error!( + "invalid item of type {} passed to ProcessMetricMeta", + item.ty() + ); + continue; + } + + let payload = item.payload(); + match serde_json::from_slice::(&payload) { + Ok(meta) => { + relay_log::trace!("adding metric metadata to project cache"); + self.inner + .addrs + .project_cache + .send(AddMetricMeta { project_key, meta }); + } + Err(error) => { + metric!(counter(RelayCounters::MetricMetaParsingFailed) += 1); + relay_log::debug!(error = &error as &dyn Error, "failed to parse metric meta"); + } + } } } @@ -2244,7 +2307,7 @@ impl EnvelopeProcessorService { envelope, body, http_encoding, - project_cache: self.inner.project_cache.clone(), + project_cache: self.inner.addrs.project_cache.clone(), })); } Err(error) => { @@ -2397,11 +2460,10 @@ impl EnvelopeProcessorService { Outcome::RateLimited(reason_code), ); - self.inner - .project_cache - .get(item_scoping.scoping.project_key) - .rate_limits() - .merge(limits); + self.inner.addrs.project_cache.send(UpdateRateLimits::new( + item_scoping.scoping.project_key, + limits, + )); } } @@ -2476,10 +2538,9 @@ impl EnvelopeProcessorService { if was_enforced { // Update the rate limits in the project cache. 
self.inner + .addrs .project_cache - .get(scoping.project_key) - .rate_limits() - .merge(rate_limits); + .send(UpdateRateLimits::new(scoping.project_key, rate_limits)); } } } @@ -2775,6 +2836,54 @@ impl EnvelopeProcessorService { } } + fn handle_encode_metric_meta(&self, message: EncodeMetricMeta) { + #[cfg(feature = "processing")] + if self.inner.config.processing_enabled() { + return self.store_metric_meta(message); + } + + self.encode_metric_meta(message); + } + + fn encode_metric_meta(&self, message: EncodeMetricMeta) { + let EncodeMetricMeta { scoping, meta } = message; + + let upstream = self.inner.config.upstream_descriptor(); + let dsn = PartialDsn::outbound(&scoping, upstream); + + let mut item = Item::new(ItemType::MetricMeta); + item.set_payload(ContentType::Json, serde_json::to_vec(&meta).unwrap()); + let mut envelope = Envelope::from_request(None, RequestMeta::outbound(dsn)); + envelope.add_item(item); + + let envelope = ManagedEnvelope::new( + envelope, + self.inner.addrs.outcome_aggregator.clone(), + self.inner.addrs.test_store.clone(), + ProcessingGroup::Metrics, + ); + self.handle_submit_envelope(SubmitEnvelope { + envelope: envelope.into_processed(), + }); + } + + #[cfg(feature = "processing")] + fn store_metric_meta(&self, message: EncodeMetricMeta) { + let EncodeMetricMeta { scoping, meta } = message; + + let Some(ref metric_meta_store) = self.inner.metric_meta_store else { + return; + }; + + let r = metric_meta_store.store(scoping.organization_id, scoping.project_id, meta); + if let Err(error) = r { + relay_log::error!( + error = &error as &dyn std::error::Error, + "failed to store metric meta in redis" + ) + } + } + #[cfg(all(test, feature = "processing"))] fn redis_rate_limiter_enabled(&self) -> bool { self.inner.rate_limiter.is_some() @@ -2790,12 +2899,14 @@ impl EnvelopeProcessorService { match message { EnvelopeProcessor::ProcessEnvelope(m) => self.handle_process_envelope(*m), EnvelopeProcessor::ProcessProjectMetrics(m) => { - self.handle_process_metrics(&mut cogs, *m) + self.handle_process_project_metrics(&mut cogs, *m) } EnvelopeProcessor::ProcessBatchedMetrics(m) => { self.handle_process_batched_metrics(&mut cogs, *m) } + EnvelopeProcessor::ProcessMetricMeta(m) => self.handle_process_metric_meta(*m), EnvelopeProcessor::EncodeMetrics(m) => self.handle_encode_metrics(*m), + EnvelopeProcessor::EncodeMetricMeta(m) => self.handle_encode_metric_meta(*m), EnvelopeProcessor::SubmitEnvelope(m) => self.handle_submit_envelope(*m), EnvelopeProcessor::SubmitClientReports(m) => self.handle_submit_client_reports(*m), } @@ -2807,6 +2918,7 @@ impl EnvelopeProcessorService { EnvelopeProcessor::ProcessEnvelope(v) => AppFeature::from(v.envelope.group()).into(), EnvelopeProcessor::ProcessProjectMetrics(_) => AppFeature::Unattributed.into(), EnvelopeProcessor::ProcessBatchedMetrics(_) => AppFeature::Unattributed.into(), + EnvelopeProcessor::ProcessMetricMeta(_) => AppFeature::MetricMeta.into(), EnvelopeProcessor::EncodeMetrics(v) => v .scopes .values() @@ -2820,6 +2932,7 @@ impl EnvelopeProcessorService { } }) .fold(FeatureWeights::none(), FeatureWeights::merge), + EnvelopeProcessor::EncodeMetricMeta(_) => AppFeature::MetricMeta.into(), EnvelopeProcessor::SubmitEnvelope(v) => AppFeature::from(v.envelope.group()).into(), EnvelopeProcessor::SubmitClientReports(_) => AppFeature::ClientReports.into(), } @@ -2936,7 +3049,7 @@ pub struct SendEnvelope { envelope: TypedEnvelope, body: Bytes, http_encoding: HttpEncoding, - project_cache: ProjectCacheHandle, + project_cache: Addr, } impl 
UpstreamRequest for SendEnvelope { @@ -2988,10 +3101,10 @@ impl UpstreamRequest for SendEnvelope { self.envelope.accept(); if let UpstreamRequestError::RateLimited(limits) = error { - self.project_cache - .get(scoping.project_key) - .rate_limits() - .merge(limits.scope(&scoping)); + self.project_cache.send(UpdateRateLimits::new( + scoping.project_key, + limits.scope(&scoping), + )); } } Err(error) => { @@ -3731,14 +3844,16 @@ mod tests { ), (BucketSource::Internal, None), ] { - let message = ProcessMetrics { + let message = ProcessProjectMetrics { data: MetricData::Raw(vec![item.clone()]), + project_state: ProjectState::Pending, + rate_limits: Default::default(), project_key, source, received_at, sent_at: Some(Utc::now()), }; - processor.handle_process_metrics(&mut token, message); + processor.handle_process_project_metrics(&mut token, message); let value = aggregator_rx.recv().await.unwrap(); let Aggregator::MergeBuckets(merge_buckets) = value else { @@ -3756,11 +3871,11 @@ mod tests { let received_at = Utc::now(); let config = Config::default(); - let (aggregator, mut aggregator_rx) = Addr::custom(); + let (project_cache, mut project_cache_rx) = Addr::custom(); let processor = create_test_processor_with_addrs( config, Addrs { - aggregator, + project_cache, ..Default::default() }, ); @@ -3803,72 +3918,78 @@ mod tests { }; processor.handle_process_batched_metrics(&mut token, message); - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(mb1) = value else { + let value = project_cache_rx.recv().await.unwrap(); + let ProjectCache::ProcessMetrics(pm1) = value else { panic!() }; - let value = aggregator_rx.recv().await.unwrap(); - let Aggregator::MergeBuckets(mb2) = value else { + let value = project_cache_rx.recv().await.unwrap(); + let ProjectCache::ProcessMetrics(pm2) = value else { panic!() }; - let mut messages = vec![mb1, mb2]; + let mut messages = vec![pm1, pm2]; messages.sort_by_key(|pm| pm.project_key); let actual = messages .into_iter() - .map(|pm| (pm.project_key, pm.buckets)) + .map(|pm| (pm.project_key, pm.data, pm.source)) .collect::>(); assert_debug_snapshot!(actual, @r###" [ ( ProjectKey("11111111111111111111111111111111"), - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.response_time@millisecond", - ), - value: Distribution( - [ - 68.0, - ], - ), - tags: { - "route": "user_index", - }, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, + Parsed( + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.response_time@millisecond", + ), + value: Distribution( + [ + 68.0, + ], + ), + tags: { + "route": "user_index", + }, + metadata: BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, + }, }, - }, - ], + ], + ), + Internal, ), ( ProjectKey("22222222222222222222222222222222"), - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.cache_rate@none", - ), - value: Distribution( - [ - 36.0, - ], - ), - tags: {}, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, + Parsed( + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.cache_rate@none", + ), + value: Distribution( + [ + 36.0, + ], + ), + tags: {}, + metadata: BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, + }, }, - }, - ], + ], + 
), + Internal, ), ] "###); diff --git a/relay-server/src/services/processor/event.rs b/relay-server/src/services/processor/event.rs index 8f9b22ae14..1759d0d078 100644 --- a/relay-server/src/services/processor/event.rs +++ b/relay-server/src/services/processor/event.rs @@ -435,6 +435,7 @@ fn is_duplicate(item: &Item, processing_enabled: bool) -> bool { ItemType::Sessions => false, ItemType::Statsd => false, ItemType::MetricBuckets => false, + ItemType::MetricMeta => false, ItemType::ClientReport => false, ItemType::Profile => false, ItemType::ReplayEvent => false, diff --git a/relay-server/src/services/processor/metrics.rs b/relay-server/src/services/processor/metrics.rs index 703898e425..0d28285e77 100644 --- a/relay-server/src/services/processor/metrics.rs +++ b/relay-server/src/services/processor/metrics.rs @@ -5,7 +5,7 @@ use relay_quotas::Scoping; use crate::metrics::MetricOutcomes; use crate::services::outcome::Outcome; -use crate::services::processor::BucketSource; +use crate::services::projects::cache::BucketSource; use crate::services::projects::project::ProjectInfo; /// Checks if the namespace of the passed bucket is valid. diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index a6452515b2..9ef4945a6e 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -848,7 +848,7 @@ mod tests { extracted_metrics: ProcessingExtractedMetrics::new(), config: Arc::new(Config::default()), project_info, - rate_limits: Arc::new(RateLimits::default()), + rate_limits: RateLimits::default(), sampling_project_info: None, project_id: ProjectId::new(42), managed_envelope: managed_envelope.try_into().unwrap(), diff --git a/relay-server/src/services/projects/cache/legacy.rs b/relay-server/src/services/projects/cache.rs similarity index 52% rename from relay-server/src/services/projects/cache/legacy.rs rename to relay-server/src/services/projects/cache.rs index 0dc95464c1..22e15e6329 100644 --- a/relay-server/src/services/projects/cache/legacy.rs +++ b/relay-server/src/services/projects/cache.rs @@ -3,40 +3,146 @@ use std::error::Error; use std::sync::Arc; use std::time::Duration; +use crate::extractors::RequestMeta; use crate::services::buffer::{EnvelopeBuffer, EnvelopeBufferError}; use crate::services::global_config; use crate::services::processor::{ - EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, + EncodeMetrics, EnvelopeProcessor, MetricData, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; -use crate::services::projects::cache::{CheckedEnvelope, ProjectCacheHandle, ProjectChange}; use crate::Envelope; +use chrono::{DateTime, Utc}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; +use relay_metrics::{Bucket, MetricMeta}; +use relay_quotas::RateLimits; +use relay_redis::RedisPool; use relay_statsd::metric; -use relay_system::{Addr, FromMessage, Interface, Service}; +use relay_system::{Addr, FromMessage, Interface, Sender, Service}; use tokio::sync::{mpsc, watch}; +use tokio::time::Instant; -use crate::services::metrics::{Aggregator, FlushBuckets, MergeBuckets}; +use crate::services::metrics::{Aggregator, FlushBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::services::projects::project::ProjectState; +use crate::services::projects::project::{Project, ProjectFetchState, ProjectSender, ProjectState}; +use 
crate::services::projects::source::ProjectSource; use crate::services::spooler::{ self, Buffer, BufferService, DequeueMany, Enqueue, QueueKey, RemoveMany, RestoreIndex, UnspooledEnvelope, BATCH_KEY_COUNT, }; use crate::services::test_store::TestStore; +use crate::services::upstream::UpstreamRelay; -use crate::statsd::{RelayCounters, RelayGauges, RelayTimers}; -use crate::utils::{ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; +use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; +use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; + +/// Requests a refresh of a project state from one of the available sources. +/// +/// The project state is resolved in the following precedence: +/// +/// 1. Local file system +/// 2. Redis cache (processing mode only) +/// 3. Upstream (managed and processing mode only) +/// +/// Requests to the upstream are performed via `UpstreamProjectSource`, which internally batches +/// individual requests. +#[derive(Clone, Debug)] +pub struct RequestUpdate { + /// The public key to fetch the project by. + pub project_key: ProjectKey, + /// If true, all caches should be skipped and a fresh state should be computed. + pub no_cache: bool, + /// Previously cached fetch state, if available. + /// + /// The upstream request will include the revision of the currently cached state, + /// if the upstream does not have a different revision, this cached + /// state is re-used and its expiry bumped. + pub cached_state: ProjectFetchState, +} + +/// Returns the project state. +/// +/// The project state is fetched if it is missing or outdated. If `no_cache` is specified, then the +/// state is always refreshed. +#[derive(Debug)] +pub struct GetProjectState { + project_key: ProjectKey, + no_cache: bool, +} + +impl GetProjectState { + /// Fetches the project state and uses the cached version if up-to-date. + pub fn new(project_key: ProjectKey) -> Self { + Self { + project_key, + no_cache: false, + } + } + + /// Fetches the project state and conditionally skips the cache. + pub fn no_cache(mut self, no_cache: bool) -> Self { + self.no_cache = no_cache; + self + } +} + +/// Returns the project state if it is already cached. +/// +/// This is used for cases when we only want to perform operations that do +/// not require waiting for network requests. +#[derive(Debug)] +pub struct GetCachedProjectState { + project_key: ProjectKey, +} + +impl GetCachedProjectState { + pub fn new(project_key: ProjectKey) -> Self { + Self { project_key } + } +} + +/// A checked envelope and associated rate limits. +/// +/// Items violating the rate limits have been removed from the envelope. If all items are removed +/// from the envelope, `None` is returned in place of the envelope. +#[derive(Debug)] +pub struct CheckedEnvelope { + pub envelope: Option, + pub rate_limits: RateLimits, +} + +/// Checks the envelope against project configuration and rate limits. +/// +/// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated +/// project state may be used, or otherwise the envelope is passed through unaltered. +/// +/// To check the envelope, this runs: +/// - Validate origins and public keys +/// - Quotas with a limit of `0` +/// - Cached rate limits +#[derive(Debug)] +pub struct CheckEnvelope { + envelope: ManagedEnvelope, +} + +impl CheckEnvelope { + /// Uses a cached project state and checks the envelope. 
+ pub fn new(envelope: ManagedEnvelope) -> Self { + Self { envelope } + } +} /// Validates the envelope against project configuration and rate limits. /// -/// This ensures internally that the project state is up to date . -/// Once the envelope has been validated, remaining items are forwarded to the next stage: +/// This ensures internally that the project state is up to date and then runs the same checks as +/// [`CheckEnvelope`]. Once the envelope has been validated, remaining items are forwarded to the +/// next stage: /// /// - If the envelope needs dynamic sampling, and the project state is not cached or out of the /// date, the envelopes is spooled and we continue when the state is fetched. /// - Otherwise, the envelope is directly submitted to the [`EnvelopeProcessor`]. +/// +/// [`EnvelopeProcessor`]: crate::services::processor::EnvelopeProcessor #[derive(Debug)] pub struct ValidateEnvelope { envelope: ManagedEnvelope, @@ -48,6 +154,75 @@ impl ValidateEnvelope { } } +#[derive(Debug)] +pub struct UpdateRateLimits { + project_key: ProjectKey, + rate_limits: RateLimits, +} + +impl UpdateRateLimits { + pub fn new(project_key: ProjectKey, rate_limits: RateLimits) -> UpdateRateLimits { + Self { + project_key, + rate_limits, + } + } +} + +/// Source information where a metric bucket originates from. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum BucketSource { + /// The metric bucket originated from an internal Relay use case. + /// + /// The metric bucket originates either from within the same Relay + /// or was accepted coming from another Relay which is registered as + /// an internal Relay via Relay's configuration. + Internal, + /// The bucket source originated from an untrusted source. + /// + /// Managed Relays sending extracted metrics are considered external, + /// it's a project use case but it comes from an untrusted source. + External, +} + +impl From<&RequestMeta> for BucketSource { + fn from(value: &RequestMeta) -> Self { + if value.is_from_internal_relay() { + Self::Internal + } else { + Self::External + } + } +} + +/// Starts the processing flow for received metrics. +/// +/// Enriches the raw data with projcet information and forwards +/// the metrics using [`ProcessProjectMetrics`](crate::services::processor::ProcessProjectMetrics). +#[derive(Debug)] +pub struct ProcessMetrics { + /// A list of metric items. + pub data: MetricData, + /// The target project. + pub project_key: ProjectKey, + /// Whether to keep or reset the metric metadata. + pub source: BucketSource, + /// The wall clock time at which the request was received. + pub received_at: DateTime, + /// The value of the Envelope's [`sent_at`](crate::envelope::Envelope::sent_at) + /// header for clock drift correction. + pub sent_at: Option>, +} + +/// Add metric metadata to the aggregator. +#[derive(Debug)] +pub struct AddMetricMeta { + /// The project key. + pub project_key: ProjectKey, + /// The metadata. + pub meta: MetricMeta, +} + /// Updates the buffer index for [`ProjectKey`] with the [`QueueKey`] keys. /// /// This message is sent from the project buffer in case of the error while fetching the data from @@ -73,24 +248,61 @@ pub struct RefreshIndexCache(pub HashSet); #[derive(Debug)] pub struct DequeuedEnvelope(pub Box); -/// The legacy project cache. +/// A request to update a project, typically sent by the envelope buffer. 
+/// +/// This message is similar to [`GetProjectState`], except it has no `no_cache` option +/// and it does not send a response, but sends a signal back to the buffer instead. +pub struct UpdateProject(pub ProjectKey); + +/// A cache for [`ProjectState`]s. +/// +/// The project maintains information about organizations, projects, and project keys along with +/// settings required to ingest traffic to Sentry. Internally, it tries to keep information +/// up-to-date and automatically retires unused old data. +/// +/// To retrieve information from the cache, use [`GetProjectState`] for guaranteed up-to-date +/// information, or [`GetCachedProjectState`] for immediately available but potentially older +/// information. /// -/// It manages spool v1 and some remaining messages which handle project state. +/// There are also higher-level operations, such as [`CheckEnvelope`] and [`ValidateEnvelope`] that +/// inspect contents of envelopes for ingestion, as well as [`ProcessMetrics`] to aggregate metrics +/// associated with a project. +/// +/// See the enumerated variants for a full list of available messages for this service. #[derive(Debug)] pub enum ProjectCache { + RequestUpdate(RequestUpdate), + Get(GetProjectState, ProjectSender), + GetCached(GetCachedProjectState, Sender), + CheckEnvelope( + CheckEnvelope, + Sender>, + ), ValidateEnvelope(ValidateEnvelope), + UpdateRateLimits(UpdateRateLimits), + ProcessMetrics(ProcessMetrics), + AddMetricMeta(AddMetricMeta), FlushBuckets(FlushBuckets), UpdateSpoolIndex(UpdateSpoolIndex), RefreshIndexCache(RefreshIndexCache), + UpdateProject(ProjectKey), } impl ProjectCache { pub fn variant(&self) -> &'static str { match self { + Self::RequestUpdate(_) => "RequestUpdate", + Self::Get(_, _) => "Get", + Self::GetCached(_, _) => "GetCached", + Self::CheckEnvelope(_, _) => "CheckEnvelope", Self::ValidateEnvelope(_) => "ValidateEnvelope", + Self::UpdateRateLimits(_) => "UpdateRateLimits", + Self::ProcessMetrics(_) => "ProcessMetrics", + Self::AddMetricMeta(_) => "AddMetricMeta", Self::FlushBuckets(_) => "FlushBuckets", Self::UpdateSpoolIndex(_) => "UpdateSpoolIndex", Self::RefreshIndexCache(_) => "RefreshIndexCache", + Self::UpdateProject(_) => "UpdateProject", } } } @@ -113,6 +325,41 @@ impl FromMessage for ProjectCache { } } +impl FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: RequestUpdate, _: ()) -> Self { + Self::RequestUpdate(message) + } +} + +impl FromMessage for ProjectCache { + type Response = relay_system::BroadcastResponse; + + fn from_message(message: GetProjectState, sender: ProjectSender) -> Self { + Self::Get(message, sender) + } +} + +impl FromMessage for ProjectCache { + type Response = relay_system::AsyncResponse; + + fn from_message(message: GetCachedProjectState, sender: Sender) -> Self { + Self::GetCached(message, sender) + } +} + +impl FromMessage for ProjectCache { + type Response = relay_system::AsyncResponse>; + + fn from_message( + message: CheckEnvelope, + sender: Sender>, + ) -> Self { + Self::CheckEnvelope(message, sender) + } +} + impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -121,6 +368,30 @@ impl FromMessage for ProjectCache { } } +impl FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: UpdateRateLimits, _: ()) -> Self { + Self::UpdateRateLimits(message) + } +} + +impl FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: 
ProcessMetrics, _: ()) -> Self { + Self::ProcessMetrics(message) + } +} + +impl FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: AddMetricMeta, _: ()) -> Self { + Self::AddMetricMeta(message) + } +} + impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -129,6 +400,27 @@ impl FromMessage for ProjectCache { } } +impl FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: UpdateProject, _: ()) -> Self { + let UpdateProject(project_key) = message; + Self::UpdateProject(project_key) + } +} + +/// Updates the cache with new project state information. +struct UpdateProjectState { + /// The public key to fetch the project by. + project_key: ProjectKey, + + /// New project state information. + state: ProjectFetchState, + + /// If true, all caches should be skipped and a fresh state should be computed. + no_cache: bool, +} + /// Holds the addresses of all services required for [`ProjectCache`]. #[derive(Debug, Clone)] pub struct Services { @@ -138,6 +430,7 @@ pub struct Services { pub outcome_aggregator: Addr, pub project_cache: Addr, pub test_store: Addr, + pub upstream_relay: Addr, } /// Main broker of the [`ProjectCacheService`]. @@ -149,7 +442,14 @@ struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, services: Services, - projects: ProjectCacheHandle, + // Need hashbrown because extract_if is not stable in std yet. + projects: hashbrown::HashMap, + /// Utility for disposing of expired project data in a background thread. + garbage_disposal: GarbageDisposal, + /// Source for fetching project states from the upstream or from disk. + source: ProjectSource, + /// Tx channel used to send the updated project state whenever requested. + state_tx: mpsc::UnboundedSender, /// Handle to schedule periodic unspooling of buffered envelopes (spool V1). spool_v1_unspool_handle: SleepHandle, @@ -212,35 +512,197 @@ impl ProjectCacheBroker { .send(DequeueMany::new(keys, spool_v1.buffer_tx.clone())) } - fn evict_project(&mut self, project_key: ProjectKey) { - let Some(ref mut spool_v1) = self.spool_v1 else { - return; - }; + /// Evict projects that are over its expiry date. + /// + /// Ideally, we would use `check_expiry` to determine expiry here. + /// However, for eviction, we want to add an additional delay, such that we do not delete + /// a project that has expired recently and for which a fetch is already underway in + /// [`super::source::upstream`]. 
+ fn evict_stale_project_caches(&mut self) { + let eviction_start = Instant::now(); + let delta = 2 * self.config.project_cache_expiry() + self.config.project_grace_period(); + + let expired = self + .projects + .extract_if(|_, entry| entry.last_updated_at() + delta <= eviction_start); + + // Defer dropping the projects to a dedicated thread: + let mut count = 0; + for (project_key, project) in expired { + if let Some(spool_v1) = self.spool_v1.as_mut() { + let keys = spool_v1 + .index + .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) + .collect::>(); + + if !keys.is_empty() { + spool_v1.buffer.send(RemoveMany::new(project_key, keys)) + } + } + + self.garbage_disposal.dispose(project); + count += 1; + } + metric!(counter(RelayCounters::EvictingStaleProjectCaches) += count); - let keys = spool_v1 - .index - .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) - .collect::>(); + // Log garbage queue size: + let queue_size = self.garbage_disposal.queue_size() as f64; + metric!(gauge(RelayGauges::ProjectCacheGarbageQueueSize) = queue_size); - if !keys.is_empty() { - spool_v1.buffer.send(RemoveMany::new(project_key, keys)) + metric!(timer(RelayTimers::ProjectStateEvictionDuration) = eviction_start.elapsed()); + } + + fn get_or_create_project(&mut self, project_key: ProjectKey) -> &mut Project { + metric!(histogram(RelayHistograms::ProjectStateCacheSize) = self.projects.len() as u64); + + let config = self.config.clone(); + + self.projects + .entry(project_key) + .and_modify(|_| { + metric!(counter(RelayCounters::ProjectCacheHit) += 1); + }) + .or_insert_with(move || { + metric!(counter(RelayCounters::ProjectCacheMiss) += 1); + Project::new(project_key, config) + }) + } + + /// Updates the [`Project`] with received [`ProjectState`]. + /// + /// If the project state is valid we also send the message to the buffer service to dequeue the + /// envelopes for this project. + fn merge_state(&mut self, message: UpdateProjectState) { + let UpdateProjectState { + project_key, + state, + no_cache, + } = message; + + let project_cache = self.services.project_cache.clone(); + let envelope_processor = self.services.envelope_processor.clone(); + + let old_state = self.get_or_create_project(project_key).update_state( + &project_cache, + state, + &envelope_processor, + no_cache, + ); + if let Some(old_state) = old_state { + self.garbage_disposal.dispose(old_state); } + + // Try to schedule unspool if it's not scheduled yet. + self.schedule_unspool(); + + if let Some(envelope_buffer) = self.services.envelope_buffer.as_ref() { + envelope_buffer.send(EnvelopeBuffer::Ready(project_key)) + }; + } + + fn handle_request_update(&mut self, message: RequestUpdate) { + let RequestUpdate { + project_key, + no_cache, + cached_state, + } = message; + + // Bump the update time of the project in our hashmap to evade eviction. + let project = self.get_or_create_project(project_key); + project.refresh_updated_timestamp(); + let next_attempt = project.next_fetch_attempt(); + + let source = self.source.clone(); + let sender = self.state_tx.clone(); + + tokio::spawn(async move { + // Wait on the new attempt time when set. 
+ if let Some(next_attempt) = next_attempt { + tokio::time::sleep_until(next_attempt).await; + } + let state = source + .fetch(project_key, no_cache, cached_state) + .await + .unwrap_or_else(|e| { + relay_log::error!( + error = &e as &dyn Error, + tags.project_key = %project_key, + "Failed to fetch project from source" + ); + // TODO: change this to ProjectFetchState::pending() once we consider it safe to do so. + // see https://github.com/getsentry/relay/pull/4140. + ProjectFetchState::disabled() + }); + + let message = UpdateProjectState { + project_key, + no_cache, + state, + }; + + sender.send(message).ok(); + }); } - fn handle_project_event(&mut self, event: ProjectChange) { - match event { - ProjectChange::Ready(_) => self.schedule_unspool(), - ProjectChange::Evicted(project_key) => self.evict_project(project_key), + fn handle_get(&mut self, message: GetProjectState, sender: ProjectSender) { + let GetProjectState { + project_key, + no_cache, + } = message; + let project_cache = self.services.project_cache.clone(); + let project = self.get_or_create_project(project_key); + + project.get_state(project_cache, sender, no_cache); + } + + fn handle_get_cached(&mut self, message: GetCachedProjectState) -> ProjectState { + let project_cache = self.services.project_cache.clone(); + self.get_or_create_project(message.project_key) + .get_cached_state(project_cache, false) + } + + fn handle_check_envelope( + &mut self, + message: CheckEnvelope, + ) -> Result { + let CheckEnvelope { envelope: context } = message; + let project_cache = self.services.project_cache.clone(); + let project_key = context.envelope().meta().public_key(); + if let Some(sampling_key) = context.envelope().sampling_key() { + if sampling_key != project_key { + let sampling_project = self.get_or_create_project(sampling_key); + sampling_project.prefetch(project_cache.clone(), false); + } } + let project = self.get_or_create_project(project_key); + + // Preload the project cache so that it arrives a little earlier in processing. However, + // do not pass `no_cache`. In case the project is rate limited, we do not want to force + // a full reload. Fetching must not block the store request. + project.prefetch(project_cache, false); + + project.check_envelope(context) } /// Handles the processing of the provided envelope. - fn handle_processing(&mut self, mut managed_envelope: ManagedEnvelope) { + fn handle_processing(&mut self, key: QueueKey, mut managed_envelope: ManagedEnvelope) { let project_key = managed_envelope.envelope().meta().public_key(); - let project = self.projects.get(project_key); + let Some(project) = self.projects.get_mut(&project_key) else { + relay_log::error!( + tags.project_key = %project_key, + "project could not be found in the cache", + ); + + let mut project = Project::new(project_key, self.config.clone()); + project.prefetch(self.services.project_cache.clone(), false); + self.projects.insert(project_key, project); + self.enqueue(key, managed_envelope); + return; + }; - let project_info = match project.state() { + let project_cache = self.services.project_cache.clone(); + let project_info = match project.get_cached_state(project_cache.clone(), false) { ProjectState::Enabled(info) => info, ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -267,19 +729,20 @@ impl ProjectCacheBroker { .. 
}) = project.check_envelope(managed_envelope) { - let rate_limits = project.rate_limits().current_limits(); - let reservoir_counters = project.reservoir_counters().clone(); + let rate_limits = project.current_rate_limits().clone(); + + let reservoir_counters = project.reservoir_counters(); let sampling_project_info = managed_envelope .envelope() .sampling_key() - .map(|key| self.projects.get(key)) - .and_then(|p| p.state().clone().enabled()) - .filter(|info| info.organization_id == project_info.organization_id); + .and_then(|key| self.projects.get_mut(&key)) + .and_then(|p| p.get_cached_state(project_cache, false).enabled()) + .filter(|state| state.organization_id == project_info.organization_id); let process = ProcessEnvelope { envelope: managed_envelope, - project_info: Arc::clone(project_info), + project_info, rate_limits, sampling_project_info, reservoir_counters, @@ -307,13 +770,17 @@ impl ProjectCacheBroker { envelope: mut managed_envelope, } = message; + let project_cache = self.services.project_cache.clone(); let envelope = managed_envelope.envelope(); // Fetch the project state for our key and make sure it's not invalid. let own_key = envelope.meta().public_key(); - let project = self.projects.get(own_key); + let project = self.get_or_create_project(own_key); - let project_state = match project.state() { + let project_state = + project.get_cached_state(project_cache.clone(), envelope.meta().no_cache()); + + let project_state = match project_state { ProjectState::Enabled(state) => Some(state), ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -325,10 +792,12 @@ impl ProjectCacheBroker { // Also, fetch the project state for sampling key and make sure it's not invalid. let sampling_key = envelope.sampling_key(); let mut requires_sampling_state = sampling_key.is_some(); - let sampling_info = if let Some(sampling_key) = sampling_key { - let sampling_project = self.projects.get(sampling_key); - match sampling_project.state() { - ProjectState::Enabled(info) => Some(Arc::clone(info)), + let sampling_state = if let Some(sampling_key) = sampling_key { + let state = self + .get_or_create_project(sampling_key) + .get_cached_state(project_cache, envelope.meta().no_cache()); + match state { + ProjectState::Enabled(state) => Some(state), ProjectState::Disabled => { relay_log::trace!("Sampling state is disabled ({sampling_key})"); // We accept events even if its root project has been disabled. @@ -349,33 +818,57 @@ impl ProjectCacheBroker { // Trigger processing once we have a project state and we either have a sampling project // state or we do not need one. if project_state.is_some() - && (sampling_info.is_some() || !requires_sampling_state) + && (sampling_state.is_some() || !requires_sampling_state) && self.memory_checker.check_memory().has_capacity() && self.global_config.is_ready() { // TODO: Add ready project infos to the processing message. 
relay_log::trace!("Sending envelope to processor"); - return self.handle_processing(managed_envelope); + return self.handle_processing(key, managed_envelope); } relay_log::trace!("Enqueueing envelope"); self.enqueue(key, managed_envelope); } + fn handle_rate_limits(&mut self, message: UpdateRateLimits) { + self.get_or_create_project(message.project_key) + .merge_rate_limits(message.rate_limits); + } + + fn handle_process_metrics(&mut self, message: ProcessMetrics) { + let project_cache = self.services.project_cache.clone(); + + let message = self + .get_or_create_project(message.project_key) + .prefetch(project_cache, false) + .process_metrics(message); + + self.services.envelope_processor.send(message); + } + + fn handle_add_metric_meta(&mut self, message: AddMetricMeta) { + let envelope_processor = self.services.envelope_processor.clone(); + + self.get_or_create_project(message.project_key) + .add_metric_meta(message.meta, envelope_processor); + } + fn handle_flush_buckets(&mut self, message: FlushBuckets) { let aggregator = self.services.aggregator.clone(); + let project_cache = self.services.project_cache.clone(); let mut no_project = 0; let mut scoped_buckets = BTreeMap::new(); for (project_key, buckets) in message.buckets { - let project = self.projects.get(project_key); + let project = self.get_or_create_project(project_key); - let project_info = match project.state() { + let project_info = match project.current_state() { ProjectState::Pending => { no_project += 1; - - // Return the buckets to the aggregator. - aggregator.send(MergeBuckets::new(project_key, buckets)); + // Schedule an update for the project just in case. + project.prefetch(project_cache.clone(), false); + project.return_buckets(&aggregator, buckets); continue; } ProjectState::Disabled => { @@ -384,12 +877,13 @@ impl ProjectCacheBroker { // Ideally we log outcomes for the metrics here, but currently for metric // outcomes we need a valid scoping, which we cannot construct for disabled // projects. 
+ self.garbage_disposal.dispose(buckets); continue; } ProjectState::Enabled(project_info) => project_info, }; - let Some(scoping) = project_info.scoping(project_key) else { + let Some(scoping) = project.scoping() else { relay_log::error!( tags.project_key = project_key.as_str(), "there is no scoping: dropping {} buckets", @@ -402,8 +896,8 @@ impl ProjectCacheBroker { match scoped_buckets.entry(scoping) { Vacant(entry) => { entry.insert(ProjectMetrics { - project_info: Arc::clone(project_info), - rate_limits: project.rate_limits().current_limits(), + project_info, + rate_limits: project.current_rate_limits().clone(), buckets, }); } @@ -430,14 +924,16 @@ impl ProjectCacheBroker { fn handle_refresh_index_cache(&mut self, message: RefreshIndexCache) { let RefreshIndexCache(index) = message; + let project_cache = self.services.project_cache.clone(); for key in index { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); spool_v1.index.insert(key); - - self.projects.fetch(key.own_key); + self.get_or_create_project(key.own_key) + .prefetch(project_cache.clone(), false); if key.own_key != key.sampling_key { - self.projects.fetch(key.sampling_key); + self.get_or_create_project(key.sampling_key) + .prefetch(project_cache.clone(), false); } } } @@ -451,10 +947,11 @@ impl ProjectCacheBroker { let services = self.services.clone(); let own_key = envelope.meta().public_key(); - let project = self.projects.get(own_key); + let project = self.get_or_create_project(own_key); + let project_state = project.get_cached_state(services.project_cache.clone(), false); // Check if project config is enabled. - let project_info = match project.state() { + let project_info = match project_state { ProjectState::Enabled(info) => info, ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( @@ -476,7 +973,8 @@ impl ProjectCacheBroker { let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, - self.projects.get(sampling_key).state().clone(), + self.get_or_create_project(sampling_key) + .get_cached_state(services.project_cache, false), ) }) { Some((_, ProjectState::Enabled(info))) => { @@ -495,6 +993,8 @@ impl ProjectCacheBroker { None => None, }; + let project = self.get_or_create_project(own_key); + // Reassign processing groups and proceed to processing. for (group, envelope) in ProcessingGroup::split_envelope(*envelope) { let managed_envelope = ManagedEnvelope::new( @@ -512,11 +1012,11 @@ impl ProjectCacheBroker { continue; // Outcomes are emitted by check_envelope }; - let reservoir_counters = project.reservoir_counters().clone(); + let reservoir_counters = project.reservoir_counters(); services.envelope_processor.send(ProcessEnvelope { envelope: managed_envelope, project_info: project_info.clone(), - rate_limits: project.rate_limits().current_limits(), + rate_limits: project.current_rate_limits().clone(), sampling_project_info: sampling_project_info.clone(), reservoir_counters, }); @@ -525,6 +1025,22 @@ impl ProjectCacheBroker { Ok(()) } + fn handle_update_project(&mut self, project_key: ProjectKey) { + let project_cache = self.services.project_cache.clone(); + let envelope_buffer = self.services.envelope_buffer.clone(); + let project = self.get_or_create_project(project_key); + + // If the project is already loaded, inform the envelope buffer. 
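// `handle_flush_buckets` above partitions the flushed buckets by their resolved scoping
// before handing them to the processor. A compact sketch of that grouping step with the
// standard `BTreeMap` entry API; `Scoping` and `Bucket` are simplified placeholders for the
// real relay types.
use std::collections::BTreeMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Scoping {
    organization_id: u64,
    project_id: u64,
}

#[derive(Debug, Clone, PartialEq)]
struct Bucket(&'static str);

fn group_by_scoping(
    buckets: impl IntoIterator<Item = (Scoping, Bucket)>,
) -> BTreeMap<Scoping, Vec<Bucket>> {
    let mut grouped: BTreeMap<Scoping, Vec<Bucket>> = BTreeMap::new();
    for (scoping, bucket) in buckets {
        // `entry` inserts an empty group for scopings seen for the first time and
        // appends to the existing group otherwise.
        grouped.entry(scoping).or_default().push(bucket);
    }
    grouped
}

fn main() {
    let scoping = Scoping { organization_id: 1, project_id: 42 };
    let grouped = group_by_scoping([(scoping, Bucket("a")), (scoping, Bucket("b"))]);
    assert_eq!(grouped[&scoping].len(), 2);
}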
+ if !project.current_state().is_pending() { + if let Some(envelope_buffer) = envelope_buffer { + envelope_buffer.send(EnvelopeBuffer::Ready(project_key)); + } + } + + let no_cache = false; + project.prefetch(project_cache, no_cache); + } + /// Returns backoff timeout for an unspool attempt. fn next_unspool_attempt(&mut self) -> Duration { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); @@ -549,9 +1065,15 @@ impl ProjectCacheBroker { /// Which includes the own key and the sampling key for the project. /// Note: this function will trigger [`ProjectState`] refresh if it's already expired. fn is_state_cached(&mut self, key: &QueueKey) -> bool { - key.unique_keys() - .iter() - .all(|key| !self.projects.get(*key).state().is_pending()) + key.unique_keys().iter().all(|key| { + self.projects.get_mut(key).is_some_and(|project| { + // Returns `Some` if the project is cached otherwise None and also triggers refresh + // in background. + !project + .get_cached_state(self.services.project_cache.clone(), false) + .is_pending() + }) + }) } /// Iterates the buffer index and tries to unspool the envelopes for projects with a valid @@ -611,18 +1133,30 @@ impl ProjectCacheBroker { fn handle_message(&mut self, message: ProjectCache) { let ty = message.variant(); metric!( - timer(RelayTimers::LegacyProjectCacheMessageDuration), + timer(RelayTimers::ProjectCacheMessageDuration), message = ty, { match message { + ProjectCache::RequestUpdate(message) => self.handle_request_update(message), + ProjectCache::Get(message, sender) => self.handle_get(message, sender), + ProjectCache::GetCached(message, sender) => { + sender.send(self.handle_get_cached(message)) + } + ProjectCache::CheckEnvelope(message, sender) => { + sender.send(self.handle_check_envelope(message)) + } ProjectCache::ValidateEnvelope(message) => { self.handle_validate_envelope(message) } + ProjectCache::UpdateRateLimits(message) => self.handle_rate_limits(message), + ProjectCache::ProcessMetrics(message) => self.handle_process_metrics(message), + ProjectCache::AddMetricMeta(message) => self.handle_add_metric_meta(message), ProjectCache::FlushBuckets(message) => self.handle_flush_buckets(message), ProjectCache::UpdateSpoolIndex(message) => self.handle_buffer_index(message), ProjectCache::RefreshIndexCache(message) => { self.handle_refresh_index_cache(message) } + ProjectCache::UpdateProject(project) => self.handle_update_project(project), } } ) @@ -649,11 +1183,11 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, - project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, /// Bounded channel used exclusively to receive envelopes from the envelope buffer. 
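// `is_state_cached` above only unspools a queue key once every project key it references has
// a non-pending cached state. A small sketch of that check with a simplified `QueueKey`;
// `unique_keys` mirrors the idea of deduplicating own and sampling key when they coincide.
use std::collections::HashMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct QueueKey {
    own_key: &'static str,
    sampling_key: &'static str,
}

impl QueueKey {
    fn unique_keys(&self) -> Vec<&'static str> {
        let mut keys = vec![self.own_key];
        if self.sampling_key != self.own_key {
            keys.push(self.sampling_key);
        }
        keys
    }
}

#[derive(Debug, Clone, Copy, PartialEq)]
enum State {
    Pending,
    Enabled,
}

fn is_state_cached(projects: &HashMap<&'static str, State>, key: &QueueKey) -> bool {
    key.unique_keys()
        .iter()
        .all(|k| projects.get(k).is_some_and(|state| *state != State::Pending))
}

fn main() {
    let mut projects = HashMap::new();
    projects.insert("own", State::Enabled);

    let key = QueueKey { own_key: "own", sampling_key: "sampling" };
    assert!(!is_state_cached(&projects, &key));

    projects.insert("sampling", State::Enabled);
    assert!(is_state_cached(&projects, &key));
}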
envelopes_rx: mpsc::Receiver, + redis: Option, } impl ProjectCacheService { @@ -661,18 +1195,18 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, - project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, envelopes_rx: mpsc::Receiver, + redis: Option, ) -> Self { Self { config, memory_checker, - project_cache_handle, services, global_config_rx, envelopes_rx, + redis, } } } @@ -684,19 +1218,22 @@ impl Service for ProjectCacheService { let Self { config, memory_checker, - project_cache_handle, services, mut global_config_rx, mut envelopes_rx, + redis, } = self; - let mut project_events = project_cache_handle.changes(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); tokio::spawn(async move { + let mut ticker = tokio::time::interval(config.cache_eviction_interval()); relay_log::info!("project cache started"); + // Channel for async project state responses back into the project cache. + let (state_tx, mut state_rx) = mpsc::unbounded_channel(); + let global_config = match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => { relay_log::info!("global config received"); @@ -749,11 +1286,20 @@ impl Service for ProjectCacheService { }), }; + // Main broker that serializes public and internal messages, and triggers project state + // fetches via the project source. let mut broker = ProjectCacheBroker { config: config.clone(), memory_checker, - projects: project_cache_handle, + projects: hashbrown::HashMap::new(), + garbage_disposal: GarbageDisposal::new(), + source: ProjectSource::start( + config.clone(), + services.upstream_relay.clone(), + redis, + ), services, + state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1, global_config, @@ -764,7 +1310,7 @@ impl Service for ProjectCacheService { biased; Ok(()) = global_config_rx.changed() => { - metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "update_global_config", { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "update_global_config", { match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => broker.set_global_config_ready(), // The watch should only be updated if it gets a new value. @@ -773,30 +1319,35 @@ impl Service for ProjectCacheService { } }) }, - Ok(project_event) = project_events.recv() => { - metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_project_event", { - broker.handle_project_event(project_event); + Some(message) = state_rx.recv() => { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "merge_state", { + broker.merge_state(message) }) } // Buffer will not dequeue the envelopes from the spool if there is not enough // permits in `BufferGuard` available. Currently this is 50%. - Some(UnspooledEnvelope { managed_envelope, .. 
}) = buffer_rx.recv() => { - metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_processing", { - broker.handle_processing(managed_envelope) + Some(UnspooledEnvelope { managed_envelope, key }) = buffer_rx.recv() => { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_processing", { + broker.handle_processing(key, managed_envelope) }) }, + _ = ticker.tick() => { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "evict_project_caches", { + broker.evict_stale_project_caches() + }) + } () = &mut broker.spool_v1_unspool_handle => { - metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "periodic_unspool", { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "periodic_unspool", { broker.handle_periodic_unspool() }) } Some(message) = rx.recv() => { - metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_message", { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_message", { broker.handle_message(message) }) } Some(message) = envelopes_rx.recv() => { - metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_envelope", { + metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_envelope", { broker.handle_envelope(message) }) } @@ -809,6 +1360,33 @@ impl Service for ProjectCacheService { } } +/// Sum type for all objects which need to be discareded through the [`GarbageDisposal`]. +#[derive(Debug)] +#[allow(dead_code)] // Fields are never read, only used for discarding/dropping data. +enum ProjectGarbage { + Project(Project), + ProjectFetchState(ProjectFetchState), + Metrics(Vec), +} + +impl From for ProjectGarbage { + fn from(value: Project) -> Self { + Self::Project(value) + } +} + +impl From for ProjectGarbage { + fn from(value: ProjectFetchState) -> Self { + Self::ProjectFetchState(value) + } +} + +impl From> for ProjectGarbage { + fn from(value: Vec) -> Self { + Self::Metrics(value) + } +} + #[cfg(test)] mod tests { use relay_test::mock_service; @@ -827,6 +1405,7 @@ mod tests { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); + let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); Services { envelope_buffer: None, @@ -835,11 +1414,13 @@ mod tests { project_cache, outcome_aggregator, test_store, + upstream_relay, } } async fn project_cache_broker_setup( services: Services, + state_tx: mpsc::UnboundedSender, buffer_tx: mpsc::UnboundedSender, ) -> (ProjectCacheBroker, Addr) { let config: Arc<_> = Config::from_json_value(serde_json::json!({ @@ -878,8 +1459,11 @@ mod tests { ProjectCacheBroker { config: config.clone(), memory_checker, - projects: ProjectCacheHandle::for_test(), + projects: hashbrown::HashMap::new(), + garbage_disposal: GarbageDisposal::new(), + source: ProjectSource::start(config, services.upstream_relay.clone(), None), services, + state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1: Some(SpoolV1 { buffer_tx, @@ -898,13 +1482,13 @@ mod tests { relay_log::init_test!(); let services = mocked_services(); + let (state_tx, _) = mpsc::unbounded_channel(); let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); let (mut broker, _buffer_svc) = - project_cache_broker_setup(services.clone(), buffer_tx).await; - let projects = broker.projects.clone(); - let mut project_events = projects.changes(); + 
project_cache_broker_setup(services.clone(), state_tx, buffer_tx).await; broker.global_config = GlobalConfigStatus::Ready; + let (tx_update, mut rx_update) = mpsc::unbounded_channel(); let (tx_assert, mut rx_assert) = mpsc::unbounded_channel(); let dsn1 = "111d836b15bb49d7bbf99e64295d995b"; @@ -931,12 +1515,11 @@ mod tests { tokio::task::spawn(async move { loop { select! { - Ok(project_event) = project_events.recv() => { - broker.handle_project_event(project_event); - } + Some(assert) = rx_assert.recv() => { assert_eq!(broker.spool_v1.as_ref().unwrap().index.len(), assert); }, + Some(update) = rx_update.recv() => broker.merge_state(update), () = &mut broker.spool_v1_unspool_handle => broker.handle_periodic_unspool(), } } @@ -945,11 +1528,13 @@ mod tests { // Before updating any project states. tx_assert.send(2).unwrap(); - projects.test_set_project_state( - ProjectKey::parse(dsn1).unwrap(), - ProjectState::new_allowed(), - ); + let update_dsn1_project_state = UpdateProjectState { + project_key: ProjectKey::parse(dsn1).unwrap(), + state: ProjectFetchState::allowed(), + no_cache: false, + }; + tx_update.send(update_dsn1_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // One of the project should be unspooled. tx_assert.send(1).unwrap(); @@ -957,15 +1542,59 @@ mod tests { // Schedule some work... tokio::time::sleep(Duration::from_secs(2)).await; - projects.test_set_project_state( - ProjectKey::parse(dsn2).unwrap(), - ProjectState::new_allowed(), - ); + let update_dsn2_project_state = UpdateProjectState { + project_key: ProjectKey::parse(dsn2).unwrap(), + state: ProjectFetchState::allowed(), + no_cache: false, + }; + tx_update.send(update_dsn2_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // The last project should be unspooled. tx_assert.send(0).unwrap(); // Make sure the last assert is tested. tokio::time::sleep(Duration::from_millis(100)).await; } + + #[tokio::test] + async fn handle_processing_without_project() { + let services = mocked_services(); + let (state_tx, _) = mpsc::unbounded_channel(); + let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); + let (mut broker, buffer_svc) = + project_cache_broker_setup(services.clone(), state_tx, buffer_tx.clone()).await; + + let dsn = "111d836b15bb49d7bbf99e64295d995b"; + let project_key = ProjectKey::parse(dsn).unwrap(); + let key = QueueKey { + own_key: project_key, + sampling_key: project_key, + }; + let envelope = ManagedEnvelope::new( + empty_envelope_with_dsn(dsn), + services.outcome_aggregator.clone(), + services.test_store.clone(), + ProcessingGroup::Ungrouped, + ); + + // Index and projects are empty. + assert!(broker.projects.is_empty()); + assert!(broker.spool_v1.as_mut().unwrap().index.is_empty()); + + // Since there is no project we should not process anything but create a project and spool + // the envelope. + broker.handle_processing(key, envelope); + + // Assert that we have a new project and also added an index. + assert!(broker.projects.get(&project_key).is_some()); + assert!(broker.spool_v1.as_mut().unwrap().index.contains(&key)); + + // Check is we actually spooled anything. + buffer_svc.send(DequeueMany::new([key].into(), buffer_tx.clone())); + let UnspooledEnvelope { + managed_envelope, .. 
+ } = buffer_rx.recv().await.unwrap(); + + assert_eq!(key, QueueKey::from_envelope(managed_envelope.envelope())); + } } diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs deleted file mode 100644 index 415bad5b75..0000000000 --- a/relay-server/src/services/projects/cache/handle.rs +++ /dev/null @@ -1,104 +0,0 @@ -use std::fmt; -use std::sync::Arc; - -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use relay_system::Addr; -use tokio::sync::broadcast; - -use super::state::Shared; -use crate::services::projects::cache::service::ProjectChange; -use crate::services::projects::cache::{Project, ProjectCache}; - -/// A synchronous handle to the [`ProjectCache`]. -/// -/// The handle allows lock free access to cached projects. It also acts as an interface -/// to the [`ProjectCacheService`](super::ProjectCacheService). -#[derive(Clone)] -pub struct ProjectCacheHandle { - pub(super) shared: Arc, - pub(super) config: Arc, - pub(super) service: Addr, - pub(super) project_changes: broadcast::Sender, -} - -impl ProjectCacheHandle { - /// Returns the current project state for the `project_key`. - pub fn get(&self, project_key: ProjectKey) -> Project<'_> { - let project = self.shared.get_or_create(project_key); - // Always trigger a fetch after retrieving the project to make sure the state is up to date. - self.fetch(project_key); - - Project::new(project, &self.config) - } - - /// Triggers a fetch/update check in the project cache for the supplied project. - pub fn fetch(&self, project_key: ProjectKey) { - self.service.send(ProjectCache::Fetch(project_key)); - } - - /// Returns a subscription to all [`ProjectChange`]'s. - /// - /// This stream notifies the subscriber about project state changes in the project cache. - /// Events may arrive in arbitrary order and be delivered multiple times. - pub fn changes(&self) -> broadcast::Receiver { - self.project_changes.subscribe() - } -} - -impl fmt::Debug for ProjectCacheHandle { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ProjectCacheHandle") - .field("shared", &self.shared) - .finish() - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::services::projects::project::ProjectState; - - impl ProjectCacheHandle { - /// Creates a new [`ProjectCacheHandle`] for testing only. - /// - /// A project cache handle created this way does not require a service to function. - pub fn for_test() -> Self { - Self { - shared: Default::default(), - config: Default::default(), - service: Addr::dummy(), - project_changes: broadcast::channel(999_999).0, - } - } - - /// Sets the project state for a project. - /// - /// This can be used to emulate a project cache update in tests. - pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { - let is_pending = state.is_pending(); - self.shared.test_set_project_state(project_key, state); - if is_pending { - let _ = self - .project_changes - .send(ProjectChange::Evicted(project_key)); - } else { - let _ = self.project_changes.send(ProjectChange::Ready(project_key)); - } - } - - /// Returns `true` if there is a project created for this `project_key`. - /// - /// A project is automatically created on access via [`Self::get`]. - pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { - self.shared.test_has_project_created(project_key) - } - - /// The amount of fetches triggered for projects. 
- /// - /// A fetch is triggered for both [`Self::get`] and [`Self::fetch`]. - pub fn test_num_fetches(&self) -> u64 { - self.service.len() - } - } -} diff --git a/relay-server/src/services/projects/cache/mod.rs b/relay-server/src/services/projects/cache/mod.rs deleted file mode 100644 index 35ee749ecc..0000000000 --- a/relay-server/src/services/projects/cache/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -mod handle; -mod project; -mod service; -mod state; - -pub mod legacy; - -pub use self::handle::ProjectCacheHandle; -pub use self::project::{CheckedEnvelope, Project}; -pub use self::service::{ProjectCache, ProjectCacheService, ProjectChange}; diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs deleted file mode 100644 index 4b9e95a2be..0000000000 --- a/relay-server/src/services/projects/cache/project.rs +++ /dev/null @@ -1,299 +0,0 @@ -use std::sync::Arc; - -use relay_config::Config; -use relay_dynamic_config::Feature; -use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits}; -use relay_sampling::evaluation::ReservoirCounters; - -use crate::envelope::ItemType; -use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::projects::cache::state::SharedProject; -use crate::services::projects::project::ProjectState; -use crate::utils::{CheckLimits, Enforcement, EnvelopeLimiter, ManagedEnvelope}; - -/// A loaded project. -pub struct Project<'a> { - shared: SharedProject, - config: &'a Config, -} - -impl<'a> Project<'a> { - pub(crate) fn new(shared: SharedProject, config: &'a Config) -> Self { - Self { shared, config } - } - - /// Returns a reference to the currently cached project state. - pub fn state(&self) -> &ProjectState { - self.shared.project_state() - } - - /// Returns a reference to the currently cached rate limits. - pub fn rate_limits(&self) -> &CachedRateLimits { - self.shared.cached_rate_limits() - } - - /// Returns a reference to the currently reservoir counters. - pub fn reservoir_counters(&self) -> &ReservoirCounters { - self.shared.reservoir_counters() - } - - /// Checks the envelope against project configuration and rate limits. - /// - /// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated - /// project state may be used, or otherwise the envelope is passed through unaltered. - /// - /// To check the envelope, this runs: - /// - Validate origins and public keys - /// - Quotas with a limit of `0` - /// - Cached rate limits - pub fn check_envelope( - &self, - mut envelope: ManagedEnvelope, - ) -> Result { - let state = match self.state() { - ProjectState::Enabled(state) => Some(Arc::clone(state)), - ProjectState::Disabled => { - // TODO(jjbayer): We should refactor this function to either return a Result or - // handle envelope rejections internally, but not both. 
- envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); - return Err(DiscardReason::ProjectId); - } - ProjectState::Pending => None, - }; - - let mut scoping = envelope.scoping(); - - if let Some(ref state) = state { - scoping = state.scope_request(envelope.envelope().meta()); - envelope.scope(scoping); - - if let Err(reason) = state.check_envelope(envelope.envelope(), self.config) { - envelope.reject(Outcome::Invalid(reason)); - return Err(reason); - } - } - - let current_limits = self.rate_limits().current_limits(); - - let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); - let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { - Ok(current_limits.check_with_quotas(quotas, item_scoping)) - }); - - let (mut enforcement, mut rate_limits) = - envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; - - let check_nested_spans = state - .as_ref() - .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); - - // If we can extract spans from the event, we want to try and count the number of nested - // spans to correctly emit negative outcomes in case the transaction itself is dropped. - if check_nested_spans { - sync_spans_to_enforcement(&envelope, &mut enforcement); - } - - enforcement.apply_with_outcomes(&mut envelope); - - envelope.update(); - - // Special case: Expose active rate limits for all metric namespaces if there is at least - // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary - // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. - if envelope.envelope().items().any(|i| i.ty().is_metrics()) { - let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); - metrics_scoping.namespace = MetricNamespaceScoping::Any; - rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); - } - - let envelope = if envelope.envelope().is_empty() { - // Individual rate limits have already been issued above - envelope.reject(Outcome::RateLimited(None)); - None - } else { - Some(envelope) - }; - - Ok(CheckedEnvelope { - envelope, - rate_limits, - }) - } -} - -/// A checked envelope and associated rate limits. -/// -/// Items violating the rate limits have been removed from the envelope. If all items are removed -/// from the envelope, `None` is returned in place of the envelope. -#[derive(Debug)] -pub struct CheckedEnvelope { - pub envelope: Option, - pub rate_limits: RateLimits, -} - -/// Adds category limits for the nested spans inside a transaction. -/// -/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted -/// as top-level spans, thus if we limited a transaction, we want to count and emit negative -/// outcomes for each of the spans nested inside that transaction. -fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { - if !enforcement.is_event_active() { - return; - } - - let spans_count = count_nested_spans(envelope); - if spans_count == 0 { - return; - } - - if enforcement.event.is_active() { - enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); - } - - if enforcement.event_indexed.is_active() { - enforcement.spans_indexed = enforcement - .event_indexed - .clone_for(DataCategory::SpanIndexed, spans_count); - } -} - -/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). 
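// `count_nested_spans` (removed below) deserializes only the `spans` field of the transaction
// payload and adds one for the transaction itself, which is later extracted as a span. A
// self-contained sketch of the same idea; instead of relay's `SeqCount` helper this version
// simply counts the elements of the `spans` array.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct PartialEvent {
    #[serde(default)]
    spans: Vec<serde_json::Value>,
}

fn count_spans_with_transaction(payload: &[u8]) -> usize {
    serde_json::from_slice::<PartialEvent>(payload)
        // +1 for the transaction, which is also counted as a span on the slow path.
        .map_or(0, |event| event.spans.len() + 1)
}

fn main() {
    let payload = br#"{"type": "transaction", "spans": [{}, {}]}"#;
    assert_eq!(count_spans_with_transaction(payload), 3);

    // Payloads that fail to parse count as zero spans.
    assert_eq!(count_spans_with_transaction(b"not json"), 0);
}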
-fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { - #[derive(Debug, serde::Deserialize)] - struct PartialEvent { - spans: crate::utils::SeqCount, - } - - envelope - .envelope() - .items() - .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) - .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) - // We do + 1, since we count the transaction itself because it will be extracted - // as a span and counted during the slow path of rate limiting. - .map_or(0, |event| event.spans.0 + 1) -} - -#[cfg(test)] -mod tests { - use crate::envelope::{ContentType, Envelope, Item}; - use crate::extractors::RequestMeta; - use crate::services::processor::ProcessingGroup; - use crate::services::projects::project::{ProjectInfo, PublicKeyConfig}; - use relay_base_schema::project::{ProjectId, ProjectKey}; - use relay_event_schema::protocol::EventId; - use serde_json::json; - use smallvec::smallvec; - - use super::*; - - fn create_project(config: &Config, data: Option) -> Project<'_> { - let mut project_info = ProjectInfo { - project_id: Some(ProjectId::new(42)), - ..Default::default() - }; - project_info.public_keys = smallvec![PublicKeyConfig { - public_key: ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), - numeric_id: None, - }]; - - if let Some(data) = data { - project_info.config = serde_json::from_value(data).unwrap(); - } - - Project::new( - SharedProject::for_test(ProjectState::Enabled(project_info.into())), - config, - ) - } - - fn request_meta() -> RequestMeta { - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - RequestMeta::new(dsn) - } - - #[test] - fn test_track_nested_spans_outcomes() { - let config = Default::default(); - let project = create_project( - &config, - Some(json!({ - "features": [ - "organizations:indexed-spans-extraction" - ], - "quotas": [{ - "id": "foo", - "categories": ["transaction"], - "window": 3600, - "limit": 0, - "reasonCode": "foo", - }] - })), - ); - - let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); - - let mut transaction = Item::new(ItemType::Transaction); - transaction.set_payload( - ContentType::Json, - r#"{ - "event_id": "52df9022835246eeb317dbd739ccd059", - "type": "transaction", - "transaction": "I have a stale timestamp, but I'm recent!", - "start_timestamp": 1, - "timestamp": 2, - "contexts": { - "trace": { - "trace_id": "ff62a8b040f340bda5d830223def1d81", - "span_id": "bd429c44b67a3eb4" - } - }, - "spans": [ - { - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - }, - { - "span_id": "bd429c44b67a3eb5", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - } - ] -}"#, - ); - - envelope.add_item(transaction); - - let (outcome_aggregator, mut outcome_aggregator_rx) = relay_system::Addr::custom(); - let (test_store, _) = relay_system::Addr::custom(); - - let managed_envelope = ManagedEnvelope::new( - envelope, - outcome_aggregator.clone(), - test_store, - ProcessingGroup::Transaction, - ); - - project.check_envelope(managed_envelope).unwrap(); - drop(outcome_aggregator); - - let expected = [ - (DataCategory::Transaction, 1), - (DataCategory::TransactionIndexed, 1), - (DataCategory::Span, 3), - (DataCategory::SpanIndexed, 3), - ]; - - for (expected_category, expected_quantity) in expected { - let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); - assert_eq!(outcome.category, expected_category); - 
assert_eq!(outcome.quantity, expected_quantity); - } - } -} diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs deleted file mode 100644 index 23a21e553c..0000000000 --- a/relay-server/src/services/projects/cache/service.rs +++ /dev/null @@ -1,225 +0,0 @@ -use std::sync::Arc; - -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use relay_statsd::metric; -use relay_system::Service; -use tokio::sync::{broadcast, mpsc}; - -use crate::services::projects::cache::handle::ProjectCacheHandle; -use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; -use crate::services::projects::project::ProjectState; -use crate::services::projects::source::ProjectSource; -use crate::statsd::{RelayGauges, RelayTimers}; - -/// Size of the broadcast channel for project events. -/// -/// This is set to a value which theoretically should never be reachable, -/// the number of events is approximately bounded by the amount of projects -/// receiving events. -/// -/// It is set to such a large amount because receivers of events currently -/// do not deal with lags in the channel gracefully. -const PROJECT_EVENTS_CHANNEL_SIZE: usize = 512_000; - -/// A cache for projects, which allows concurrent access to the cached projects. -#[derive(Debug)] -pub enum ProjectCache { - /// Schedules another fetch or update for the specified project. - /// - /// A project which is not fetched will eventually expire and be evicted - /// from the cache. Fetches for an already cached project ensure the project - /// is always up to date and not evicted. - Fetch(ProjectKey), -} - -impl ProjectCache { - fn variant(&self) -> &'static str { - match self { - Self::Fetch(_) => "fetch", - } - } -} - -impl relay_system::Interface for ProjectCache {} - -impl relay_system::FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: Self, _: ()) -> Self { - message - } -} - -/// Project life-cycle changes produced by the project cache. -#[derive(Debug, Copy, Clone)] -pub enum ProjectChange { - /// A project was successfully fetched and is now ready to use. - Ready(ProjectKey), - /// A project expired from the cache and was evicted. - Evicted(ProjectKey), -} - -/// A service implementing the [`ProjectCache`] interface. -pub struct ProjectCacheService { - store: ProjectStore, - source: ProjectSource, - config: Arc, - - project_update_rx: mpsc::UnboundedReceiver, - project_update_tx: mpsc::UnboundedSender, - - project_events_tx: broadcast::Sender, -} - -impl ProjectCacheService { - /// Creates a new [`ProjectCacheService`]. - pub fn new(config: Arc, source: ProjectSource) -> Self { - let (project_update_tx, project_update_rx) = mpsc::unbounded_channel(); - let project_events_tx = broadcast::channel(PROJECT_EVENTS_CHANNEL_SIZE).0; - - Self { - store: ProjectStore::default(), - source, - config, - project_update_rx, - project_update_tx, - project_events_tx, - } - } - - /// Consumes and starts a [`ProjectCacheService`]. - /// - /// Returns a [`ProjectCacheHandle`] to access the cache concurrently. 
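// The removed handle/service pair distributes project life-cycle events over a
// `tokio::sync::broadcast` channel: the service sends `Ready`/`Evicted` and every handle can
// subscribe via `changes()`. A self-contained sketch of that notification pattern; the types
// here are simplified stand-ins.
use tokio::sync::broadcast;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ProjectChange {
    Ready(u64),
    Evicted(u64),
}

#[derive(Clone)]
struct CacheHandle {
    changes: broadcast::Sender<ProjectChange>,
}

impl CacheHandle {
    fn changes(&self) -> broadcast::Receiver<ProjectChange> {
        self.changes.subscribe()
    }
}

#[tokio::main]
async fn main() {
    let (tx, _) = broadcast::channel(16);
    let handle = CacheHandle { changes: tx.clone() };

    // Subscribe before the event is sent; broadcast receivers only observe events
    // sent after they subscribed.
    let mut rx = handle.changes();
    tx.send(ProjectChange::Ready(42)).unwrap();
    let _ = tx.send(ProjectChange::Evicted(7));

    assert_eq!(rx.recv().await.unwrap(), ProjectChange::Ready(42));
}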
- pub fn start(self) -> ProjectCacheHandle { - let (addr, addr_rx) = relay_system::channel(Self::name()); - - let handle = ProjectCacheHandle { - shared: self.store.shared(), - config: Arc::clone(&self.config), - service: addr, - project_changes: self.project_events_tx.clone(), - }; - - self.spawn_handler(addr_rx); - - handle - } - - /// Schedules a new [`Fetch`] and delivers the result to the [`Self::project_update_tx`] channel. - fn schedule_fetch(&self, fetch: Fetch) { - let source = self.source.clone(); - let project_updates = self.project_update_tx.clone(); - - tokio::spawn(async move { - tokio::time::sleep_until(fetch.when()).await; - - let state = match source - .fetch(fetch.project_key(), false, fetch.revision()) - .await - { - Ok(result) => result, - Err(err) => { - relay_log::error!( - tags.project_key = fetch.project_key().as_str(), - error = &err as &dyn std::error::Error, - "failed to fetch project from source: {fetch:?}" - ); - - // TODO: change this to ProjectState::Pending once we consider it safe to do so. - // see https://github.com/getsentry/relay/pull/4140. - ProjectState::Disabled.into() - } - }; - - let _ = project_updates.send(fetch.complete(state)); - }); - } -} - -/// All [`ProjectCacheService`] message handlers. -impl ProjectCacheService { - fn handle_fetch(&mut self, project_key: ProjectKey) { - if let Some(fetch) = self.store.try_begin_fetch(project_key, &self.config) { - self.schedule_fetch(fetch); - } - } - - fn handle_completed_fetch(&mut self, fetch: CompletedFetch) { - let project_key = fetch.project_key(); - - if let Some(fetch) = self.store.complete_fetch(fetch, &self.config) { - relay_log::trace!( - project_key = fetch.project_key().as_str(), - "re-scheduling project fetch: {fetch:?}" - ); - self.schedule_fetch(fetch); - return; - } - - let _ = self - .project_events_tx - .send(ProjectChange::Ready(project_key)); - - metric!( - gauge(RelayGauges::ProjectCacheNotificationChannel) = - self.project_events_tx.len() as u64 - ); - } - - fn handle_evict_stale_projects(&mut self) { - let on_evict = |project_key| { - let _ = self - .project_events_tx - .send(ProjectChange::Evicted(project_key)); - }; - - self.store.evict_stale_projects(&self.config, on_evict); - } - - fn handle_message(&mut self, message: ProjectCache) { - match message { - ProjectCache::Fetch(project_key) => self.handle_fetch(project_key), - } - } -} - -impl relay_system::Service for ProjectCacheService { - type Interface = ProjectCache; - - fn spawn_handler(mut self, mut rx: relay_system::Receiver) { - macro_rules! timed { - ($task:expr, $body:expr) => {{ - let task_name = $task; - metric!( - timer(RelayTimers::ProjectCacheTaskDuration), - task = task_name, - { $body } - ) - }}; - } - - tokio::spawn(async move { - let mut eviction_ticker = tokio::time::interval(self.config.cache_eviction_interval()); - - loop { - tokio::select! 
{ - biased; - - Some(update) = self.project_update_rx.recv() => timed!( - "project_update", - self.handle_completed_fetch(update) - ), - Some(message) = rx.recv() => timed!( - message.variant(), - self.handle_message(message) - ), - _ = eviction_ticker.tick() => timed!( - "evict_stale_projects", - self.handle_evict_stale_projects() - ), - } - } - }); - } -} diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs deleted file mode 100644 index ada692cef0..0000000000 --- a/relay-server/src/services/projects/cache/state.rs +++ /dev/null @@ -1,769 +0,0 @@ -use std::fmt; -use std::sync::Arc; -use tokio::time::Instant; - -use arc_swap::ArcSwap; -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use relay_quotas::CachedRateLimits; -use relay_sampling::evaluation::ReservoirCounters; -use relay_statsd::metric; - -use crate::services::projects::project::{ProjectState, Revision}; -use crate::services::projects::source::SourceProjectState; -use crate::statsd::{RelayCounters, RelayHistograms}; -use crate::utils::RetryBackoff; - -/// The backing storage for a project cache. -/// -/// Exposes the only interface to delete from [`Shared`], guaranteed by -/// requiring exclusive/mutable access to [`ProjectStore`]. -/// -/// [`Shared`] can be extended through [`Shared::get_or_create`], in which case -/// the private state is missing. Users of [`Shared::get_or_create`] *must* trigger -/// a fetch to create the private state and keep it updated. -/// This guarantees that eventually the project state is populated, but for a undetermined, -/// time it is possible that shared state exists without the respective private state. -#[derive(Default)] -pub struct ProjectStore { - /// The shared state, which can be accessed concurrently. - shared: Arc, - /// The private, mutably exclusive state, used to maintain the project state. - private: hashbrown::HashMap, -} - -impl ProjectStore { - /// Retrieves a [`Shared`] handle which can be freely shared with multiple consumers. - pub fn shared(&self) -> Arc { - Arc::clone(&self.shared) - } - - /// Tries to begin a new fetch for the passed `project_key`. - /// - /// Returns `None` if no fetch is necessary or there is already a fetch ongoing. - /// A returned [`Fetch`] must be scheduled and completed with [`Fetch::complete`] and - /// [`Self::complete_fetch`]. - pub fn try_begin_fetch(&mut self, project_key: ProjectKey, config: &Config) -> Option { - self.get_or_create(project_key, config) - .try_begin_fetch(config) - } - - /// Completes a [`CompletedFetch`] started with [`Self::try_begin_fetch`]. - /// - /// Returns a new [`Fetch`] if another fetch must be scheduled. This happens when the fetched - /// [`ProjectState`] is still pending or already deemed expired. - #[must_use = "an incomplete fetch must be retried"] - pub fn complete_fetch(&mut self, fetch: CompletedFetch, config: &Config) -> Option { - // Eviction is not possible for projects which are currently being fetched. - // Hence if there was a started fetch, the project state must always exist at this stage. - debug_assert!(self.shared.projects.pin().get(&fetch.project_key).is_some()); - debug_assert!(self.private.get(&fetch.project_key).is_some()); - - let mut project = self.get_or_create(fetch.project_key, config); - project.complete_fetch(fetch); - // Schedule another fetch if necessary, usually should only happen if - // the completed fetch is pending. 
- project.try_begin_fetch(config) - } - - /// Evicts all stale, expired projects from the cache. - /// - /// Evicted projects are passed to the `on_evict` callback. Returns the total amount of evicted - /// projects. - pub fn evict_stale_projects(&mut self, config: &Config, mut on_evict: F) -> usize - where - F: FnMut(ProjectKey), - { - let eviction_start = Instant::now(); - - let expired = self.private.extract_if(|_, private| { - // We only evict projects which have fully expired and are not currently being fetched. - // - // This relies on the fact that a project should never remain in the `pending` state - // for long and is either always being fetched or successfully fetched. - private.last_fetch().map_or(false, |ts| { - ts.check_expiry(eviction_start, config).is_expired() - }) - }); - - let mut evicted = 0; - - let shared = self.shared.projects.pin(); - for (project_key, _) in expired { - let _removed = shared.remove(&project_key); - debug_assert!( - _removed.is_some(), - "an expired project must exist in the shared state" - ); - relay_log::trace!(tags.project_key = project_key.as_str(), "project evicted"); - - evicted += 1; - - on_evict(project_key); - } - drop(shared); - - metric!( - histogram(RelayHistograms::ProjectStateCacheSize) = self.shared.projects.len() as u64, - storage = "shared" - ); - metric!( - histogram(RelayHistograms::ProjectStateCacheSize) = self.private.len() as u64, - storage = "private" - ); - metric!(counter(RelayCounters::EvictingStaleProjectCaches) += evicted as u64); - - evicted - } - - /// Get a reference to the current project or create a new project. - /// - /// For internal use only, a created project must always be fetched immediately. - fn get_or_create(&mut self, project_key: ProjectKey, config: &Config) -> ProjectRef<'_> { - #[cfg(debug_assertions)] - if self.private.contains_key(&project_key) { - // We have exclusive access to the private part, there are no concurrent deletions - // hence if we have a private state there must always be a shared state as well. - // - // The opposite is not true, the shared state may have been created concurrently - // through the shared access. - debug_assert!(self.shared.projects.pin().contains_key(&project_key)); - } - - let private = self - .private - .entry(project_key) - .and_modify(|_| { - metric!(counter(RelayCounters::ProjectCacheHit) += 1); - }) - .or_insert_with(|| { - metric!(counter(RelayCounters::ProjectCacheMiss) += 1); - PrivateProjectState::new(project_key, config) - }); - - let shared = self - .shared - .projects - .pin() - .get_or_insert_with(project_key, Default::default) - .clone(); - - ProjectRef { private, shared } - } -} - -/// The shared and concurrently accessible handle to the project cache. -#[derive(Default)] -pub struct Shared { - projects: papaya::HashMap, -} - -impl Shared { - /// Returns the existing project state or creates a new one. - /// - /// The caller must ensure that the project cache is instructed to - /// [`super::ProjectCache::Fetch`] the retrieved project. - pub fn get_or_create(&self, project_key: ProjectKey) -> SharedProject { - // The fast path, we expect the project to exist. - let projects = self.projects.pin(); - if let Some(project) = projects.get(&project_key) { - return project.to_shared_project(); - } - - // The slow path, try to attempt to insert, somebody else may have been faster, but that's okay. 
- match projects.try_insert(project_key, Default::default()) { - Ok(inserted) => inserted.to_shared_project(), - Err(occupied) => occupied.current.to_shared_project(), - } - } -} - -/// TEST ONLY bypass to make the project cache mockable. -#[cfg(test)] -impl Shared { - /// Updates the project state for a project. - /// - /// TEST ONLY! - pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { - self.projects - .pin() - .get_or_insert_with(project_key, Default::default) - .set_project_state(state); - } - - /// Returns `true` if there exists a shared state for the passed `project_key`. - pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { - self.projects.pin().contains_key(&project_key) - } -} - -impl fmt::Debug for Shared { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Shared") - .field("num_projects", &self.projects.len()) - .finish() - } -} - -/// A single project from the [`Shared`] project cache. -pub struct SharedProject(Arc); - -impl SharedProject { - /// Returns a reference to the contained [`ProjectState`]. - pub fn project_state(&self) -> &ProjectState { - &self.0.state - } - - /// Returns a reference to the contained [`CachedRateLimits`]. - pub fn cached_rate_limits(&self) -> &CachedRateLimits { - // Exposing cached rate limits may be a bad idea, this allows mutation - // and caching of rate limits for pending projects, which may or may not be fine. - // Although, for now this is fine. - // - // Read only access is easily achievable if we return only the current rate limits. - &self.0.rate_limits - } - - /// Returns a reference to the contained [`ReservoirCounters`]. - pub fn reservoir_counters(&self) -> &ReservoirCounters { - &self.0.reservoir_counters - } -} - -/// TEST ONLY bypass to make the project cache mockable. -#[cfg(test)] -impl SharedProject { - /// Creates a new [`SharedProject`] for testing only. - pub fn for_test(state: ProjectState) -> Self { - Self(Arc::new(SharedProjectStateInner { - state, - ..Default::default() - })) - } -} - -/// Reference to a full project wrapping shared and private state. -struct ProjectRef<'a> { - shared: SharedProjectState, - private: &'a mut PrivateProjectState, -} - -impl ProjectRef<'_> { - fn try_begin_fetch(&mut self, config: &Config) -> Option { - let now = Instant::now(); - self.private - .try_begin_fetch(now, config) - .map(|fetch| fetch.with_revision(self.shared.revision())) - } - - fn complete_fetch(&mut self, fetch: CompletedFetch) { - let now = Instant::now(); - self.private.complete_fetch(&fetch, now); - - // Keep the old state around if the current fetch is pending. - // It may still be useful to callers. - match fetch.state { - SourceProjectState::New(state) if !state.is_pending() => { - self.shared.set_project_state(state); - } - _ => {} - } - } -} - -/// A [`Fetch`] token. -/// -/// When returned it must be executed and completed using [`Self::complete`]. -#[must_use = "a fetch must be executed"] -#[derive(Debug)] -pub struct Fetch { - project_key: ProjectKey, - when: Instant, - revision: Revision, -} - -impl Fetch { - /// Returns the [`ProjectKey`] of the project to fetch. - pub fn project_key(&self) -> ProjectKey { - self.project_key - } - - /// Returns when the fetch for the project should be scheduled. - /// - /// This can be now (as soon as possible) or a later point in time, if the project is currently - /// in a backoff. - pub fn when(&self) -> Instant { - self.when - } - - /// Returns the revisions of the currently cached project. 
- /// - /// If the upstream indicates it does not have a different version of this project - /// we do not need to update the local state. - pub fn revision(&self) -> Revision { - self.revision.clone() - } - - /// Completes the fetch with a result and returns a [`CompletedFetch`]. - pub fn complete(self, state: SourceProjectState) -> CompletedFetch { - CompletedFetch { - project_key: self.project_key, - state, - } - } - - fn with_revision(mut self, revision: Revision) -> Self { - self.revision = revision; - self - } -} - -/// The result of an executed [`Fetch`]. -#[must_use = "a completed fetch must be acted upon"] -#[derive(Debug)] -pub struct CompletedFetch { - project_key: ProjectKey, - state: SourceProjectState, -} - -impl CompletedFetch { - /// Returns the [`ProjectKey`] of the project which was fetched. - pub fn project_key(&self) -> ProjectKey { - self.project_key - } - - /// Returns `true` if the fetch completed with a pending status. - fn is_pending(&self) -> bool { - match &self.state { - SourceProjectState::New(state) => state.is_pending(), - SourceProjectState::NotModified => false, - } - } -} - -/// The state of a project contained in the [`Shared`] project cache. -/// -/// This state is interior mutable and allows updates to the project. -#[derive(Debug, Default, Clone)] -struct SharedProjectState(Arc>); - -impl SharedProjectState { - /// Updates the project state. - fn set_project_state(&self, state: ProjectState) { - let prev = self.0.rcu(|stored| SharedProjectStateInner { - state: state.clone(), - rate_limits: Arc::clone(&stored.rate_limits), - reservoir_counters: Arc::clone(&stored.reservoir_counters), - }); - - // Try clean expired reservoir counters. - // - // We do it after the `rcu`, to not re-run this more often than necessary. - if let Some(state) = state.enabled() { - let config = state.config.sampling.as_ref(); - if let Some(config) = config.and_then(|eb| eb.as_ref().ok()) { - // We can safely use previous here, the `rcu` just replaced the state, the - // reservoir counters did not change. - // - // `try_lock` to not potentially block, it's a best effort cleanup. - if let Ok(mut counters) = prev.reservoir_counters.try_lock() { - counters.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); - } - } - } - } - - /// Extracts and clones the revision from the contained project state. - fn revision(&self) -> Revision { - self.0.as_ref().load().state.revision().clone() - } - - /// Transforms this interior mutable handle to an immutable [`SharedProject`]. - fn to_shared_project(&self) -> SharedProject { - SharedProject(self.0.as_ref().load_full()) - } -} - -/// The data contained in a [`SharedProjectState`]. -/// -/// All fields must be cheap to clone and are ideally just a single `Arc`. -/// Partial updates to [`SharedProjectState`], are performed using `rcu` cloning all fields. -#[derive(Debug, Default)] -struct SharedProjectStateInner { - state: ProjectState, - rate_limits: Arc, - reservoir_counters: ReservoirCounters, -} - -/// Current fetch state for a project. -#[derive(Debug)] -enum FetchState { - /// There is a fetch currently in progress. - InProgress, - /// A successful fetch is pending. - /// - /// This state is essentially only the initial state, a project - /// for the most part should always have a fetch in progress or be - /// in the non-pending state. - Pending { - /// Time when the next fetch should be attempted. - /// - /// `None` means soon as possible. - next_fetch_attempt: Option, - }, - /// There was a successful non-pending fetch. 
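// `SharedProjectState::set_project_state` (removed above) swaps the whole shared snapshot
// through `arc_swap`'s `rcu`, so concurrent readers either see the old or the new state,
// never a half-updated one. A minimal standalone sketch of that update pattern; `Snapshot`
// is an illustrative stand-in for `SharedProjectStateInner`.
use arc_swap::ArcSwap;

#[derive(Debug, Default)]
struct Snapshot {
    generation: u64,
}

fn main() {
    let shared = ArcSwap::from_pointee(Snapshot::default());

    // Build the replacement from the currently stored snapshot; `rcu` re-runs the closure
    // if another writer races us and returns the previously stored value.
    let prev = shared.rcu(|current| Snapshot {
        generation: current.generation + 1,
    });

    assert_eq!(prev.generation, 0);
    assert_eq!(shared.load().generation, 1);
}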
- Complete { - /// Time when the fetch was completed. - last_fetch: LastFetch, - }, -} - -/// Contains all mutable state necessary to maintain the project cache. -struct PrivateProjectState { - /// Project key this state belongs to. - project_key: ProjectKey, - - /// The current fetch state. - state: FetchState, - /// The current backoff used for calculating the next fetch attempt. - /// - /// The backoff is reset after a successful, non-pending fetch. - backoff: RetryBackoff, -} - -impl PrivateProjectState { - fn new(project_key: ProjectKey, config: &Config) -> Self { - Self { - project_key, - state: FetchState::Pending { - next_fetch_attempt: None, - }, - backoff: RetryBackoff::new(config.http_max_retry_interval()), - } - } - - /// Returns the [`LastFetch`] if there is currently no fetch in progress and the project - /// was fetched successfully before. - fn last_fetch(&self) -> Option { - match &self.state { - FetchState::Complete { last_fetch } => Some(*last_fetch), - _ => None, - } - } - - fn try_begin_fetch(&mut self, now: Instant, config: &Config) -> Option { - let when = match &self.state { - FetchState::InProgress {} => { - relay_log::trace!( - tags.project_key = self.project_key.as_str(), - "project fetch skipped, fetch in progress" - ); - return None; - } - FetchState::Pending { next_fetch_attempt } => { - // Schedule a new fetch, even if there is a backoff, it will just be sleeping for a while. - next_fetch_attempt.unwrap_or(now) - } - FetchState::Complete { last_fetch } => { - if last_fetch.check_expiry(now, config).is_fresh() { - // The current state is up to date, no need to start another fetch. - relay_log::trace!( - tags.project_key = self.project_key.as_str(), - "project fetch skipped, already up to date" - ); - return None; - } - now - } - }; - - // Mark a current fetch in progress. - self.state = FetchState::InProgress {}; - - relay_log::trace!( - tags.project_key = &self.project_key.as_str(), - attempts = self.backoff.attempt() + 1, - "project state fetch scheduled in {:?}", - when.saturating_duration_since(Instant::now()), - ); - - Some(Fetch { - project_key: self.project_key, - when, - revision: Revision::default(), - }) - } - - fn complete_fetch(&mut self, fetch: &CompletedFetch, now: Instant) { - debug_assert!( - matches!(self.state, FetchState::InProgress), - "fetch completed while there was no current fetch registered" - ); - - if fetch.is_pending() { - self.state = FetchState::Pending { - next_fetch_attempt: now.checked_add(self.backoff.next_backoff()), - }; - relay_log::trace!( - tags.project_key = &self.project_key.as_str(), - "project state fetch completed but still pending" - ); - } else { - relay_log::trace!( - tags.project_key = &self.project_key.as_str(), - "project state fetch completed with non-pending config" - ); - self.backoff.reset(); - self.state = FetchState::Complete { - last_fetch: LastFetch(now), - }; - } - } -} - -/// New type containing the last successful fetch time as an [`Instant`]. -#[derive(Debug, Copy, Clone)] -struct LastFetch(Instant); - -impl LastFetch { - /// Returns the [`Expiry`] of the last fetch in relation to `now`. - fn check_expiry(&self, now: Instant, config: &Config) -> Expiry { - let expiry = config.project_cache_expiry(); - let elapsed = now.saturating_duration_since(self.0); - - if elapsed >= expiry + config.project_grace_period() { - Expiry::Expired - } else if elapsed >= expiry { - Expiry::Stale - } else { - Expiry::Fresh - } - } -} - -/// Expiry state of a project. 
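// `LastFetch::check_expiry` above classifies a cached project by how long ago it was fetched:
// fresh within `project_cache_expiry`, stale during the grace period, expired afterwards. The
// same decision as a standalone function over plain `Duration`s.
use std::time::Duration;

#[derive(Debug, PartialEq, Eq)]
enum Expiry {
    Fresh,
    Stale,
    Expired,
}

fn check_expiry(elapsed: Duration, expiry: Duration, grace_period: Duration) -> Expiry {
    if elapsed >= expiry + grace_period {
        Expiry::Expired
    } else if elapsed >= expiry {
        Expiry::Stale
    } else {
        Expiry::Fresh
    }
}

fn main() {
    let expiry = Duration::from_secs(5);
    let grace = Duration::from_secs(5);

    assert_eq!(check_expiry(Duration::from_secs(3), expiry, grace), Expiry::Fresh);
    assert_eq!(check_expiry(Duration::from_secs(7), expiry, grace), Expiry::Stale);
    assert_eq!(check_expiry(Duration::from_secs(11), expiry, grace), Expiry::Expired);
}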
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -enum Expiry { - /// The project state is perfectly up to date. - Fresh, - /// The project state is outdated but events depending on this project state can still be - /// processed. The state should be refreshed in the background though. - Stale, - /// The project state is completely outdated and events need to be buffered up until the new - /// state has been fetched. - Expired, -} - -impl Expiry { - /// Returns `true` if the project is up-to-date and does not need to be fetched. - fn is_fresh(&self) -> bool { - matches!(self, Self::Fresh) - } - - /// Returns `true` if the project is expired and can be evicted. - fn is_expired(&self) -> bool { - matches!(self, Self::Expired) - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use super::*; - - fn collect_evicted(store: &mut ProjectStore, config: &Config) -> Vec { - let mut evicted = Vec::new(); - let num_evicted = store.evict_stale_projects(config, |pk| evicted.push(pk)); - assert_eq!(evicted.len(), num_evicted); - evicted - } - - macro_rules! assert_state { - ($store:ident, $project_key:ident, $state:pat) => { - assert!(matches!( - $store.shared().get_or_create($project_key).project_state(), - $state - )); - }; - } - - #[test] - fn test_store_fetch() { - let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); - let mut store = ProjectStore::default(); - let config = Default::default(); - - let fetch = store.try_begin_fetch(project_key, &config).unwrap(); - assert_eq!(fetch.project_key(), project_key); - assert!(fetch.when() < Instant::now()); - assert_eq!(fetch.revision().as_str(), None); - assert_state!(store, project_key, ProjectState::Pending); - - // Fetch already in progress, nothing to do. - assert!(store.try_begin_fetch(project_key, &config).is_none()); - - // A pending fetch should trigger a new fetch immediately. - let fetch = fetch.complete(ProjectState::Pending.into()); - let fetch = store.complete_fetch(fetch, &config).unwrap(); - assert_eq!(fetch.project_key(), project_key); - // First backoff is still immediately. - assert!(fetch.when() < Instant::now()); - assert_eq!(fetch.revision().as_str(), None); - assert_state!(store, project_key, ProjectState::Pending); - - // Pending again. - let fetch = fetch.complete(ProjectState::Pending.into()); - let fetch = store.complete_fetch(fetch, &config).unwrap(); - assert_eq!(fetch.project_key(), project_key); - // This time it needs to be in the future (backoff). - assert!(fetch.when() > Instant::now()); - assert_eq!(fetch.revision().as_str(), None); - assert_state!(store, project_key, ProjectState::Pending); - - // Now complete with disabled. - let fetch = fetch.complete(ProjectState::Disabled.into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - assert_state!(store, project_key, ProjectState::Disabled); - - // A new fetch is not yet necessary. 
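// Pending fetches above are delayed with relay's `RetryBackoff`, capped at
// `http_max_retry_interval` and reset after a successful non-pending fetch. A rough
// standalone sketch of that kind of capped exponential backoff; the real helper's curve
// differs (for example, the first retry in the test above is immediate).
use std::time::Duration;

struct Backoff {
    attempt: u32,
    base: Duration,
    max: Duration,
}

impl Backoff {
    fn new(base: Duration, max: Duration) -> Self {
        Self { attempt: 0, base, max }
    }

    /// Returns the delay before the next attempt and advances the counter.
    fn next_backoff(&mut self) -> Duration {
        let delay = self.base.saturating_mul(1u32 << self.attempt.min(16));
        self.attempt += 1;
        delay.min(self.max)
    }

    /// Called after a successful fetch so the next failure starts small again.
    fn reset(&mut self) {
        self.attempt = 0;
    }
}

fn main() {
    let mut backoff = Backoff::new(Duration::from_millis(100), Duration::from_secs(5));
    assert_eq!(backoff.next_backoff(), Duration::from_millis(100));
    assert_eq!(backoff.next_backoff(), Duration::from_millis(200));
    backoff.reset();
    assert_eq!(backoff.next_backoff(), Duration::from_millis(100));
}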
- assert!(store.try_begin_fetch(project_key, &config).is_none()); - } - - #[tokio::test(start_paused = true)] - async fn test_store_fetch_pending_does_not_replace_state() { - let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); - let mut store = ProjectStore::default(); - let config = Config::from_json_value(serde_json::json!({ - "cache": { - "project_expiry": 5, - "project_grace_period": 5, - } - })) - .unwrap(); - - let fetch = store.try_begin_fetch(project_key, &config).unwrap(); - let fetch = fetch.complete(ProjectState::Disabled.into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - assert_state!(store, project_key, ProjectState::Disabled); - - tokio::time::advance(Duration::from_secs(6)).await; - - let fetch = store.try_begin_fetch(project_key, &config).unwrap(); - let fetch = fetch.complete(ProjectState::Pending.into()); - // We're returned a new fetch, because the current one completed pending. - let fetch = store.complete_fetch(fetch, &config).unwrap(); - // The old cached state is still available and not replaced. - assert_state!(store, project_key, ProjectState::Disabled); - - let fetch = fetch.complete(ProjectState::new_allowed().into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - assert_state!(store, project_key, ProjectState::Enabled(_)); - } - - #[tokio::test(start_paused = true)] - async fn test_store_evict_projects() { - let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); - let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); - let mut store = ProjectStore::default(); - let config = Config::from_json_value(serde_json::json!({ - "cache": { - "project_expiry": 5, - "project_grace_period": 0, - } - })) - .unwrap(); - - let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); - let fetch = fetch.complete(ProjectState::Disabled.into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - - assert_eq!(collect_evicted(&mut store, &config), Vec::new()); - assert_state!(store, project_key1, ProjectState::Disabled); - - // 3 seconds is not enough to expire any project. - tokio::time::advance(Duration::from_secs(3)).await; - - assert_eq!(collect_evicted(&mut store, &config), Vec::new()); - assert_state!(store, project_key1, ProjectState::Disabled); - - let fetch = store.try_begin_fetch(project_key2, &config).unwrap(); - let fetch = fetch.complete(ProjectState::Disabled.into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - - // A total of 6 seconds should expire the first project. - tokio::time::advance(Duration::from_secs(3)).await; - - assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); - assert_state!(store, project_key1, ProjectState::Pending); - assert_state!(store, project_key2, ProjectState::Disabled); - } - - #[tokio::test(start_paused = true)] - async fn test_store_evict_projects_pending_not_expired() { - let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); - let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); - let mut store = ProjectStore::default(); - let config = Config::from_json_value(serde_json::json!({ - "cache": { - "project_expiry": 5, - "project_grace_period": 0, - } - })) - .unwrap(); - - let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); - // Create a new project in a pending state, but never fetch it, this should also never expire. 
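// The eviction tests above run on tokio's paused clock: `start_paused = true` freezes time
// and `tokio::time::advance` moves it forward deterministically, so expiry windows of several
// seconds can be exercised instantly. A tiny example of the same testing technique.
use std::time::Duration;
use tokio::time::Instant;

#[tokio::test(start_paused = true)]
async fn expiry_with_paused_clock() {
    let fetched_at = Instant::now();
    let expiry = Duration::from_secs(5);

    // Not expired yet after 3 virtual seconds.
    tokio::time::advance(Duration::from_secs(3)).await;
    assert!(fetched_at.elapsed() < expiry);

    // Another 3 seconds pushes it over the expiry threshold.
    tokio::time::advance(Duration::from_secs(3)).await;
    assert!(fetched_at.elapsed() >= expiry);
}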
- store.shared().get_or_create(project_key2); - - tokio::time::advance(Duration::from_secs(6)).await; - - // No evictions, project is pending. - assert_eq!(collect_evicted(&mut store, &config), Vec::new()); - - // Complete the project. - let fetch = fetch.complete(ProjectState::Disabled.into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - - // Still should not be evicted, because we do have 5 seconds to expire since completion. - assert_eq!(collect_evicted(&mut store, &config), Vec::new()); - tokio::time::advance(Duration::from_secs(4)).await; - assert_eq!(collect_evicted(&mut store, &config), Vec::new()); - assert_state!(store, project_key1, ProjectState::Disabled); - - // Just enough to expire the project. - tokio::time::advance(Duration::from_millis(1001)).await; - assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); - assert_state!(store, project_key1, ProjectState::Pending); - assert_state!(store, project_key2, ProjectState::Pending); - } - - #[tokio::test(start_paused = true)] - async fn test_store_evict_projects_stale() { - let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); - let mut store = ProjectStore::default(); - let config = Config::from_json_value(serde_json::json!({ - "cache": { - "project_expiry": 5, - "project_grace_period": 5, - } - })) - .unwrap(); - - let fetch = store.try_begin_fetch(project_key, &config).unwrap(); - let fetch = fetch.complete(ProjectState::Disabled.into()); - assert!(store.complete_fetch(fetch, &config).is_none()); - - // This is in the grace period, but not yet expired. - tokio::time::advance(Duration::from_millis(9500)).await; - - assert_eq!(collect_evicted(&mut store, &config), Vec::new()); - assert_state!(store, project_key, ProjectState::Disabled); - - // Now it's expired. - tokio::time::advance(Duration::from_secs(1)).await; - - assert_eq!(collect_evicted(&mut store, &config), vec![project_key]); - assert_state!(store, project_key, ProjectState::Pending); - } -} diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index 5e23327370..94d0828e14 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -1,127 +1,855 @@ -//! Types that represent the current project state. use std::sync::Arc; +use std::time::Duration; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_dynamic_config::{ErrorBoundary, Feature}; +use relay_metrics::{Bucket, MetaAggregator, MetricMeta}; +use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits, Scoping}; +use relay_sampling::evaluation::ReservoirCounters; +use relay_statsd::metric; +use relay_system::{Addr, BroadcastChannel}; use serde::{Deserialize, Serialize}; +use tokio::time::Instant; -use relay_base_schema::project::ProjectKey; -use relay_quotas::Scoping; - -mod info; - -pub use self::info::*; - -/// Representation of a project's current state. -#[derive(Clone, Debug, Default)] -pub enum ProjectState { - /// A valid project that is not disabled. - Enabled(Arc), - /// A project that was marked as "gone" by the upstream. This variant does not expose - /// any other project information. - Disabled, - /// A project to which one of the following conditions apply: - /// - The project has not yet been fetched. - /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). 
- /// - The upstream returned an unparsable project so we have to try again. - /// - The project has expired and must be treated as "has not been fetched". - #[default] - Pending, +use crate::envelope::ItemType; +use crate::services::metrics::{Aggregator, MergeBuckets}; +use crate::services::outcome::{DiscardReason, Outcome}; +use crate::services::processor::{EncodeMetricMeta, EnvelopeProcessor, ProcessProjectMetrics}; +use crate::services::projects::cache::{ + CheckedEnvelope, ProcessMetrics, ProjectCache, RequestUpdate, +}; +use crate::utils::{Enforcement, SeqCount}; + +use crate::statsd::RelayCounters; +use crate::utils::{CheckLimits, EnvelopeLimiter, ManagedEnvelope, RetryBackoff}; + +pub mod state; + +pub use state::{ + ExpiryState, LimitedParsedProjectState, ParsedProjectState, ProjectFetchState, ProjectInfo, + ProjectState, +}; + +/// Sender type for messages that respond with project states. +pub type ProjectSender = relay_system::BroadcastSender; + +/// Represents a public key received from the projectconfig endpoint. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PublicKeyConfig { + /// Public part of key (random hash). + pub public_key: ProjectKey, + + /// The primary key of the DSN in Sentry's main database. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub numeric_id: Option, +} + +/// Channel used to respond to state requests (e.g. by project config endpoint). +#[derive(Debug)] +struct StateChannel { + inner: BroadcastChannel, + no_cache: bool, } -impl ProjectState { - /// Project state for an unknown but allowed project. +impl StateChannel { + pub fn new() -> Self { + Self { + inner: BroadcastChannel::new(), + no_cache: false, + } + } + + pub fn no_cache(&mut self, no_cache: bool) -> &mut Self { + self.no_cache = no_cache; + self + } +} + +#[derive(Debug)] +enum GetOrFetch<'a> { + Cached(ProjectState), + Scheduled(&'a mut StateChannel), +} + +/// Structure representing organization and project configuration for a project key. +/// +/// This structure no longer uniquely identifies a project. Instead, it identifies a project key. +/// Projects can define multiple keys, in which case this structure is duplicated for each instance. +#[derive(Debug)] +pub struct Project { + backoff: RetryBackoff, + next_fetch_attempt: Option, + last_updated_at: Instant, + project_key: ProjectKey, + config: Arc, + state: ProjectFetchState, + state_channel: Option, + rate_limits: CachedRateLimits, + last_no_cache: Instant, + reservoir_counters: ReservoirCounters, + metric_meta_aggregator: MetaAggregator, + has_pending_metric_meta: bool, +} + +impl Project { + /// Creates a new `Project`. + pub fn new(key: ProjectKey, config: Arc) -> Self { + Project { + backoff: RetryBackoff::new(config.http_max_retry_interval()), + next_fetch_attempt: None, + last_updated_at: Instant::now(), + project_key: key, + state: ProjectFetchState::expired(), + state_channel: None, + rate_limits: CachedRateLimits::new(), + last_no_cache: Instant::now(), + reservoir_counters: Arc::default(), + metric_meta_aggregator: MetaAggregator::new(config.metrics_meta_locations_max()), + has_pending_metric_meta: false, + config, + } + } + + /// Returns the [`ReservoirCounters`] for the project. + pub fn reservoir_counters(&self) -> ReservoirCounters { + self.reservoir_counters.clone() + } + + /// Returns the current [`ProjectState`] attached to the project. 
+ pub fn current_state(&self) -> ProjectState { + self.state.current_state(&self.config) + } + + /// Returns the currently active cached rate limits. + pub fn current_rate_limits(&mut self) -> &RateLimits { + self.rate_limits.current_limits() + } + + /// If a reservoir rule is no longer in the sampling config, we will remove those counters. + fn remove_expired_reservoir_rules(&self) { + let Some(state) = self.current_state().enabled() else { + return; + }; + + let Some(ErrorBoundary::Ok(config)) = state.config.sampling.as_ref() else { + return; + }; + + // Using try_lock to not slow down the project cache service. + if let Ok(mut guard) = self.reservoir_counters.try_lock() { + guard.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); + } + } + + pub fn merge_rate_limits(&mut self, rate_limits: RateLimits) { + self.rate_limits.merge(rate_limits); + } + + /// Returns the next attempt `Instant` if backoff is initiated, or None otherwise. + pub fn next_fetch_attempt(&self) -> Option { + self.next_fetch_attempt + } + + /// The last time the project state was updated + pub fn last_updated_at(&self) -> Instant { + self.last_updated_at + } + + /// Refresh the update time of the project in order to delay eviction. /// - /// This state is used for forwarding in Proxy mode. - pub fn new_allowed() -> Self { - Self::Enabled(Arc::new(ProjectInfo { - project_id: None, - last_change: None, - rev: Default::default(), - public_keys: Default::default(), - slug: None, - config: Default::default(), - organization_id: None, - })) + /// Called by the project cache when the project state is refreshed. + pub fn refresh_updated_timestamp(&mut self) { + self.last_updated_at = Instant::now(); } - /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). - pub fn sanitized(self) -> Self { - match self { - Self::Enabled(state) => Self::Enabled(Arc::new(state.as_ref().clone().sanitized())), - Self::Disabled => Self::Disabled, - Self::Pending => Self::Pending, + /// Collects internal project state and assembles a [`ProcessProjectMetrics`] message. + pub fn process_metrics(&mut self, message: ProcessMetrics) -> ProcessProjectMetrics { + let project_state = self.current_state(); + let rate_limits = self.rate_limits.current_limits().clone(); + + ProcessProjectMetrics { + project_state, + rate_limits, + + data: message.data, + project_key: message.project_key, + source: message.source, + received_at: message.received_at, + sent_at: message.sent_at, } } - /// Whether or not this state is pending. - pub fn is_pending(&self) -> bool { - matches!(self, Self::Pending) + /// Returns a list of buckets back to the aggregator. + /// + /// This is used to return flushed buckets back to the aggregator if the project has not been + /// loaded at the time of flush. + /// + /// Buckets at this stage are expected to be validated already. + pub fn return_buckets(&self, aggregator: &Addr, buckets: Vec) { + aggregator.send(MergeBuckets::new( + self.project_key, + buckets.into_iter().collect(), + )); + } + + pub fn add_metric_meta( + &mut self, + meta: MetricMeta, + envelope_processor: Addr, + ) { + // Only track metadata if custom metrics are enabled, or we don't know yet whether they are + // enabled. 
+ let is_enabled = match self.current_state() {
+ ProjectState::Enabled(info) => info.has_feature(Feature::CustomMetrics),
+ ProjectState::Disabled => false,
+ ProjectState::Pending => true,
+ };
+
+ if !is_enabled {
+ relay_log::trace!("metric meta not enabled for project {}", self.project_key);
+ return;
+ }
+
+ let Some(meta) = self.metric_meta_aggregator.add(self.project_key, meta) else {
+ // Nothing to do, which also means there is no pending data.
+ relay_log::trace!("metric meta aggregator already has data, nothing to send upstream");
+ return;
+ };
+
+ let scoping = self.scoping();
+ match scoping {
+ Some(scoping) => {
+ // We can only have a scoping if we also have a state, which means at this point feature
+ // flags are already checked.
+ envelope_processor.send(EncodeMetricMeta { scoping, meta })
+ }
+ None => self.has_pending_metric_meta = true,
+ }
+ }
+
+ fn flush_metric_meta(&mut self, envelope_processor: &Addr<EnvelopeProcessor>) {
+ if !self.has_pending_metric_meta {
+ return;
+ }
+ let is_enabled = match self.current_state() {
+ ProjectState::Enabled(project_info) => project_info.has_feature(Feature::CustomMetrics),
+ ProjectState::Disabled => false,
+ ProjectState::Pending => {
+ // Cannot flush, wait for the project state to be loaded.
+ return;
+ }
+ };
+
+ let Some(scoping) = self.scoping() else {
+ return;
+ };
+
+ // All relevant info has been gathered, consider us flushed.
+ self.has_pending_metric_meta = false;
+
+ if !is_enabled {
+ relay_log::debug!(
+ "clearing metric meta aggregator, because project {} does not have the feature flag enabled",
+ self.project_key,
+ );
+ // Project/Org does not have the feature, forget everything.
+ self.metric_meta_aggregator.clear(self.project_key);
+ return;
+ }
+
+ // Flush the entire aggregator containing all code locations for the project.
+ //
+ // There is a rare case where this flushes items which have already been flushed before.
+ // This happens only if we temporarily lose a previously valid and loaded project state
+ // and then receive an update for the project state.
+ // While this is a possible occurrence, the effect should be relatively limited,
+ // especially since the final store does de-duplication.
+ for meta in self
+ .metric_meta_aggregator
+ .get_all_relevant(self.project_key)
+ {
+ relay_log::debug!(
+ "flushing aggregated metric meta for project {}",
+ self.project_key
+ );
+ metric!(counter(RelayCounters::ProjectStateFlushAllMetricMeta) += 1);
+ envelope_processor.send(EncodeMetricMeta { scoping, meta });
+ }
+ }
+
+ /// Returns `true` if the backoff expired and a new attempt can be triggered.
+ fn can_fetch(&self) -> bool {
+ self.next_fetch_attempt
+ .map(|next_attempt_at| next_attempt_at <= Instant::now())
+ .unwrap_or(true)
+ }
+
+ /// Triggers a debounced refresh of the project state.
+ ///
+ /// If the state is already being updated in the background, this method checks if the request
+ /// needs to be upgraded with the `no_cache` flag to ensure a more recent update.
+ fn fetch_state(
+ &mut self,
+ project_cache: Addr<ProjectCache>,
+ no_cache: bool,
+ ) -> &mut StateChannel {
+ // If there is a running request and we do not need to upgrade it to no_cache, or if the
+ // backoff is started and the new attempt is still somewhere in the future, skip
+ // scheduling a new fetch.
+ let should_fetch = !matches!(self.state_channel, Some(ref channel) if channel.no_cache || !no_cache) + && self.can_fetch(); + + let channel = self.state_channel.get_or_insert_with(StateChannel::new); + + if should_fetch { + channel.no_cache(no_cache); + let attempts = self.backoff.attempt() + 1; + relay_log::debug!( + "project {} state requested {attempts} times", + self.project_key + ); + project_cache.send(RequestUpdate { + project_key: self.project_key, + no_cache, + cached_state: self.state.clone(), + }); + } + + channel + } + + fn get_or_fetch_state( + &mut self, + project_cache: Addr, + mut no_cache: bool, + ) -> GetOrFetch<'_> { + // count number of times we are looking for the project state + metric!(counter(RelayCounters::ProjectStateGet) += 1); + + // Allow at most 1 no_cache request per second. Gracefully degrade to cached requests. + if no_cache { + if self.last_no_cache.elapsed() < Duration::from_secs(1) { + no_cache = false; + } else { + metric!(counter(RelayCounters::ProjectStateNoCache) += 1); + self.last_no_cache = Instant::now(); + } + } + + let cached_state = match self.state.expiry_state(&self.config) { + // Never use the cached state if `no_cache` is set. + _ if no_cache => None, + // There is no project state that can be used, fetch a state and return it. + ExpiryState::Expired => None, + // The project is semi-outdated, fetch new state but return old one. + ExpiryState::Stale(state) => Some(state.clone()), + // The project is not outdated, return early here to jump over fetching logic below. + ExpiryState::Updated(state) => return GetOrFetch::Cached(state.clone()), + }; + + let channel = self.fetch_state(project_cache, no_cache); + + match cached_state { + Some(state) => GetOrFetch::Cached(state), + None => GetOrFetch::Scheduled(channel), + } + } + + /// Returns the cached project state if it is valid. + /// + /// Depending on the state of the cache, this method takes different action: + /// + /// - If the cached state is up-to-date, this method simply returns `Some`. + /// - If the cached state is stale, this method triggers a refresh in the background and + /// returns `Some`. The stale period can be configured through + /// [`Config::project_grace_period`]. + /// - If there is no cached state or the cached state is fully outdated, this method triggers a + /// refresh in the background and returns `None`. + /// + /// If `no_cache` is set to true, this method always returns `None` and always triggers a + /// background refresh. + /// + /// To wait for a valid state instead, use [`get_state`](Self::get_state). + pub fn get_cached_state( + &mut self, + project_cache: Addr, + no_cache: bool, + ) -> ProjectState { + match self.get_or_fetch_state(project_cache, no_cache) { + GetOrFetch::Cached(state) => state, + GetOrFetch::Scheduled(_) => ProjectState::Pending, + } } - /// Utility function that returns the project config if enabled. - pub fn enabled(self) -> Option> { - match self { - Self::Enabled(info) => Some(info), - Self::Disabled | Self::Pending => None, + /// Obtains a valid project state and passes it to the sender once ready. + /// + /// This first checks if the state needs to be updated. This is the case if the project state + /// has passed its cache timeout. The `no_cache` flag forces an update. This does nothing if an + /// update is already running in the background. + /// + /// Independent of updating, _stale_ states are passed to the sender immediately as long as they + /// are in the [grace period](Config::project_grace_period). 
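// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch above: the "at most one no_cache
// request per second" throttle used by `get_or_fetch_state`. `Throttle` is a
// local stand-in; the real code keeps `last_no_cache` directly on `Project`
// and uses `tokio::time::Instant` instead of `std::time::Instant`.
// ---------------------------------------------------------------------------
use std::time::{Duration, Instant};

struct Throttle {
    last_no_cache: Instant,
}

impl Throttle {
    /// Returns whether a `no_cache` request may go through, downgrading it to a
    /// cached request if the previous `no_cache` request was less than a second ago.
    fn admit(&mut self, no_cache: bool) -> bool {
        if !no_cache {
            return false;
        }
        if self.last_no_cache.elapsed() < Duration::from_secs(1) {
            // Gracefully degrade to a cached request.
            false
        } else {
            self.last_no_cache = Instant::now();
            true
        }
    }
}

fn main() {
    let mut throttle = Throttle {
        // Start far enough in the past that the first request is admitted.
        last_no_cache: Instant::now() - Duration::from_secs(2),
    };
    assert!(throttle.admit(true)); // first no_cache request goes through
    assert!(!throttle.admit(true)); // an immediate retry is downgraded
    assert!(!throttle.admit(false)); // plain cached requests are unaffected
}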
+ pub fn get_state( + &mut self, + project_cache: Addr, + sender: ProjectSender, + no_cache: bool, + ) { + match self.get_or_fetch_state(project_cache, no_cache) { + GetOrFetch::Cached(state) => { + sender.send(state); + } + + GetOrFetch::Scheduled(channel) => { + channel.inner.attach(sender); + } } } - /// Returns the revision of the contained project info. + /// Ensures the project state gets updated. /// - /// `None` if the revision is missing or not available. - pub fn revision(&self) -> Revision { - match &self { - Self::Enabled(info) => info.rev.clone(), - Self::Disabled | Self::Pending => Revision::default(), + /// This first checks if the state needs to be updated. This is the case if the project state + /// has passed its cache timeout. The `no_cache` flag forces another update unless one is + /// already running in the background. + /// + /// If an update is required, the update will start in the background and complete at a later + /// point. Therefore, this method is useful to trigger an update early if it is already clear + /// that the project state will be needed soon. To retrieve an updated state, use + /// [`Project::get_state`] instead. + pub fn prefetch(&mut self, project_cache: Addr, no_cache: bool) -> &mut Self { + self.get_cached_state(project_cache, no_cache); + self + } + + /// Replaces the internal project state with a new one and triggers pending actions. + /// + /// Returns the *old* project state if it was replaced. + /// + /// This flushes pending envelopes from [`ValidateEnvelope`] and + /// notifies all pending receivers from [`get_state`](Self::get_state). + /// + /// `no_cache` should be passed from the requesting call. Updates with `no_cache` will always + /// take precedence. + /// + /// [`ValidateEnvelope`]: crate::services::projects::cache::ValidateEnvelope + pub fn update_state( + &mut self, + project_cache: &Addr, + state: ProjectFetchState, + envelope_processor: &Addr, + no_cache: bool, + ) -> Option { + // Initiate the backoff if the incoming state is invalid. Reset it otherwise. + if state.is_pending() { + self.next_fetch_attempt = Instant::now().checked_add(self.backoff.next_backoff()); + } else { + self.next_fetch_attempt = None; + self.backoff.reset(); + } + + let Some(channel) = self.state_channel.take() else { + relay_log::error!(tags.project_key = %self.project_key, "channel is missing for the state update"); + return None; + }; + + // If the channel has `no_cache` set but we are not a `no_cache` request, we have + // been superseeded. Put it back and let the other request take precedence. + if channel.no_cache && !no_cache { + self.state_channel = Some(channel); + return None; + } + + // If the state is pending, return back the taken channel and schedule state update. + if state.is_pending() { + // Only overwrite if the old state is expired: + let is_expired = matches!(self.state.expiry_state(&self.config), ExpiryState::Expired); + let old_state = match is_expired { + true => Some(std::mem::replace(&mut self.state, state)), + false => None, + }; + + self.state_channel = Some(channel); + let attempts = self.backoff.attempt() + 1; + relay_log::debug!( + "project {} state requested {attempts} times", + self.project_key + ); + + project_cache.send(RequestUpdate { + project_key: self.project_key, + no_cache, + cached_state: self.state.clone(), + }); + return old_state; } + + let old_state = std::mem::replace(&mut self.state, state); + + // Flush all waiting recipients. 
+ relay_log::debug!("project state {} updated", self.project_key);
+ channel.inner.send(self.state.current_state(&self.config));
+
+ self.after_state_updated(envelope_processor);
+
+ Some(old_state)
+ }
+
+ /// Called after all state validations and after the project state is updated.
+ ///
+ /// See also: [`Self::update_state`].
+ fn after_state_updated(&mut self, envelope_processor: &Addr<EnvelopeProcessor>) {
+ self.flush_metric_meta(envelope_processor);
+ // Check if the new sampling config got rid of any reservoir rules we have counters for.
+ self.remove_expired_reservoir_rules();
 }
 
 /// Creates `Scoping` for this project if the state is loaded.
 ///
 /// Returns `Some` if the project state has been fetched and contains a project identifier,
 /// otherwise `None`.
- pub fn scoping(&self, project_key: ProjectKey) -> Option<Scoping> {
- match self {
- Self::Enabled(info) => info.scoping(project_key),
- _ => None,
+ pub fn scoping(&self) -> Option<Scoping> {
+ self.current_state().scoping(self.project_key)
+ }
+
+ /// Runs the checks on incoming envelopes.
+ ///
+ /// See [`crate::services::projects::cache::CheckEnvelope`] for more information.
+ ///
+ /// * checks the rate limits
+ /// * validates the envelope meta in `check_request` - determines whether the given request
+ /// should be accepted or discarded
+ ///
+ /// IMPORTANT: If the [`ProjectState`] is invalid, the `check_request` will be skipped and only
+ /// rate limits will be validated. This function **must not** be called in the main processing
+ /// pipeline.
+ pub fn check_envelope(
+ &mut self,
+ mut envelope: ManagedEnvelope,
+ ) -> Result<CheckedEnvelope, DiscardReason> {
+ let state = match self.current_state() {
+ ProjectState::Enabled(state) => Some(state.clone()),
+ ProjectState::Disabled => {
+ // TODO(jjbayer): We should refactor this function to either return a Result or
+ // handle envelope rejections internally, but not both.
+ envelope.reject(Outcome::Invalid(DiscardReason::ProjectId));
+ return Err(DiscardReason::ProjectId);
+ }
+ ProjectState::Pending => None,
+ };
+ let mut scoping = envelope.scoping();
+
+ if let Some(ref state) = state {
+ scoping = state.scope_request(envelope.envelope().meta());
+ envelope.scope(scoping);
+
+ if let Err(reason) = state.check_envelope(envelope.envelope(), &self.config) {
+ envelope.reject(Outcome::Invalid(reason));
+ return Err(reason);
+ }
 }
+
+ let current_limits = self.rate_limits.current_limits();
+
+ let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]);
+ let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| {
+ Ok(current_limits.check_with_quotas(quotas, item_scoping))
+ });
+
+ let (mut enforcement, mut rate_limits) =
+ envelope_limiter.compute(envelope.envelope_mut(), &scoping)?;
+
+ let check_nested_spans = state
+ .as_ref()
+ .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent));
+
+ // If we can extract spans from the event, we want to try and count the number of nested
+ // spans to correctly emit negative outcomes in case the transaction itself is dropped.
+ if check_nested_spans {
+ sync_spans_to_enforcement(&envelope, &mut enforcement);
+ }
+
+ enforcement.apply_with_outcomes(&mut envelope);
+
+ envelope.update();
+
+ // Special case: Expose active rate limits for all metric namespaces if there is at least
+ // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary
+ // because `EnvelopeLimiter` cannot check metrics without parsing item contents.
+ if envelope.envelope().items().any(|i| i.ty().is_metrics()) { + let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); + metrics_scoping.namespace = MetricNamespaceScoping::Any; + rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); + } + + let envelope = if envelope.envelope().is_empty() { + // Individual rate limits have already been issued above + envelope.reject(Outcome::RateLimited(None)); + None + } else { + Some(envelope) + }; + + Ok(CheckedEnvelope { + envelope, + rate_limits, + }) } } -impl From for ProjectState { - fn from(value: ParsedProjectState) -> Self { - let ParsedProjectState { disabled, info } = value; - match disabled { - true => Self::Disabled, - false => Self::Enabled(Arc::new(info)), - } +/// Adds category limits for the nested spans inside a transaction. +/// +/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted +/// as top-level spans, thus if we limited a transaction, we want to count and emit negative +/// outcomes for each of the spans nested inside that transaction. +fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { + if !enforcement.is_event_active() { + return; + } + + let spans_count = count_nested_spans(envelope); + if spans_count == 0 { + return; + } + + if enforcement.event.is_active() { + enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); + } + + if enforcement.event_indexed.is_active() { + enforcement.spans_indexed = enforcement + .event_indexed + .clone_for(DataCategory::SpanIndexed, spans_count); } } -/// Project state as used in serialization / deserialization. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedProjectState { - /// Whether the project state is disabled. - #[serde(default)] - pub disabled: bool, - /// Project info. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(flatten)] - pub info: ProjectInfo, +/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). +fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { + #[derive(Debug, Deserialize)] + struct PartialEvent { + spans: SeqCount, + } + + envelope + .envelope() + .items() + .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) + .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) + // We do + 1, since we count the transaction itself because it will be extracted + // as a span and counted during the slow path of rate limiting. + .map_or(0, |event| event.spans.0 + 1) } -/// Limited project state for external Relays. -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] -pub struct LimitedParsedProjectState { - /// Whether the project state is disabled. - pub disabled: bool, - /// Limited project info for external Relays. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). 
- #[serde(with = "LimitedProjectInfo")] - #[serde(flatten)] - pub info: ProjectInfo, +#[cfg(test)] +mod tests { + use crate::envelope::{ContentType, Envelope, Item}; + use crate::extractors::RequestMeta; + use crate::services::processor::ProcessingGroup; + use relay_base_schema::project::ProjectId; + use relay_event_schema::protocol::EventId; + use relay_test::mock_service; + use serde_json::json; + use smallvec::SmallVec; + + use super::*; + + #[test] + fn get_state_expired() { + for expiry in [9999, 0] { + let config = Arc::new( + Config::from_json_value(json!( + { + "cache": { + "project_expiry": expiry, + "project_grace_period": 0, + "eviction_interval": 9999 // do not evict + } + } + )) + .unwrap(), + ); + + // Initialize project with a state + let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let project_info = ProjectInfo { + project_id: Some(ProjectId::new(123)), + ..Default::default() + }; + let mut project = Project::new(project_key, config.clone()); + project.state = ProjectFetchState::enabled(project_info); + + if expiry > 0 { + // With long expiry, should get a state + assert!(matches!(project.current_state(), ProjectState::Enabled(_))); + } else { + // With 0 expiry, project should expire immediately. No state can be set. + assert!(matches!(project.current_state(), ProjectState::Pending)); + } + } + } + + #[tokio::test] + async fn test_stale_cache() { + let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); + let (envelope_processor, _) = mock_service("envelope_processor", (), |&mut (), _| {}); + + let config = Arc::new( + Config::from_json_value(json!( + { + "cache": { + "project_expiry": 100, + "project_grace_period": 0, + "eviction_interval": 9999 // do not evict + } + } + )) + .unwrap(), + ); + + let channel = StateChannel::new(); + + // Initialize project with a state. + let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); + let mut project = Project::new(project_key, config); + project.state_channel = Some(channel); + project.state = ProjectFetchState::allowed(); + + assert!(project.next_fetch_attempt.is_none()); + // Try to update project with errored project state. + project.update_state( + &addr, + ProjectFetchState::pending(), + &envelope_processor, + false, + ); + // Since we got invalid project state we still keep the old one meaning there + // still must be the project id set. + assert!(matches!(project.current_state(), ProjectState::Enabled(_))); + assert!(project.next_fetch_attempt.is_some()); + + // This tests that we actually initiate the backoff and the backoff mechanism works: + // * first call to `update_state` with invalid ProjectState starts the backoff, but since + // it's the first attemt, we get Duration of 0. 
+ // * second call to `update_state` here will bumpt the `next_backoff` Duration to somehing + // like ~ 1s + // * and now, by calling `fetch_state` we test that it's a noop, since if backoff is active + // we should never fetch + // * without backoff it would just panic, not able to call the ProjectCache service + let channel = StateChannel::new(); + project.state_channel = Some(channel); + project.update_state( + &addr, + ProjectFetchState::pending(), + &envelope_processor, + false, + ); + project.fetch_state(addr, false); + } + + fn create_project(config: Option) -> Project { + let project_key = ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(); + let mut project = Project::new(project_key, Arc::new(Config::default())); + let mut project_info = ProjectInfo { + project_id: Some(ProjectId::new(42)), + ..Default::default() + }; + let mut public_keys = SmallVec::new(); + public_keys.push(PublicKeyConfig { + public_key: project_key, + numeric_id: None, + }); + project_info.public_keys = public_keys; + if let Some(config) = config { + project_info.config = serde_json::from_value(config).unwrap(); + } + project.state = ProjectFetchState::enabled(project_info); + project + } + + fn request_meta() -> RequestMeta { + let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" + .parse() + .unwrap(); + + RequestMeta::new(dsn) + } + + #[test] + fn test_track_nested_spans_outcomes() { + let mut project = create_project(Some(json!({ + "features": [ + "organizations:indexed-spans-extraction" + ], + "quotas": [{ + "id": "foo", + "categories": ["transaction"], + "window": 3600, + "limit": 0, + "reasonCode": "foo", + }] + }))); + + let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); + + let mut transaction = Item::new(ItemType::Transaction); + transaction.set_payload( + ContentType::Json, + r#"{ + "event_id": "52df9022835246eeb317dbd739ccd059", + "type": "transaction", + "transaction": "I have a stale timestamp, but I'm recent!", + "start_timestamp": 1, + "timestamp": 2, + "contexts": { + "trace": { + "trace_id": "ff62a8b040f340bda5d830223def1d81", + "span_id": "bd429c44b67a3eb4" + } + }, + "spans": [ + { + "span_id": "bd429c44b67a3eb4", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + }, + { + "span_id": "bd429c44b67a3eb5", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + } + ] +}"#, + ); + + envelope.add_item(transaction); + + let (outcome_aggregator, mut outcome_aggregator_rx) = Addr::custom(); + let (test_store, _) = Addr::custom(); + + let managed_envelope = ManagedEnvelope::new( + envelope, + outcome_aggregator.clone(), + test_store, + ProcessingGroup::Transaction, + ); + + let _ = project.check_envelope(managed_envelope); + drop(outcome_aggregator); + + let expected = [ + (DataCategory::Transaction, 1), + (DataCategory::TransactionIndexed, 1), + (DataCategory::Span, 3), + (DataCategory::SpanIndexed, 3), + ]; + + for (expected_category, expected_quantity) in expected { + let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); + assert_eq!(outcome.category, expected_category); + assert_eq!(outcome.quantity, expected_quantity); + } + } } diff --git a/relay-server/src/services/projects/project/state/fetch_state.rs b/relay-server/src/services/projects/project/state/fetch_state.rs new file mode 100644 index 0000000000..020150f303 --- /dev/null +++ b/relay-server/src/services/projects/project/state/fetch_state.rs @@ -0,0 +1,162 @@ +use std::sync::Arc; + +use 
tokio::time::Instant;
+
+use relay_config::Config;
+use relay_dynamic_config::ProjectConfig;
+
+use crate::services::projects::project::state::info::ProjectInfo;
+use crate::services::projects::project::ProjectState;
+
+/// Hides a cached project state and only exposes it if it has not expired.
+#[derive(Clone, Debug)]
+pub struct ProjectFetchState {
+ /// The time at which this project state was last updated.
+ last_fetch: Option<Instant>,
+ state: ProjectState,
+}
+
+impl ProjectFetchState {
+ /// Takes a [`ProjectState`] and sets its last fetch to the current time.
+ pub fn new(state: ProjectState) -> Self {
+ Self {
+ last_fetch: Some(Instant::now()),
+ state,
+ }
+ }
+
+ /// Refreshes the expiry of the fetch state.
+ pub fn refresh(old: ProjectFetchState) -> Self {
+ Self {
+ last_fetch: Some(Instant::now()),
+ state: old.state,
+ }
+ }
+
+ /// Project state for an unknown but allowed project.
+ ///
+ /// This state is used for forwarding in Proxy mode.
+ pub fn allowed() -> Self {
+ Self::enabled(ProjectInfo {
+ project_id: None,
+ last_change: None,
+ rev: None,
+ public_keys: Default::default(),
+ slug: None,
+ config: ProjectConfig::default(),
+ organization_id: None,
+ })
+ }
+
+ /// An enabled project state created from a project info.
+ pub fn enabled(project_info: ProjectInfo) -> Self {
+ Self::new(ProjectState::Enabled(Arc::new(project_info)))
+ }
+
+ /// Returns a disabled state.
+ pub fn disabled() -> Self {
+ Self::new(ProjectState::Disabled)
+ }
+
+ /// Returns a pending or invalid state.
+ pub fn pending() -> Self {
+ Self::new(ProjectState::Pending)
+ }
+
+ /// Creates a state that immediately counts as expired.
+ ///
+ /// This is what [`Project`](crate::services::projects::project::Project) initializes itself with.
+ pub fn expired() -> Self {
+ Self {
+ // Make sure the state immediately qualifies as expired:
+ last_fetch: None,
+ state: ProjectState::Pending,
+ }
+ }
+
+ /// Sanitizes the contained project state. See [`ProjectState::sanitized`].
+ pub fn sanitized(self) -> Self {
+ let Self { last_fetch, state } = self;
+ Self {
+ last_fetch,
+ state: state.sanitized(),
+ }
+ }
+
+ /// Returns `true` if the contained state is pending.
+ pub fn is_pending(&self) -> bool {
+ matches!(self.state, ProjectState::Pending)
+ }
+
+ /// Returns information about the expiry of a project state.
+ ///
+ /// If no detailed information is needed, use [`Self::current_state`] instead.
+ pub fn expiry_state(&self, config: &Config) -> ExpiryState {
+ match self.check_expiry(config) {
+ Expiry::Updated => ExpiryState::Updated(&self.state),
+ Expiry::Stale => ExpiryState::Stale(&self.state),
+ Expiry::Expired => ExpiryState::Expired,
+ }
+ }
+
+ /// Returns the current project state, if it has not yet expired.
+ pub fn current_state(&self, config: &Config) -> ProjectState {
+ match self.expiry_state(config) {
+ ExpiryState::Updated(state) | ExpiryState::Stale(state) => state.clone(),
+ ExpiryState::Expired => ProjectState::Pending,
+ }
+ }
+
+ /// Returns whether this state is outdated and needs to be refetched.
+ fn check_expiry(&self, config: &Config) -> Expiry { + let Some(last_fetch) = self.last_fetch else { + return Expiry::Expired; + }; + let expiry = match &self.state { + ProjectState::Enabled(info) if info.project_id.is_some() => { + config.project_cache_expiry() + } + _ => config.cache_miss_expiry(), + }; + + let elapsed = last_fetch.elapsed(); + if elapsed >= expiry + config.project_grace_period() { + Expiry::Expired + } else if elapsed >= expiry { + Expiry::Stale + } else { + Expiry::Updated + } + } + + /// Returns the revision of the contained project state. + /// + /// See: [`ProjectState::revision`]. + pub fn revision(&self) -> Option<&str> { + self.state.revision() + } +} + +/// Wrapper for a project state, with expiry information. +#[derive(Clone, Copy, Debug)] +pub enum ExpiryState<'a> { + /// An up-to-date project state. See [`Expiry::Updated`]. + Updated(&'a ProjectState), + /// A stale project state that can still be used. See [`Expiry::Stale`]. + Stale(&'a ProjectState), + /// An expired project state that should not be used. See [`Expiry::Expired`]. + Expired, +} + +/// The expiry status of a project state. Return value of [`ProjectFetchState::check_expiry`]. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +enum Expiry { + /// The project state is perfectly up to date. + Updated, + /// The project state is outdated but events depending on this project state can still be + /// processed. The state should be refreshed in the background though. + Stale, + /// The project state is completely outdated and events need to be buffered up until the new + /// state has been fetched. + Expired, +} diff --git a/relay-server/src/services/projects/project/info.rs b/relay-server/src/services/projects/project/state/info.rs similarity index 86% rename from relay-server/src/services/projects/project/info.rs rename to relay-server/src/services/projects/project/state/info.rs index d6fb7cfaab..eab9b8fb62 100644 --- a/relay-server/src/services/projects/project/info.rs +++ b/relay-server/src/services/projects/project/state/info.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use chrono::{DateTime, Utc}; use relay_base_schema::project::{ProjectId, ProjectKey}; @@ -18,6 +16,7 @@ use url::Url; use crate::envelope::Envelope; use crate::extractors::RequestMeta; use crate::services::outcome::DiscardReason; +use crate::services::projects::project::PublicKeyConfig; /// Information about an enabled project. /// @@ -33,7 +32,7 @@ pub struct ProjectInfo { /// are faked locally. pub last_change: Option>, /// The revision id of the project config. - pub rev: Revision, + pub rev: Option, /// Indicates that the project is disabled. /// A container of known public keys in the project. /// @@ -58,7 +57,7 @@ pub struct ProjectInfo { pub struct LimitedProjectInfo { pub project_id: Option, pub last_change: Option>, - pub rev: Revision, + pub rev: Option, pub public_keys: SmallVec<[PublicKeyConfig; 1]>, pub slug: Option, #[serde(with = "LimitedProjectConfig")] @@ -188,6 +187,9 @@ impl ProjectInfo { /// This scoping amends `RequestMeta::get_partial_scoping` by adding organization and key info. /// The processor must fetch the full scoping before attempting to rate limit with partial /// scoping. + /// + /// To get the own scoping of this ProjectKey without amending request information, use + /// [`Project::scoping`](crate::services::projects::project::Project::scoping) instead. 
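// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch above: the threshold logic of
// `check_expiry`. The real code reads the expiry and grace period from
// `Config` (`cache.project_expiry`, `cache.project_grace_period`); here they
// are plain `Duration` parameters so the example runs in isolation.
// ---------------------------------------------------------------------------
use std::time::Duration;

#[derive(Debug, PartialEq)]
enum ExpirySketch {
    Updated,
    Stale,
    Expired,
}

fn classify(elapsed: Duration, expiry: Duration, grace: Duration) -> ExpirySketch {
    if elapsed >= expiry + grace {
        ExpirySketch::Expired
    } else if elapsed >= expiry {
        ExpirySketch::Stale
    } else {
        ExpirySketch::Updated
    }
}

fn main() {
    // With `project_expiry: 100` and `project_grace_period: 30` (seconds):
    let (expiry, grace) = (Duration::from_secs(100), Duration::from_secs(30));
    assert_eq!(classify(Duration::from_secs(50), expiry, grace), ExpirySketch::Updated);
    assert_eq!(classify(Duration::from_secs(110), expiry, grace), ExpirySketch::Stale);
    assert_eq!(classify(Duration::from_secs(140), expiry, grace), ExpirySketch::Expired);
}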
pub fn scope_request(&self, meta: &RequestMeta) -> Scoping { let mut scoping = meta.get_partial_scoping(); @@ -246,51 +248,3 @@ impl ProjectInfo { self.config.features.has(feature) } } - -/// Represents a public key received from the projectconfig endpoint. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct PublicKeyConfig { - /// Public part of key (random hash). - pub public_key: ProjectKey, - - /// The primary key of the DSN in Sentry's main database. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub numeric_id: Option, -} - -/// Represents a project info revision. -/// -/// A revision can be missing, a missing revision never compares equal -/// to any other revision. -/// -/// Revisions are internally reference counted and cheap to create. -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -#[serde(transparent)] -pub struct Revision(Option>); - -impl Revision { - /// Returns the revision as a string reference. - /// - /// `None` is a revision that does not match any other revision, - /// not even another revision which is represented as `None`. - pub fn as_str(&self) -> Option<&str> { - self.0.as_deref() - } -} - -impl PartialEq for Revision { - fn eq(&self, other: &Self) -> bool { - match (&self.0, &other.0) { - (None, _) => false, - (_, None) => false, - (Some(left), Some(right)) => left == right, - } - } -} - -impl From<&str> for Revision { - fn from(value: &str) -> Self { - Self(Some(value.into())) - } -} diff --git a/relay-server/src/services/projects/project/state/mod.rs b/relay-server/src/services/projects/project/state/mod.rs new file mode 100644 index 0000000000..868ad5ed4a --- /dev/null +++ b/relay-server/src/services/projects/project/state/mod.rs @@ -0,0 +1,126 @@ +//! Types that represent the current project state. +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + +use relay_base_schema::project::ProjectKey; +use relay_quotas::Scoping; + +mod fetch_state; +mod info; + +pub use self::fetch_state::{ExpiryState, ProjectFetchState}; +pub use self::info::{LimitedProjectInfo, ProjectInfo}; + +/// Representation of a project's current state. +#[derive(Clone, Debug)] +pub enum ProjectState { + /// A valid project that is not disabled. + Enabled(Arc), + /// A project that was marked as "gone" by the upstream. This variant does not expose + /// any other project information. + Disabled, + /// A project to which one of the following conditions apply: + /// - The project has not yet been fetched. + /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). + /// - The upstream returned an unparsable project so we have to try again. + /// - The project has expired and must be treated as "has not been fetched". + Pending, +} + +impl ProjectState { + /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). + pub fn sanitized(self) -> Self { + match self { + ProjectState::Enabled(state) => { + ProjectState::Enabled(Arc::new(state.as_ref().clone().sanitized())) + } + ProjectState::Disabled => ProjectState::Disabled, + ProjectState::Pending => ProjectState::Pending, + } + } + + /// Whether or not this state is pending. + pub fn is_pending(&self) -> bool { + matches!(self, ProjectState::Pending) + } + + /// Utility function that returns the project config if enabled. 
+ pub fn enabled(self) -> Option> { + match self { + ProjectState::Enabled(info) => Some(info), + ProjectState::Disabled | ProjectState::Pending => None, + } + } + + /// Returns the revision of the contained project info. + /// + /// `None` if the revision is missing or not available. + pub fn revision(&self) -> Option<&str> { + match &self { + ProjectState::Enabled(info) => info.rev.as_deref(), + ProjectState::Disabled => None, + ProjectState::Pending => None, + } + } + + /// Creates `Scoping` for this project if the state is loaded. + /// + /// Returns `Some` if the project state has been fetched and contains a project identifier, + /// otherwise `None`. + pub fn scoping(&self, project_key: ProjectKey) -> Option { + match self { + Self::Enabled(info) => info.scoping(project_key), + _ => None, + } + } +} + +impl From for ProjectState { + fn from(value: ParsedProjectState) -> Self { + let ParsedProjectState { disabled, info } = value; + match disabled { + true => ProjectState::Disabled, + false => ProjectState::Enabled(Arc::new(info)), + } + } +} + +/// Project state as used in serialization / deserialization. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParsedProjectState { + /// Whether the project state is disabled. + #[serde(default)] + pub disabled: bool, + /// Project info. + /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). + #[serde(flatten)] + pub info: ProjectInfo, +} + +/// Limited project state for external Relays. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] +pub struct LimitedParsedProjectState { + /// Whether the project state is disabled. + pub disabled: bool, + /// Limited project info for external Relays. + /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). + #[serde(with = "LimitedProjectInfo")] + #[serde(flatten)] + pub info: ProjectInfo, +} + +/// Response indicating whether a project state needs to be updated +/// or the upstream does not have a newer version. +#[derive(Debug, Clone)] +pub enum UpstreamProjectState { + /// The upstream sent a [`ProjectState`]. + New(ProjectState), + /// The upstream indicated that there is no newer version of the state available. + NotModified, +} diff --git a/relay-server/src/services/projects/source/mod.rs b/relay-server/src/services/projects/source/mod.rs index 1311acfc40..5b208ef3ae 100644 --- a/relay-server/src/services/projects/source/mod.rs +++ b/relay-server/src/services/projects/source/mod.rs @@ -15,7 +15,8 @@ pub mod local; pub mod redis; pub mod upstream; -use crate::services::projects::project::{ProjectState, Revision}; +use crate::services::projects::project::state::UpstreamProjectState; +use crate::services::projects::project::ProjectFetchState; use crate::services::upstream::UpstreamRelay; use self::local::{LocalProjectSource, LocalProjectSourceService}; @@ -90,55 +91,53 @@ impl ProjectSource { } } - /// Fetches a project with `project_key` from the configured sources. - /// - /// Returns a fully sanitized project. 
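// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch above: the `disabled` flag plus
// `#[serde(flatten)]` pattern used by `ParsedProjectState` and its `From`
// conversion. `Payload` and `State` are local stand-ins for `ProjectInfo` and
// `ProjectState`; only a `slug` field is shown for brevity.
// ---------------------------------------------------------------------------
use std::sync::Arc;

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Payload {
    slug: Option<String>,
}

#[derive(Debug, Deserialize)]
struct Parsed {
    #[serde(default)]
    disabled: bool,
    #[serde(flatten)]
    info: Payload,
}

#[derive(Debug)]
enum State {
    Enabled(Arc<Payload>),
    Disabled,
}

impl From<Parsed> for State {
    fn from(value: Parsed) -> Self {
        match value.disabled {
            true => State::Disabled,
            false => State::Enabled(Arc::new(value.info)),
        }
    }
}

fn main() {
    // `disabled` defaults to `false`, so a regular config becomes the enabled variant.
    let enabled: Parsed = serde_json::from_str(r#"{"slug": "my-project"}"#).unwrap();
    assert!(matches!(State::from(enabled), State::Enabled(_)));

    // A project marked as "gone" only needs the flag; the flattened payload can be empty.
    let gone: Parsed = serde_json::from_str(r#"{"disabled": true}"#).unwrap();
    assert!(matches!(State::from(gone), State::Disabled));
}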
pub async fn fetch( self, project_key: ProjectKey, no_cache: bool, - current_revision: Revision, - ) -> Result { + cached_state: ProjectFetchState, + ) -> Result { let state_opt = self .local_source .send(FetchOptionalProjectState { project_key }) .await?; if let Some(state) = state_opt { - return Ok(state.into()); + return Ok(ProjectFetchState::new(state)); } match self.config.relay_mode() { - RelayMode::Proxy => return Ok(ProjectState::new_allowed().into()), - RelayMode::Static => return Ok(ProjectState::Disabled.into()), - RelayMode::Capture => return Ok(ProjectState::new_allowed().into()), + RelayMode::Proxy => return Ok(ProjectFetchState::allowed()), + RelayMode::Static => return Ok(ProjectFetchState::disabled()), + RelayMode::Capture => return Ok(ProjectFetchState::allowed()), RelayMode::Managed => (), // Proceed with loading the config from redis or upstream } + let current_revision = cached_state.revision().map(String::from); #[cfg(feature = "processing")] if let Some(redis_source) = self.redis_source { let current_revision = current_revision.clone(); let redis_permit = self.redis_semaphore.acquire().await?; let state_fetch_result = tokio::task::spawn_blocking(move || { - redis_source.get_config_if_changed(project_key, current_revision) + redis_source.get_config_if_changed(project_key, current_revision.as_deref()) }) .await?; drop(redis_permit); match state_fetch_result { // New state fetched from Redis, possibly pending. - // - // If it is pending, we must fallback to fetching from the upstream. - Ok(SourceProjectState::New(state)) => { + Ok(UpstreamProjectState::New(state)) => { let state = state.sanitized(); if !state.is_pending() { - return Ok(state.into()); + return Ok(ProjectFetchState::new(state)); } } // Redis reported that we're holding an up-to-date version of the state already, // refresh the state and return the old cached state again. - Ok(SourceProjectState::NotModified) => return Ok(SourceProjectState::NotModified), + Ok(UpstreamProjectState::NotModified) => { + return Ok(ProjectFetchState::refresh(cached_state)) + } Err(error) => { relay_log::error!( error = &error as &dyn std::error::Error, @@ -157,10 +156,10 @@ impl ProjectSource { }) .await?; - Ok(match state { - SourceProjectState::New(state) => SourceProjectState::New(state.sanitized()), - SourceProjectState::NotModified => SourceProjectState::NotModified, - }) + match state { + UpstreamProjectState::New(state) => Ok(ProjectFetchState::new(state.sanitized())), + UpstreamProjectState::NotModified => Ok(ProjectFetchState::refresh(cached_state)), + } } } @@ -190,7 +189,10 @@ pub struct FetchProjectState { /// The upstream is allowed to omit full project configs /// for requests for which the requester already has the most /// recent revision. - pub current_revision: Revision, + /// + /// Settings this to `None` will essentially always re-fetch + /// the project config. + pub current_revision: Option, /// If true, all caches should be skipped and a fresh state should be computed. pub no_cache: bool, @@ -206,19 +208,3 @@ impl FetchOptionalProjectState { self.project_key } } - -/// Response indicating whether a project state needs to be updated -/// or the upstream does not have a newer version. -#[derive(Debug, Clone)] -pub enum SourceProjectState { - /// The upstream sent a [`ProjectState`]. - New(ProjectState), - /// The upstream indicated that there is no newer version of the state available. 
- NotModified, -} - -impl From for SourceProjectState { - fn from(value: ProjectState) -> Self { - Self::New(value) - } -} diff --git a/relay-server/src/services/projects/source/redis.rs b/relay-server/src/services/projects/source/redis.rs index 7109c408cd..e2a520f4e7 100644 --- a/relay-server/src/services/projects/source/redis.rs +++ b/relay-server/src/services/projects/source/redis.rs @@ -5,8 +5,8 @@ use relay_config::Config; use relay_redis::{RedisError, RedisPool}; use relay_statsd::metric; -use crate::services::projects::project::{ParsedProjectState, ProjectState, Revision}; -use crate::services::projects::source::SourceProjectState; +use crate::services::projects::project::state::UpstreamProjectState; +use crate::services::projects::project::{ParsedProjectState, ProjectState}; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; #[derive(Debug, Clone)] @@ -60,13 +60,13 @@ impl RedisProjectSource { pub fn get_config_if_changed( &self, key: ProjectKey, - revision: Revision, - ) -> Result { + revision: Option<&str>, + ) -> Result { let mut client = self.redis.client()?; let mut connection = client.connection()?; // Only check for the revision if we were passed a revision. - if let Some(revision) = revision.as_str() { + if let Some(revision) = revision { let current_revision: Option = relay_redis::redis::cmd("GET") .arg(self.get_redis_rev_key(key)) .query(&mut connection) @@ -80,7 +80,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "revision", ); - return Ok(SourceProjectState::NotModified); + return Ok(UpstreamProjectState::NotModified); } } @@ -94,7 +94,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "false" ); - return Ok(SourceProjectState::New(ProjectState::Pending)); + return Ok(UpstreamProjectState::New(ProjectState::Pending)); }; let response = ProjectState::from(parse_redis_response(response.as_slice())?); @@ -106,18 +106,18 @@ impl RedisProjectSource { // // While this is theoretically possible this should always been handled using the above revision // check using the additional Redis key. 
- if response.revision() == revision { + if revision.is_some() && response.revision() == revision { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config_revision" ); - Ok(SourceProjectState::NotModified) + Ok(UpstreamProjectState::NotModified) } else { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config" ); - Ok(SourceProjectState::New(response)) + Ok(UpstreamProjectState::New(response)) } } diff --git a/relay-server/src/services/projects/source/upstream.rs b/relay-server/src/services/projects/source/upstream.rs index 6f051c5ec4..bcbfd5a279 100644 --- a/relay-server/src/services/projects/source/upstream.rs +++ b/relay-server/src/services/projects/source/upstream.rs @@ -18,9 +18,10 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::Instant; -use crate::services::projects::project::Revision; -use crate::services::projects::project::{ParsedProjectState, ProjectState}; -use crate::services::projects::source::{FetchProjectState, SourceProjectState}; +use crate::services::projects::project::state::UpstreamProjectState; +use crate::services::projects::project::ParsedProjectState; +use crate::services::projects::project::ProjectState; +use crate::services::projects::source::FetchProjectState; use crate::services::upstream::{ Method, RequestPriority, SendQuery, UpstreamQuery, UpstreamRelay, UpstreamRequestError, }; @@ -40,7 +41,7 @@ pub struct GetProjectStates { /// /// The revisions are mapped by index to the project key, /// this is a separate field to keep the API compatible. - revisions: Vec, + revisions: Vec>, /// If `true` the upstream should return a full configuration. /// /// Upstreams will ignore this for non-internal Relays. @@ -98,10 +99,10 @@ impl UpstreamQuery for GetProjectStates { #[derive(Debug)] struct ProjectStateChannel { // Main broadcast channel. - channel: BroadcastChannel, + channel: BroadcastChannel, // Additional broadcast channels tracked from merge operations. - merged: Vec>, - revision: Revision, + merged: Vec>, + revision: Option, deadline: Instant, no_cache: bool, attempts: u64, @@ -113,8 +114,8 @@ struct ProjectStateChannel { impl ProjectStateChannel { pub fn new( - sender: BroadcastSender, - revision: Revision, + sender: BroadcastSender, + revision: Option, timeout: Duration, no_cache: bool, ) -> Self { @@ -141,14 +142,18 @@ impl ProjectStateChannel { /// If the new revision is different from the contained revision this clears the revision. /// To not have multiple fetches per revision per batch, we need to find a common denominator /// for requests with different revisions, which is always to fetch the full project config. 
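// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch above: the revision comparison
// that both the Redis and upstream sources rely on. `fetch_full_config` is a
// hypothetical stand-in for the actual Redis/HTTP fetch; only the decision
// logic mirrors the diff. A missing cached revision never compares equal, so
// it always results in a full fetch.
// ---------------------------------------------------------------------------
#[derive(Debug)]
enum Fetched<T> {
    New(T),
    NotModified,
}

fn config_if_changed<T>(
    cached_revision: Option<&str>,
    stored_revision: Option<&str>,
    fetch_full_config: impl FnOnce() -> T,
) -> Fetched<T> {
    match (cached_revision, stored_revision) {
        // Only a known, matching revision can short-circuit the fetch.
        (Some(cached), Some(stored)) if cached == stored => Fetched::NotModified,
        _ => Fetched::New(fetch_full_config()),
    }
}

fn main() {
    assert!(matches!(config_if_changed(Some("abc"), Some("abc"), || ()), Fetched::NotModified));
    assert!(matches!(config_if_changed(Some("abc"), Some("def"), || ()), Fetched::New(())));
    assert!(matches!(config_if_changed(None, Some("abc"), || ()), Fetched::New(())));
}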
- pub fn attach(&mut self, sender: BroadcastSender, revision: Revision) { + pub fn attach( + &mut self, + sender: BroadcastSender, + revision: Option, + ) { self.channel.attach(sender); if self.revision != revision { - self.revision = Revision::default(); + self.revision = None; } } - pub fn send(self, state: SourceProjectState) { + pub fn send(self, state: UpstreamProjectState) { for channel in self.merged { channel.send(state.clone()); } @@ -174,7 +179,7 @@ impl ProjectStateChannel { self.merged.push(channel); self.merged.extend(merged); if self.revision != revision { - self.revision = Revision::default(); + self.revision = None; } self.deadline = self.deadline.max(deadline); self.no_cache |= no_cache; @@ -193,16 +198,16 @@ type ProjectStateChannels = HashMap; /// Internally it maintains the buffer queue of the incoming requests, which got scheduled to fetch the /// state and takes care of the backoff in case there is a problem with the requests. #[derive(Debug)] -pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); +pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); impl Interface for UpstreamProjectSource {} impl FromMessage for UpstreamProjectSource { - type Response = BroadcastResponse; + type Response = BroadcastResponse; fn from_message( message: FetchProjectState, - sender: BroadcastSender, + sender: BroadcastSender, ) -> Self { Self(message, sender) } @@ -462,7 +467,7 @@ impl UpstreamProjectSourceService { let mut result = "ok"; let state = if response.unchanged.contains(&key) { result = "ok_unchanged"; - SourceProjectState::NotModified + UpstreamProjectState::NotModified } else { let state = response .configs @@ -480,7 +485,7 @@ impl UpstreamProjectSourceService { ErrorBoundary::Ok(Some(state)) => state.into(), }; - SourceProjectState::New(state) + UpstreamProjectState::New(state) }; metric!( @@ -692,7 +697,7 @@ mod tests { let mut response1 = service.send(FetchProjectState { project_key, - current_revision: "123".into(), + current_revision: Some("123".to_owned()), no_cache: false, }); @@ -703,7 +708,7 @@ mod tests { // request, after responding to the first inflight request. let mut response2 = service.send(FetchProjectState { project_key, - current_revision: Revision::default(), + current_revision: None, no_cache: false, }); @@ -727,8 +732,8 @@ mod tests { .await; let (response1, response2) = futures::future::join(response1, response2).await; - assert!(matches!(response1, Ok(SourceProjectState::NotModified))); - assert!(matches!(response2, Ok(SourceProjectState::NotModified))); + assert!(matches!(response1, Ok(UpstreamProjectState::NotModified))); + assert!(matches!(response2, Ok(UpstreamProjectState::NotModified))); // No more messages to upstream expected. 
assert!(upstream_rx.try_recv().is_err()); diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index 6201ff9b7a..8aa90b7b2c 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -54,7 +54,7 @@ use tokio::sync::mpsc; use crate::envelope::{Envelope, EnvelopeError}; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::legacy::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; +use crate::services::projects::cache::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{ManagedEnvelope, MemoryChecker}; @@ -156,6 +156,7 @@ impl QueueKey { /// It's sent in response to [`DequeueMany`] message from the [`ProjectCache`]. #[derive(Debug)] pub struct UnspooledEnvelope { + pub key: QueueKey, pub managed_envelope: ManagedEnvelope, } @@ -374,6 +375,7 @@ impl InMemory { self.envelope_count = self.envelope_count.saturating_sub(1); sender .send(UnspooledEnvelope { + key, managed_envelope: envelope, }) .ok(); @@ -583,9 +585,14 @@ impl OnDisk { }; match self.extract_envelope(envelope, services) { - Ok((_, managed_envelopes)) => { + Ok((key, managed_envelopes)) => { for managed_envelope in managed_envelopes { - sender.send(UnspooledEnvelope { managed_envelope }).ok(); + sender + .send(UnspooledEnvelope { + key, + managed_envelope, + }) + .ok(); } } Err(err) => relay_log::error!( @@ -1429,7 +1436,10 @@ mod tests { sender: tx.clone(), }); - let UnspooledEnvelope { managed_envelope } = rx.recv().await.unwrap(); + let UnspooledEnvelope { + key: _, + managed_envelope, + } = rx.recv().await.unwrap(); let received_at_received = managed_envelope.received_at(); // Check if the original start time elapsed to the same second as the restored one. diff --git a/relay-server/src/services/stats.rs b/relay-server/src/services/stats.rs index 8fe4747454..a7b5adc66e 100644 --- a/relay-server/src/services/stats.rs +++ b/relay-server/src/services/stats.rs @@ -122,11 +122,13 @@ impl RelayStats { project_configs, cardinality, quotas, + misc, }) = self.redis_pools.as_ref() { Self::redis_pool(project_configs, "project_configs"); Self::redis_pool(cardinality, "cardinality"); Self::redis_pool(quotas, "quotas"); + Self::redis_pool(misc, "misc"); } } } diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 3e5ed3ec48..12274595fb 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -7,6 +7,8 @@ pub enum RelayGauges { /// The state of Relay with respect to the upstream connection. /// Possible values are `0` for normal operations and `1` for a network outage. NetworkOutage, + /// The number of items currently in the garbage disposal queue. + ProjectCacheGarbageQueueSize, /// The number of envelopes waiting for project states in memory. /// /// This number is always <= `EnvelopeQueueSize`. @@ -44,14 +46,13 @@ pub enum RelayGauges { /// The number of idle connections in the Redis Pool. #[cfg(feature = "processing")] RedisPoolIdleConnections, - /// The number of notifications in the broadcast channel of the project cache. 
- ProjectCacheNotificationChannel, } impl GaugeMetric for RelayGauges { fn name(&self) -> &'static str { match self { RelayGauges::NetworkOutage => "upstream.network_outage", + RelayGauges::ProjectCacheGarbageQueueSize => "project_cache.garbage.queue_size", RelayGauges::BufferEnvelopesMemoryCount => "buffer.envelopes_mem_count", RelayGauges::BufferEnvelopesDiskCount => "buffer.envelopes_disk_count", RelayGauges::BufferPeriodicUnspool => "buffer.unspool.periodic", @@ -63,9 +64,6 @@ impl GaugeMetric for RelayGauges { RelayGauges::RedisPoolConnections => "redis.pool.connections", #[cfg(feature = "processing")] RelayGauges::RedisPoolIdleConnections => "redis.pool.idle_connections", - RelayGauges::ProjectCacheNotificationChannel => { - "project_cache.notification_channel.size" - } } } } @@ -397,6 +395,8 @@ pub enum RelayTimers { /// Total time in milliseconds an envelope spends in Relay from the time it is received until it /// finishes processing and has been submitted to the upstream. EnvelopeTotalTime, + /// Total time in milliseconds spent evicting outdated and unused projects happens. + ProjectStateEvictionDuration, /// Total time in milliseconds spent fetching queued project configuration updates requests to /// resolve. /// @@ -487,30 +487,25 @@ pub enum RelayTimers { /// /// - `message`: The type of message that was processed. ProcessMessageDuration, - /// Timing in milliseconds for processing a task in the project cache service. + /// Timing in milliseconds for handling a project cache message. /// /// This metric is tagged with: - /// - `task`: The type of the task the project cache does. - ProjectCacheTaskDuration, - /// Timing in milliseconds for handling a legacy project cache message. + /// - `message`: The type of message that was processed. + ProjectCacheMessageDuration, + /// Timing in milliseconds for processing a message in the buffer service. /// /// This metric is tagged with: + /// /// - `message`: The type of message that was processed. - LegacyProjectCacheMessageDuration, - /// Timing in milliseconds for processing a task in the legacy project cache service. + BufferMessageProcessDuration, + /// Timing in milliseconds for processing a task in the project cache service. /// /// A task is a unit of work the service does. Each branch of the /// `tokio::select` is a different task type. /// /// This metric is tagged with: - /// - `task`: The type of the task the project cache does. - LegacyProjectCacheTaskDuration, - /// Timing in milliseconds for processing a message in the buffer service. - /// - /// This metric is tagged with: - /// - /// - `message`: The type of message that was processed. - BufferMessageProcessDuration, + /// - `task`: The type of the task the processor does. + ProjectCacheTaskDuration, /// Timing in milliseconds for handling and responding to a health check request. 
/// /// This metric is tagged with: @@ -577,6 +572,7 @@ impl TimerMetric for RelayTimers { RelayTimers::EnvelopeWaitTime => "event.wait_time", RelayTimers::EnvelopeProcessingTime => "event.processing_time", RelayTimers::EnvelopeTotalTime => "event.total_time", + RelayTimers::ProjectStateEvictionDuration => "project_state.eviction.duration", RelayTimers::ProjectStateRequestDuration => "project_state.request.duration", #[cfg(feature = "processing")] RelayTimers::ProjectStateDecompression => "project_state.decompression", @@ -589,12 +585,9 @@ impl TimerMetric for RelayTimers { RelayTimers::ReplayRecordingProcessing => "replay.recording.process", RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration", RelayTimers::ProcessMessageDuration => "processor.message.duration", + RelayTimers::ProjectCacheMessageDuration => "project_cache.message.duration", RelayTimers::BufferMessageProcessDuration => "buffer.message.duration", RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration", - RelayTimers::LegacyProjectCacheMessageDuration => { - "legacy_project_cache.message.duration" - } - RelayTimers::LegacyProjectCacheTaskDuration => "legacy_project_cache.task.duration", RelayTimers::HealthCheckDuration => "health.message.duration", #[cfg(feature = "processing")] RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets", @@ -690,6 +683,15 @@ pub enum RelayCounters { /// - `invalid`: Data was considered invalid and could not be recovered. The reason indicates /// the validation that failed. Outcomes, + /// Number of times a project state is looked up from the cache. + /// + /// This includes lookups for both cached and new projects. As part of this, updates for + /// outdated or expired project caches are triggered. + /// + /// Related metrics: + /// - `project_cache.hit`: For successful cache lookups, even for outdated projects. + /// - `project_cache.miss`: For failed lookups resulting in an update. + ProjectStateGet, /// Number of project state HTTP requests. /// /// Relay updates projects in batches. Every update cycle, Relay requests @@ -699,6 +701,15 @@ pub enum RelayCounters { /// Note that after an update loop has completed, there may be more projects pending updates. /// This is indicated by `project_state.pending`. ProjectStateRequest, + /// Number of times a project config was requested with `.no-cache`. + /// + /// This effectively counts the number of envelopes or events that have been sent with a + /// corresponding DSN. Actual queries to the upstream may still be deduplicated for these + /// project state requests. + /// + /// A maximum of 1 such requests per second is allowed per project key. This metric counts only + /// permitted requests. + ProjectStateNoCache, /// Number of times a project state is requested from the central Redis cache. /// /// This metric is tagged with: @@ -731,6 +742,11 @@ pub enum RelayCounters { /// Failure can happen, for example, when there's a network error. Refer to /// [`UpstreamRequestError`](crate::services::upstream::UpstreamRequestError) for all cases. ProjectUpstreamFailed, + /// Number of full metric data flushes. + /// + /// A full flush takes all contained items of the aggregator and flushes them upstream, + /// at best this happens once per freshly loaded project. + ProjectStateFlushAllMetricMeta, /// Number of Relay server starts. /// /// This can be used to track unwanted restarts due to crashes or termination. 
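The ProjectStateNoCache counter documented in this hunk only counts permitted requests, with at most one no_cache fetch per second per project key. A stand-alone sketch of such a per-key debounce follows; it assumes a plain HashMap of last-permitted timestamps, and the NoCacheDebounce name and its API are illustrative, not Relay's actual implementation.

use std::collections::HashMap;
use std::time::{Duration, Instant};

/// Tracks when a `no_cache` fetch was last permitted per project key.
struct NoCacheDebounce {
    last_allowed: HashMap<String, Instant>,
    interval: Duration,
}

impl NoCacheDebounce {
    fn new() -> Self {
        Self {
            last_allowed: HashMap::new(),
            interval: Duration::from_secs(1),
        }
    }

    /// Returns `true` if a `no_cache` fetch is permitted for this key right now.
    /// Only permitted requests would then increment `project_state.no_cache`.
    fn allow(&mut self, project_key: &str) -> bool {
        let now = Instant::now();
        let permitted = match self.last_allowed.get(project_key) {
            Some(last) => now.duration_since(*last) >= self.interval,
            None => true,
        };
        if permitted {
            self.last_allowed.insert(project_key.to_owned(), now);
        }
        permitted
    }
}

fn main() {
    let mut debounce = NoCacheDebounce::new();
    assert!(debounce.allow("key-a")); // first no_cache request for this key is counted
    assert!(!debounce.allow("key-a")); // an immediate retry is not permitted
    assert!(debounce.allow("key-b")); // other project keys are tracked independently
}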
@@ -815,6 +831,8 @@ pub enum RelayCounters { EvictingStaleProjectCaches, /// Number of times that parsing a metrics bucket item from an envelope failed. MetricBucketsParsingFailed, + /// Number of times that parsing a metric meta item from an envelope failed. + MetricMetaParsingFailed, /// Count extraction of transaction names. Tag with the decision to drop / replace / use original. MetricsTransactionNameExtracted, /// Number of Events with an OpenTelemetry Context @@ -870,7 +888,10 @@ impl CounterMetric for RelayCounters { RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes", RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes", RelayCounters::Outcomes => "events.outcomes", + RelayCounters::ProjectStateGet => "project_state.get", RelayCounters::ProjectStateRequest => "project_state.request", + RelayCounters::ProjectStateNoCache => "project_state.no_cache", + RelayCounters::ProjectStateFlushAllMetricMeta => "project_state.flush_all_metric_meta", #[cfg(feature = "processing")] RelayCounters::ProjectStateRedis => "project_state.redis.requests", RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed", @@ -887,6 +908,7 @@ impl CounterMetric for RelayCounters { RelayCounters::ResponsesStatusCodes => "responses.status_codes", RelayCounters::EvictingStaleProjectCaches => "project_cache.eviction", RelayCounters::MetricBucketsParsingFailed => "metrics.buckets.parsing_failed", + RelayCounters::MetricMetaParsingFailed => "metrics.meta.parsing_failed", RelayCounters::MetricsTransactionNameExtracted => "metrics.transaction_name", RelayCounters::OpenTelemetryEvent => "event.opentelemetry", RelayCounters::GlobalConfigFetched => "global_config.fetch", diff --git a/relay-server/src/testutils.rs b/relay-server/src/testutils.rs index 56e9742925..9f4c255d35 100644 --- a/relay-server/src/testutils.rs +++ b/relay-server/src/testutils.rs @@ -20,7 +20,6 @@ use crate::service::create_redis_pools; use crate::services::global_config::GlobalConfigHandle; use crate::services::outcome::TrackOutcome; use crate::services::processor::{self, EnvelopeProcessorService}; -use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::ProjectInfo; use crate::services::test_store::TestStore; use crate::utils::{ThreadPool, ThreadPoolBuilder}; @@ -118,6 +117,7 @@ pub fn empty_envelope_with_dsn(dsn: &str) -> Box { pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); + let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (aggregator, _) = mock_service("aggregator", (), |&mut (), _| {}); let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); @@ -132,12 +132,12 @@ pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), - ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, processor::Addrs { outcome_aggregator, + project_cache, upstream_relay, test_store, #[cfg(feature = "processing")] @@ -162,7 +162,6 @@ pub fn create_test_processor_with_addrs( create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), - ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, diff --git a/relay-server/src/utils/garbage.rs 
b/relay-server/src/utils/garbage.rs
new file mode 100644
index 0000000000..5b86cc80cd
--- /dev/null
+++ b/relay-server/src/utils/garbage.rs
@@ -0,0 +1,113 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::{mpsc, Arc};
+use std::thread::JoinHandle;
+
+/// Garbage disposal agent.
+///
+/// Spawns a background thread which drops items sent to it via [`GarbageDisposal::dispose`].
+#[derive(Debug)]
+pub struct GarbageDisposal<T> {
+    tx: mpsc::Sender<T>,
+    queue_size: Arc<AtomicUsize>,
+}
+
+impl<T: Send + 'static> GarbageDisposal<T> {
+    /// Returns a new instance plus a handle to join on the background thread.
+    /// Currently only used in tests.
+    fn new_joinable() -> (Self, JoinHandle<()>) {
+        let (tx, rx) = mpsc::channel();
+
+        let queue_size = Arc::new(AtomicUsize::new(0));
+        let queue_size_clone = queue_size.clone();
+        let join_handle = std::thread::spawn(move || {
+            relay_log::debug!("Start garbage collection thread");
+            while let Ok(object) = rx.recv() {
+                queue_size_clone.fetch_sub(1, Ordering::Relaxed); // Wraps around on overflow
+                drop(object);
+            }
+            relay_log::debug!("Stop garbage collection thread");
+        });
+
+        (Self { tx, queue_size }, join_handle)
+    }
+
+    /// Spawns a new garbage disposal instance.
+    /// Every instance has its own background thread that receives items to be dropped via
+    /// [`Self::dispose`].
+    /// When the instance is dropped, the background thread stops automatically.
+    pub fn new() -> Self {
+        let (instance, _) = Self::new_joinable();
+        instance
+    }
+
+    /// Defers dropping an object by sending it to the background thread.
+    pub fn dispose<U>(&self, object: U)
+    where
+        T: From<U>,
+    {
+        self.queue_size.fetch_add(1, Ordering::Relaxed);
+        self.tx
+            .send(T::from(object))
+            .map_err(|e| {
+                relay_log::error!("failed to send object to garbage disposal thread, drop here");
+                drop(e.0);
+            })
+            .ok();
+    }
+
+    /// Get current queue size.
+    pub fn queue_size(&self) -> usize {
+        self.queue_size.load(Ordering::Relaxed)
+    }
+}
+
+impl<T: Send + 'static> Default for GarbageDisposal<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::{Arc, Mutex};
+    use std::thread::ThreadId;
+
+    use super::GarbageDisposal;
+
+    struct SomeStruct {
+        thread_ids: Arc<Mutex<Vec<ThreadId>>>,
+    }
+
+    impl Drop for SomeStruct {
+        fn drop(&mut self) {
+            self.thread_ids
+                .lock()
+                .unwrap()
+                .push(std::thread::current().id())
+        }
+    }
+
+    #[test]
+    fn test_garbage_disposal() {
+        let thread_ids = Arc::new(Mutex::new(Vec::<ThreadId>::new()));
+
+        let x1 = SomeStruct {
+            thread_ids: thread_ids.clone(),
+        };
+        drop(x1);
+
+        let x2 = SomeStruct {
+            thread_ids: thread_ids.clone(),
+        };
+
+        let (garbage, join_handle) = GarbageDisposal::<SomeStruct>::new_joinable();
+        garbage.dispose(x2);
+        drop(garbage); // breaks the while loop by dropping rx
+        join_handle.join().ok(); // wait for thread to finish its work
+
+        let thread_ids = thread_ids.lock().unwrap();
+        assert_eq!(thread_ids.len(), 2);
+        assert_eq!(thread_ids[0], std::thread::current().id());
+        assert!(thread_ids[0] != thread_ids[1]);
+    }
+}
diff --git a/relay-server/src/utils/mod.rs b/relay-server/src/utils/mod.rs
index 9c428d3b5c..8d12ee347e 100644
--- a/relay-server/src/utils/mod.rs
+++ b/relay-server/src/utils/mod.rs
@@ -1,5 +1,6 @@
 mod api;
 mod dynamic_sampling;
+mod garbage;
 mod managed_envelope;
 mod multipart;
 mod param_parser;
@@ -21,6 +22,7 @@ mod unreal;
 pub use self::api::*;
 pub use self::dynamic_sampling::*;
+pub use self::garbage::*;
 pub use self::managed_envelope::*;
 pub use self::memory::*;
 pub use self::multipart::*;
diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs
index 8db1f37a84..2dabb2ed3e 100644
--- a/relay-server/src/utils/rate_limits.rs
+++ b/relay-server/src/utils/rate_limits.rs
@@ -120,6 +120,7 @@ fn infer_event_category(item: &Item) -> Option<DataCategory> {
         ItemType::Sessions => None,
         ItemType::Statsd => None,
         ItemType::MetricBuckets => None,
+        ItemType::MetricMeta => None,
         ItemType::FormData => None,
         ItemType::UserReport => None,
         ItemType::Profile => None,
@@ -491,6 +492,7 @@ impl Enforcement {
                 | ItemType::Sessions
                 | ItemType::Statsd
                 | ItemType::MetricBuckets
+                | ItemType::MetricMeta
                 | ItemType::ClientReport
                 | ItemType::UserReportV2
                 | ItemType::ProfileChunk
diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs
index 4bb4b9d88d..725306d1d8 100644
--- a/relay-server/src/utils/sizes.rs
+++ b/relay-server/src/utils/sizes.rs
@@ -16,6 +16,7 @@ use crate::utils::{ItemAction, ManagedEnvelope};
 /// - `max_check_in_size`
 /// - `max_event_size`
 /// - `max_metric_buckets_size`
+/// - `max_metric_meta_size`
 /// - `max_profile_size`
 /// - `max_replay_compressed_size`
 /// - `max_session_count`
@@ -61,6 +62,7 @@ pub fn check_envelope_size_limits(config: &Config, envelope: &Envelope) -> Resul
         ItemType::UserReport => NO_LIMIT,
         ItemType::Statsd => config.max_statsd_size(),
         ItemType::MetricBuckets => config.max_metric_buckets_size(),
+        ItemType::MetricMeta => config.max_metric_meta_size(),
         ItemType::Span | ItemType::OtelSpan => config.max_span_size(),
         ItemType::ProfileChunk => config.max_profile_size(),
         ItemType::Unknown(_) => NO_LIMIT,
diff --git a/tests/integration/test_metric_meta.py b/tests/integration/test_metric_meta.py
new file mode 100644
index 0000000000..8dc0822008
--- /dev/null
+++ b/tests/integration/test_metric_meta.py
@@ -0,0 +1,99 @@
+import json
+import time
+
+from sentry_sdk.envelope import Envelope, Item, PayloadRef
+from datetime import
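The GarbageDisposal utility added above exists to move expensive drops off the calling thread: callers hand objects to dispose and a dedicated thread drops them, while queue_size exposes the pending count described by the new project_cache.garbage.queue_size gauge. The distilled, stand-alone sketch below shows only the underlying pattern (a channel plus a background thread that drops whatever it receives); the Noisy type and its printed messages are illustrative and not part of this diff.

use std::sync::mpsc;
use std::thread;

/// Stand-in for a value whose `Drop` is expensive, e.g. a large project state map.
struct Noisy(usize);

impl Drop for Noisy {
    fn drop(&mut self) {
        println!("dropping {} on {:?}", self.0, thread::current().id());
    }
}

fn main() {
    let (tx, rx) = mpsc::channel::<Noisy>();

    // Same loop shape as `GarbageDisposal::new_joinable`: drop received values
    // until all senders are gone.
    let handle = thread::spawn(move || {
        while let Ok(value) = rx.recv() {
            drop(value);
        }
    });

    for i in 0..3 {
        // The caller returns immediately; the drop happens on the background thread.
        tx.send(Noisy(i)).ok();
    }

    drop(tx); // closing the channel stops the background thread
    handle.join().ok();
}

In relay-server itself the equivalent call goes through GarbageDisposal::dispose, which additionally counts pending items so they can be reported via RelayGauges::ProjectCacheGarbageQueueSize.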
datetime, timezone, time as dttime + + +def assert_location(expected, actual): + # Compare without the 'type' which is not persisted into storage. + assert {k: expected[k] for k in expected.keys() - {"type"}} == actual + + +def test_metric_meta(mini_sentry, redis_client, relay_with_processing): + project_id = 42 + now = datetime.now(tz=timezone.utc) + start_of_day = datetime.combine(now, dttime.min, tzinfo=timezone.utc) + + # Key magic number is the fnv32 hash from the MRI. + redis_key = "mm:l:{1}:42:2718098263:" + str(int(start_of_day.timestamp())) + # Clear just so there is no overlap with previous tests. + redis_client.delete(redis_key) + + relay = relay_with_processing() + + project_config = mini_sentry.add_full_project_config(project_id)["config"] + project_config.setdefault("features", []).append("organizations:custom-metrics") + + location1 = { + "type": "location", + "function": "_scan_for_suspect_projects", + "module": "sentry.tasks.low_priority_symbolication", + "filename": "sentry/tasks/low_priority_symbolication.py", + "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", + "lineno": 45, + } + location2 = { + "type": "location", + "function": "_scan_for_suspect_projects_the_second", + "module": "sentry.tasks.low_priority_symbolication", + "filename": "sentry/tasks/low_priority_symbolication.py", + "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", + "lineno": 120, + } + location3 = { + "type": "location", + "function": "_scan_for_suspect_projects_the_second", + "module": "sentry.tasks.low_priority_symbolication", + "filename": "sentry/tasks/low_priority_symbolication.py", + "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", + "lineno": 123, + } + + envelope = Envelope() + payload = { + "timestamp": now.isoformat(), + "mapping": { + "d:custom/sentry.process_profile.track_outcome@second": [ + location1, + location2, + ] + }, + } + envelope.add_item(Item(PayloadRef(json=payload), type="metric_meta")) + relay.send_envelope(project_id, envelope) + + time.sleep(1) + + stored = sorted( + (json.loads(v) for v in redis_client.smembers(redis_key)), + key=lambda x: x["lineno"], + ) + assert len(stored) == 2 + assert_location(location1, stored[0]) + assert_location(location2, stored[1]) + + # Resend one location and add a new location. 
+ envelope = Envelope() + payload = { + "timestamp": now.isoformat(), + "mapping": { + "d:custom/sentry.process_profile.track_outcome@second": [ + location1, + location3, + ] + }, + } + envelope.add_item(Item(PayloadRef(json=payload), type="metric_meta")) + relay.send_envelope(project_id, envelope) + + time.sleep(1) + + stored = sorted( + (json.loads(v) for v in redis_client.smembers(redis_key)), + key=lambda x: x["lineno"], + ) + assert len(stored) == 3 + assert_location(location1, stored[0]) + assert_location(location2, stored[1]) + assert_location(location3, stored[2]) diff --git a/tests/integration/test_metrics.py b/tests/integration/test_metrics.py index ba18e21e4e..5a4f0d3961 100644 --- a/tests/integration/test_metrics.py +++ b/tests/integration/test_metrics.py @@ -2,6 +2,7 @@ from collections import defaultdict from datetime import UTC, datetime, timedelta, timezone from pathlib import Path +from sentry_sdk.envelope import Envelope, Item, PayloadRef import json import signal import time @@ -133,9 +134,52 @@ def test_metrics_proxy_mode_statsd(mini_sentry, relay): relay.send_metrics(project_id, metrics_payload) envelope = mini_sentry.captured_events.get(timeout=3) assert len(envelope.items) == 1 - item = envelope.items[0] - assert item.type == "statsd" - assert item.get_bytes().decode() == metrics_payload + metric_meta_item = envelope.items[0] + assert metric_meta_item.type == "statsd" + assert metric_meta_item.get_bytes().decode() == metrics_payload + + +def test_metrics_proxy_mode_metrics_meta(mini_sentry, relay): + relay = relay( + mini_sentry, + options={ + "relay": {"mode": "proxy"}, + "aggregator": { + "bucket_interval": 1, + "initial_delay": 0, + "shift_key": "none", + }, + }, + ) + + location = { + "type": "location", + "function": "_scan_for_suspect_projects", + "module": "sentry.tasks.low_priority_symbolication", + "filename": "sentry/tasks/low_priority_symbolication.py", + "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", + "lineno": 45, + } + + meta_payload = { + "timestamp": datetime.now(tz=timezone.utc).isoformat(), + "mapping": { + "d:custom/sentry.process_profile.track_outcome@second": [ + location, + ] + }, + } + meta_payload = json.dumps(meta_payload, sort_keys=True) + + envelope = Envelope() + envelope.add_item(Item(PayloadRef(json=meta_payload), type="metric_meta")) + relay.send_envelope(42, envelope) + + envelope = mini_sentry.captured_events.get(timeout=3) + assert len(envelope.items) == 1 + metric_meta_item = envelope.items[0] + assert metric_meta_item.type == "metric_meta" + assert json.loads(metric_meta_item.get_bytes().decode()) == meta_payload def test_metrics(mini_sentry, relay): diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index c10e807cf8..18f1f2b39d 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -12,42 +12,6 @@ RelayInfo = namedtuple("RelayInfo", ["id", "public_key", "secret_key", "internal"]) -def request_config( - relay, packed, signature, *, version="3", relay_id=None, headers=None -): - return relay.post( - f"/api/0/relays/projectconfigs/?version={version}", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay_id or relay.relay_id, - "X-Sentry-Relay-Signature": signature, - } - | (headers or {}), - ) - - -def get_response(relay, packed, signature, *, version="3", relay_id=None, headers=None): - data = None - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = request_config( - 
relay, - packed, - signature, - version=version, - relay_id=relay_id, - headers=headers, - ) - assert response.ok - data = response.json() - print(data) - if data["configs"] or data.get("global") or data.get("unchanged"): - return data, response - time.sleep(0.01) - - assert False, "Relay did still not receive a project config from minisentry" - - @pytest.mark.parametrize( "caller, projects", [ @@ -120,9 +84,16 @@ def test_dynamic_relays(mini_sentry, relay, caller, projects): packed, signature = caller.secret_key.pack(request) - data, _ = get_response(relay1, packed, signature, relay_id=caller.id) + resp = relay1.post( + "/api/0/relays/projectconfigs/?version=2", + data=packed, + headers={"X-Sentry-Relay-Id": caller.id, "X-Sentry-Relay-Signature": signature}, + ) + + assert resp.ok # test that it returns valid data + data = resp.json() for p in public_keys: assert data["configs"][p] is not None @@ -133,7 +104,14 @@ def test_invalid_json(mini_sentry, relay): body = {} # missing the required `publicKeys` field packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = request_config(relay, packed, signature) + response = relay.post( + "/api/0/relays/projectconfigs/?version=2", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay.relay_id, + "X-Sentry-Relay-Signature": signature, + }, + ) assert response.status_code == 400 # Bad Request assert "JSON" in response.text @@ -143,7 +121,7 @@ def test_invalid_signature(mini_sentry, relay): relay = relay(mini_sentry) response = relay.post( - "/api/0/relays/projectconfigs/?version=3", + "/api/0/relays/projectconfigs/?version=2", data='{"publicKeys":[]}', headers={ "X-Sentry-Relay-Id": relay.relay_id, @@ -170,9 +148,17 @@ def test_broken_projectkey(mini_sentry, relay): } packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data, _ = get_response(relay, packed, signature) + response = relay.post( + "/api/0/relays/projectconfigs/?version=2", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay.relay_id, + "X-Sentry-Relay-Signature": signature, + }, + ) - assert public_key in data["configs"] + assert response.ok + assert public_key in response.json()["configs"] def test_pending_projects(mini_sentry, relay): @@ -186,17 +172,63 @@ def test_pending_projects(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = request_config(relay, packed, signature) + def request_config(): + return relay.post( + "/api/0/relays/projectconfigs/?version=3", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay.relay_id, + "X-Sentry-Relay-Signature": signature, + }, + ) + + response = request_config() + assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data["configs"] - data, _ = get_response(relay, packed, signature) + deadline = time.monotonic() + 15 + while time.monotonic() <= deadline: + response = request_config() + assert response.ok + data = response.json() + if data["configs"]: + break + else: + assert False, "Relay did still not receive a project config from minisentry" assert public_key in data["configs"] assert data.get("pending") is None +def request_config(relay, packed, signature, version: str): + return relay.post( + f"/api/0/relays/projectconfigs/?version={version}", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay.relay_id, + "X-Sentry-Relay-Signature": signature, + }, + ) + + +def get_response(relay, packed, signature, version="3"): + data = None + deadline = time.monotonic() + 
15 + while time.monotonic() <= deadline: + # send 1 r/s + time.sleep(1) + response = request_config(relay, packed, signature, version) + assert response.ok + data = response.json() + if data["configs"]: + break + else: + print("Relay did still not receive a project config from minisentry") + return data + + def test_unparsable_project_config(mini_sentry, relay): project_key = 42 relay_config = { @@ -255,7 +287,7 @@ def assert_clear_test_failures(): assert_clear_test_failures() # The state should be fixed and updated by now, since we keep re-trying to fetch new one all the time. - data, _ = get_response(relay, packed, signature) + data = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] time.sleep(1) @@ -294,7 +326,7 @@ def test_cached_project_config(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data, _ = get_response(relay, packed, signature) + data = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -304,7 +336,7 @@ def test_cached_project_config(mini_sentry, relay): # Give it a bit time for update to go through. time.sleep(1) - data, _ = get_response(relay, packed, signature) + data = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -341,7 +373,7 @@ def test_get_global_config(mini_sentry, relay): body = {"publicKeys": [], "global": True} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data, _ = get_response(relay, packed, signature, version="3") + data = get_response(relay, packed, signature, version="3") global_extraction_config = data["global"].pop("metricExtraction") assert "span_metrics_common" in global_extraction_config["groups"] @@ -357,11 +389,19 @@ def test_compression(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data, response = get_response( - relay, packed, signature, headers={"Accept-Encoding": "gzip"} + response = relay.post( + "/api/0/relays/projectconfigs/?version=2", + data=packed, + headers={ + "Accept-Encoding": "gzip", + "X-Sentry-Relay-Id": relay.relay_id, + "X-Sentry-Relay-Signature": signature, + }, ) + + assert response.ok assert response.headers["content-encoding"] == "gzip" - assert public_key in data["configs"] + assert public_key in response.json()["configs"] def test_unchanged_projects(mini_sentry, relay): @@ -373,14 +413,32 @@ def test_unchanged_projects(mini_sentry, relay): body = {"publicKeys": [public_key], "revisions": ["123"]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = request_config(relay, packed, signature) + def request_config(): + return relay.post( + "/api/0/relays/projectconfigs/?version=3", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay.relay_id, + "X-Sentry-Relay-Signature": signature, + }, + ) + + response = request_config() assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data.get("unchanged", []) - data, response = get_response(relay, packed, signature) + deadline = time.monotonic() + 15 + while time.monotonic() <= deadline: + response = request_config() + assert response.ok + data = response.json() + if data.get("unchanged"): + break + else: + assert False, 
"Relay did still not receive a project config from minisentry" assert public_key in data["unchanged"] assert public_key not in data["configs"] diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index d7ab5cc2e3..a934319c2c 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -16,12 +16,7 @@ def test_local_project_config(mini_sentry, relay): project_id = 42 config = mini_sentry.basic_project_config(project_id) relay_config = { - "cache": { - "file_interval": 1, - "project_expiry": 1, - "project_grace_period": 0, - "eviction_interval": 1, - } + "cache": {"file_interval": 1, "project_expiry": 1, "project_grace_period": 0} } relay = relay(mini_sentry, relay_config, wait_health_check=False) relay.config_dir.mkdir("projects").join("42.json").write( @@ -50,14 +45,17 @@ def test_local_project_config(mini_sentry, relay): assert event["logentry"] == {"formatted": "Hello, World!"} relay.config_dir.join("projects").join("42.json").write( - json.dumps({"disabled": True, "publicKeys": config["publicKeys"]}) + json.dumps({"disabled": True}) ) - time.sleep(2) - relay.send_event(project_id, dsn_key=dsn_key) - time.sleep(0.3) - pytest.raises(HTTPError, lambda: relay.send_event(project_id, dsn_key=dsn_key)) + try: + # This may or may not respond with 403, depending on how quickly the future to fetch project + # states executes. + relay.send_event(project_id, dsn_key=dsn_key) + except HTTPError: + pass + pytest.raises(queue.Empty, lambda: mini_sentry.captured_events.get(timeout=1)) @@ -82,7 +80,6 @@ def get_project_config(): "miss_expiry": 1, "project_expiry": 1, "project_grace_period": grace_period, - "eviction_interval": 1, } }, ) @@ -327,8 +324,7 @@ def get_project_config(): "cache": { "miss_expiry": 1, "project_expiry": 1, - "project_grace_period": 5, - "eviction_interval": 1, + "project_grace_period": 0, } }, ) @@ -347,8 +343,8 @@ def get_project_config(): relay.send_event(42) - assert project_config_fetch.wait(timeout=2) - assert with_rev.wait(timeout=2) + assert project_config_fetch.wait(timeout=1) + assert with_rev.wait(timeout=1) event = mini_sentry.captured_events.get(timeout=1).get_event() assert event["logentry"] == {"formatted": "Hello, World!"} diff --git a/tests/integration/test_redis.py b/tests/integration/test_redis.py index 8520e08b83..c263d01ae6 100644 --- a/tests/integration/test_redis.py +++ b/tests/integration/test_redis.py @@ -1,6 +1,70 @@ +import json +import time import uuid from sentry_sdk.envelope import Envelope, Item, PayloadRef +from datetime import datetime, timezone, time as dttime + + +def test_multi_write_redis_client_with_metric_meta( + mini_sentry, redis_client, secondary_redis_client, relay_with_processing +): + project_id = 42 + now = datetime.now(tz=timezone.utc) + start_of_day = datetime.combine(now, dttime.min, tzinfo=timezone.utc) + + # Key magic number is the fnv32 hash from the MRI. + redis_key = "mm:l:{1}:42:2718098263:" + str(int(start_of_day.timestamp())) + # Clear just so there is no overlap with previous tests. + redis_client.delete(redis_key) + + relay = relay_with_processing( + { + "processing": { + "redis": { + "configs": [ + {"server": "redis://127.0.0.1:6379"}, + # We want to test with multiple nesting levels to make sure the multi-write + # works nonetheless. 
+ {"configs": ["redis://127.0.0.1:6380"]}, + ] + } + } + } + ) + + project_config = mini_sentry.add_full_project_config(project_id)["config"] + project_config.setdefault("features", []).append("organizations:custom-metrics") + + location = { + "type": "location", + "function": "_scan_for_suspect_projects", + "module": "sentry.tasks.low_priority_symbolication", + "filename": "sentry/tasks/low_priority_symbolication.py", + "abs_path": "/usr/src/sentry/src/sentry/tasks/low_priority_symbolication.py", + "lineno": 45, + } + + envelope = Envelope() + payload = { + "timestamp": now.isoformat(), + "mapping": { + "d:custom/sentry.process_profile.track_outcome@second": [ + location, + ] + }, + } + envelope.add_item(Item(PayloadRef(json=payload), type="metric_meta")) + relay.send_envelope(project_id, envelope) + + time.sleep(1) + + for client in [redis_client, secondary_redis_client]: + stored = sorted( + (json.loads(v) for v in client.smembers(redis_key)), + key=lambda x: x["lineno"], + ) + assert len(stored) == 1 def test_multi_write_redis_client_with_rate_limiting(