diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28548848d3..8287d3842e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -553,7 +553,7 @@ jobs: - run: make test-integration env: PYTEST_N: 6 - RELAY_VERSION_CHAIN: "20.6.0,latest" + RELAY_VERSION_CHAIN: "23.12.0,latest" sentry-relay-integration-tests: name: Sentry-Relay Integration Tests diff --git a/CHANGELOG.md b/CHANGELOG.md index fec1353e90..28dc177a26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ **Breaking Changes**: - Removes support for metric meta envelope items. ([#4152](https://github.com/getsentry/relay/pull/4152)) +- Removes support for the project cache endpoint version 2 and before. ([#4147](https://github.com/getsentry/relay/pull/4147)) **Bug Fixes** diff --git a/Cargo.lock b/Cargo.lock index f844e78d09..8e9e350e0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" -version = "0.8.8" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", @@ -217,6 +217,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic-wait" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55b94919229f2c42292fd71ffa4b75e83193bffdd77b1e858cd55fd2d0b0ea8" +dependencies = [ + "libc", + "windows-sys 0.42.0", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -412,7 +422,7 @@ dependencies = [ "bitflags 2.4.1", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -2695,6 +2705,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "papaya" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "674d893d27388726e4020eaa60257b3df760392003c14f46455e3277af72cd04" +dependencies = [ + "atomic-wait", + "seize", +] + [[package]] name = "parking" version = "2.2.0" @@ -2999,7 +3019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.77", @@ -3733,6 +3753,7 @@ dependencies = [ name = "relay-server" version = "24.10.0" dependencies = [ + "ahash", "anyhow", "arc-swap", "axum", @@ -3759,6 +3780,7 @@ dependencies = [ "minidump", "multer", "once_cell", + "papaya", "pin-project-lite", "priority-queue", "rand", @@ -4172,6 +4194,12 @@ dependencies = [ "libc", ] +[[package]] +name = "seize" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d659fa6f19e82a52ab8d3fff3c380bd8cc16462eaea411395618a38760eb85bc" + [[package]] name = "semver" version = "1.0.23" @@ -5787,6 +5815,21 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 
0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -5845,6 +5888,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -5857,6 +5906,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -5869,6 +5924,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -5887,6 +5948,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -5899,6 +5966,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -5911,6 +5984,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -5923,6 +6002,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index 2865dccce0..7b41485040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ relay-test = { path = "relay-test" } relay-protocol-derive = { path = "relay-protocol-derive" } relay-event-derive = { path = "relay-event-derive" } +ahash = "0.8.11" android_trace_log = { 
version = "0.3.0", features = ["serde"] } anyhow = "1.0.66" axum = "0.7.5" @@ -119,6 +120,7 @@ num-traits = "0.2.18" num_cpus = "1.13.0" once_cell = "1.13.1" opentelemetry-proto = { version = "0.7.0", default-features = false } +papaya = "0.1.4" parking_lot = "0.12.1" path-slash = "0.2.1" pest = "2.1.3" diff --git a/relay-config/src/config.rs b/relay-config/src/config.rs index 2a98bc63a3..6f9ac83e2e 100644 --- a/relay-config/src/config.rs +++ b/relay-config/src/config.rs @@ -1017,7 +1017,9 @@ struct Cache { /// The cache timeout for project configurations in seconds. project_expiry: u32, /// Continue using project state this many seconds after cache expiry while a new state is - /// being fetched. This is added on top of `project_expiry` and `miss_expiry`. Default is 0. + /// being fetched. This is added on top of `project_expiry`. + /// + /// Default is 2 minutes. project_grace_period: u32, /// The cache timeout for downstream relay info (public keys) in seconds. relay_expiry: u32, @@ -1053,17 +1055,17 @@ impl Default for Cache { fn default() -> Self { Cache { project_request_full_config: false, - project_expiry: 300, // 5 minutes - project_grace_period: 0, - relay_expiry: 3600, // 1 hour - envelope_expiry: 600, // 10 minutes + project_expiry: 300, // 5 minutes + project_grace_period: 120, // 2 minutes + relay_expiry: 3600, // 1 hour + envelope_expiry: 600, // 10 minutes envelope_buffer_size: 1000, miss_expiry: 60, // 1 minute batch_interval: 100, // 100ms downstream_relays_batch_interval: 100, // 100ms batch_size: 500, file_interval: 10, // 10 seconds - eviction_interval: 60, // 60 seconds + eviction_interval: 15, // 15 seconds global_config_fetch_interval: 10, // 10 seconds } } diff --git a/relay-dynamic-config/src/error_boundary.rs b/relay-dynamic-config/src/error_boundary.rs index c80d2ab8b7..dc4948a3b0 100644 --- a/relay-dynamic-config/src/error_boundary.rs +++ b/relay-dynamic-config/src/error_boundary.rs @@ -42,6 +42,14 @@ impl ErrorBoundary { } } + /// Converts from `ErrorBoundary` to `ErrorBoundary<&T>`. + pub fn as_ref(&self) -> ErrorBoundary<&T> { + match self { + Self::Ok(t) => ErrorBoundary::Ok(t), + Self::Err(e) => ErrorBoundary::Err(Arc::clone(e)), + } + } + /// Returns the contained [`Ok`] value or computes it from a closure. #[inline] pub fn unwrap_or_else(self, op: F) -> T diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index c1162d340e..ba90bacaca 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -1,5 +1,6 @@ use std::fmt; use std::str::FromStr; +use std::sync::{Arc, Mutex, PoisonError}; use std::time::{Duration, Instant}; use relay_base_schema::metrics::MetricNamespace; @@ -279,15 +280,14 @@ impl RateLimits { } /// Removes expired rate limits from this instance. - pub fn clean_expired(&mut self) { - let now = Instant::now(); + pub fn clean_expired(&mut self, now: Instant) { self.limits .retain(|limit| !limit.retry_after.expired_at(now)); } /// Checks whether any rate limits apply to the given scoping. /// - /// If no limits match, then the returned `RateLimits` instance evalutes `is_ok`. Otherwise, it + /// If no limits match, then the returned `RateLimits` instance evaluates `is_ok`. Otherwise, it /// contains rate limits that match the given scoping. pub fn check(&self, scoping: ItemScoping<'_>) -> Self { self.check_with_quotas(&[], scoping) @@ -298,7 +298,7 @@ impl RateLimits { /// This is similar to `check`. 
Additionally, it checks for quotas with a static limit `0`, and /// rejects items even if there is no active rate limit in this instance. /// - /// If no limits or quotas match, then the returned `RateLimits` instance evalutes `is_ok`. + /// If no limits or quotas match, then the returned `RateLimits` instance evaluates `is_ok`. /// Otherwise, it contains rate limits that match the given scoping. pub fn check_with_quotas<'a>( &self, @@ -339,7 +339,7 @@ impl RateLimits { /// Returns `true` if there are any limits contained. /// - /// This is equavalent to checking whether [`Self::longest`] returns `Some` + /// This is equivalent to checking whether [`Self::longest`] returns `Some` /// or [`Self::iter`] returns an iterator with at least one item. pub fn is_empty(&self) -> bool { self.limits.is_empty() @@ -402,7 +402,7 @@ impl<'a> IntoIterator for &'a RateLimits { /// /// The data structure makes sure no expired rate limits are enforced. #[derive(Debug, Default)] -pub struct CachedRateLimits(RateLimits); +pub struct CachedRateLimits(Mutex>); impl CachedRateLimits { /// Creates a new, empty instance without any rate limits enforced. @@ -413,25 +413,31 @@ impl CachedRateLimits { /// Adds a limit to this collection. /// /// See also: [`RateLimits::add`]. - pub fn add(&mut self, limit: RateLimit) { - self.0.add(limit); + pub fn add(&self, limit: RateLimit) { + let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); + let current = Arc::make_mut(&mut inner); + current.add(limit); } /// Merges more rate limits into this instance. /// /// See also: [`RateLimits::merge`]. - pub fn merge(&mut self, rate_limits: RateLimits) { - for limit in rate_limits { - self.add(limit) + pub fn merge(&self, limits: RateLimits) { + let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); + let current = Arc::make_mut(&mut inner); + for limit in limits { + current.add(limit) } } /// Returns a reference to the contained rate limits. /// - /// This call gurantuess that at the time of call no returned rate limit is expired. - pub fn current_limits(&mut self) -> &RateLimits { - self.0.clean_expired(); - &self.0 + /// This call guarantees that at the time of call no returned rate limit is expired. + pub fn current_limits(&self) -> Arc { + let now = Instant::now(); + let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner); + Arc::make_mut(&mut inner).clean_expired(now); + Arc::clone(&inner) } } @@ -927,7 +933,7 @@ mod tests { // Sanity check before running `clean_expired` assert_eq!(rate_limits.iter().count(), 2); - rate_limits.clean_expired(); + rate_limits.clean_expired(Instant::now()); // Check that the expired limit has been removed insta::assert_ron_snapshot!(rate_limits, @r###" @@ -1120,7 +1126,7 @@ mod tests { #[test] fn test_cached_rate_limits_expired() { - let mut cached = CachedRateLimits::new(); + let cached = CachedRateLimits::new(); // Active error limit cached.add(RateLimit { diff --git a/relay-sampling/src/evaluation.rs b/relay-sampling/src/evaluation.rs index 7f08dc8fa8..c3b3944dd7 100644 --- a/relay-sampling/src/evaluation.rs +++ b/relay-sampling/src/evaluation.rs @@ -70,7 +70,7 @@ impl<'a> ReservoirEvaluator<'a> { Arc::clone(&self.counters) } - /// Sets the Redis pool and organiation ID for the [`ReservoirEvaluator`]. + /// Sets the Redis pool and organization ID for the [`ReservoirEvaluator`]. /// /// These values are needed to synchronize with Redis. 
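A note on the `CachedRateLimits` rewrite in the `relay-quotas/src/rate_limit.rs` hunks above: the `&mut self` methods are replaced by interior mutability, with the limits behind a `Mutex<Arc<RateLimits>>`. Writers copy-on-write via `Arc::make_mut`, and readers take away a cheap `Arc` snapshot. The following is a minimal, self-contained sketch of that pattern only; `Limits` and `CachedLimits` are illustrative stand-ins, not relay code.

use std::sync::{Arc, Mutex, PoisonError};

// Illustrative stand-in for `RateLimits`; it only needs to be `Clone`
// so that `Arc::make_mut` can copy it while readers still hold a snapshot.
#[derive(Clone, Default, Debug)]
struct Limits(Vec<u64>);

#[derive(Default, Debug)]
struct CachedLimits(Mutex<Arc<Limits>>);

impl CachedLimits {
    // Writers take the lock and mutate in place if nobody shares the value,
    // otherwise `Arc::make_mut` clones it first (copy-on-write).
    fn add(&self, limit: u64) {
        let mut inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
        Arc::make_mut(&mut inner).0.push(limit);
    }

    // Readers receive an immutable snapshot; later writes never change it.
    fn current(&self) -> Arc<Limits> {
        let inner = self.0.lock().unwrap_or_else(PoisonError::into_inner);
        Arc::clone(&inner)
    }
}

fn main() {
    let cached = CachedLimits::default();
    cached.add(42);
    let snapshot = cached.current();
    cached.add(7); // copy-on-write: does not affect `snapshot`
    assert_eq!(snapshot.0, vec![42]);
    assert_eq!(cached.current().0, vec![42, 7]);
}

The diff's `current_limits` additionally prunes expired entries under the same lock before handing out the snapshot, which is why `clean_expired` now takes an explicit `Instant`.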
#[cfg(feature = "redis")] diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index dbec10dc95..34862cd152 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -29,6 +29,7 @@ processing = [ workspace = true [dependencies] +ahash = { workspace = true } anyhow = { workspace = true } serde_path_to_error = { workspace = true } axum = { workspace = true, features = ["macros", "matched-path", "tracing"] } @@ -53,6 +54,7 @@ mime = { workspace = true } minidump = { workspace = true, optional = true } multer = { workspace = true } once_cell = { workspace = true } +papaya = { workspace = true } pin-project-lite = { workspace = true } priority-queue = { workspace = true } rand = { workspace = true } diff --git a/relay-server/src/endpoints/batch_metrics.rs b/relay-server/src/endpoints/batch_metrics.rs index 4d108df90f..19d1395168 100644 --- a/relay-server/src/endpoints/batch_metrics.rs +++ b/relay-server/src/endpoints/batch_metrics.rs @@ -4,8 +4,7 @@ use serde::{Deserialize, Serialize}; use crate::extractors::{ReceivedAt, SignedBytes}; use crate::service::ServiceState; -use crate::services::processor::ProcessBatchedMetrics; -use crate::services::projects::cache::BucketSource; +use crate::services::processor::{BucketSource, ProcessBatchedMetrics}; #[derive(Debug, Serialize, Deserialize)] struct SendMetricsResponse {} diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index a70fec4722..7430beb6dc 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -12,8 +12,8 @@ use crate::envelope::{AttachmentType, Envelope, EnvelopeError, Item, ItemType, I use crate::service::ServiceState; use crate::services::buffer::EnvelopeBuffer; use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::{MetricData, ProcessingGroup}; -use crate::services::projects::cache::{CheckEnvelope, ProcessMetrics, ValidateEnvelope}; +use crate::services::processor::{BucketSource, MetricData, ProcessMetrics, ProcessingGroup}; +use crate::services::projects::cache::legacy::ValidateEnvelope; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; @@ -40,9 +40,6 @@ impl IntoResponse for ServiceUnavailable { /// Error type for all store-like requests. #[derive(Debug, thiserror::Error)] pub enum BadStoreRequest { - #[error("could not schedule event processing")] - ScheduleFailed, - #[error("empty request body")] EmptyBody, @@ -113,7 +110,7 @@ impl IntoResponse for BadStoreRequest { (StatusCode::TOO_MANY_REQUESTS, headers, body).into_response() } - BadStoreRequest::ScheduleFailed | BadStoreRequest::QueueFailed => { + BadStoreRequest::QueueFailed => { // These errors indicate that something's wrong with our service system, most likely // mailbox congestion or a faulty shutdown. Indicate an unavailable service to the // client. It might retry event submission at a later time. 
@@ -274,12 +271,12 @@ fn queue_envelope( if !metric_items.is_empty() { relay_log::trace!("sending metrics into processing queue"); - state.project_cache().send(ProcessMetrics { + state.processor().send(ProcessMetrics { data: MetricData::Raw(metric_items.into_vec()), received_at: envelope.received_at(), sent_at: envelope.sent_at(), project_key: envelope.meta().public_key(), - source: envelope.meta().into(), + source: BucketSource::from_meta(envelope.meta()), }); } } @@ -312,7 +309,9 @@ fn queue_envelope( } None => { relay_log::trace!("Sending envelope to project cache for V1 buffer"); - state.project_cache().send(ValidateEnvelope::new(envelope)); + state + .legacy_project_cache() + .send(ValidateEnvelope::new(envelope)); } } } @@ -369,10 +368,9 @@ pub async fn handle_envelope( } let checked = state - .project_cache() - .send(CheckEnvelope::new(managed_envelope)) - .await - .map_err(|_| BadStoreRequest::ScheduleFailed)? + .project_cache_handle() + .get(managed_envelope.scoping().project_key) + .check_envelope(managed_envelope) .map_err(BadStoreRequest::EventRejected)?; let Some(mut managed_envelope) = checked.envelope else { diff --git a/relay-server/src/endpoints/project_configs.rs b/relay-server/src/endpoints/project_configs.rs index 391687a31f..ff59384618 100644 --- a/relay-server/src/endpoints/project_configs.rs +++ b/relay-server/src/endpoints/project_configs.rs @@ -3,10 +3,9 @@ use std::sync::Arc; use axum::extract::{Query, Request}; use axum::handler::Handler; +use axum::http::StatusCode; use axum::response::{IntoResponse, Result}; use axum::{Json, RequestExt}; -use futures::stream::FuturesUnordered; -use futures::StreamExt; use relay_base_schema::project::ProjectKey; use relay_dynamic_config::{ErrorBoundary, GlobalConfig}; use serde::{Deserialize, Serialize}; @@ -16,15 +15,10 @@ use crate::endpoints::forward; use crate::extractors::SignedJson; use crate::service::ServiceState; use crate::services::global_config::{self, StatusResponse}; -use crate::services::projects::cache::{GetCachedProjectState, GetProjectState}; use crate::services::projects::project::{ - LimitedParsedProjectState, ParsedProjectState, ProjectState, + LimitedParsedProjectState, ParsedProjectState, ProjectState, Revision, }; - -/// V2 version of this endpoint. -/// -/// The request is a list of [`ProjectKey`]s and the response a list of [`ProjectStateWrapper`]s -const ENDPOINT_V2: u16 = 2; +use crate::utils::ApiErrorResponse; /// V3 version of this endpoint. /// @@ -34,6 +28,16 @@ const ENDPOINT_V2: u16 = 2; /// returned, or a further poll ensues. const ENDPOINT_V3: u16 = 3; +#[derive(Debug, Clone, Copy, thiserror::Error)] +#[error("This API version is no longer supported, upgrade your Relay or Client")] +struct VersionOutdatedError; + +impl IntoResponse for VersionOutdatedError { + fn into_response(self) -> axum::response::Response { + (StatusCode::BAD_REQUEST, ApiErrorResponse::from_error(&self)).into_response() + } +} + /// Helper to deserialize the `version` query parameter. #[derive(Clone, Copy, Debug, Deserialize)] struct VersionQuery { @@ -102,19 +106,17 @@ struct GetProjectStatesRequest { /// /// This length of this list if specified must be the same length /// as [`Self::public_keys`], the items are asssociated by index. 
- revisions: Option>>>, + revisions: Option>>, #[serde(default)] full_config: bool, #[serde(default)] - no_cache: bool, - #[serde(default)] global: bool, } fn into_valid_keys( public_keys: Vec>, - revisions: Option>>>, -) -> impl Iterator)> { + revisions: Option>>, +) -> impl Iterator { let mut revisions = revisions.and_then(|e| e.ok()).unwrap_or_default(); if !revisions.is_empty() && revisions.len() != public_keys.len() { // The downstream sent us a different amount of revisions than project keys, @@ -127,7 +129,9 @@ fn into_valid_keys( ); revisions.clear(); } - let revisions = revisions.into_iter().chain(std::iter::repeat(None)); + let revisions = revisions + .into_iter() + .chain(std::iter::repeat_with(Revision::default)); std::iter::zip(public_keys, revisions).filter_map(|(public_key, revision)| { // Skip unparsable public keys. @@ -139,30 +143,9 @@ fn into_valid_keys( async fn inner( state: ServiceState, - Query(version): Query, body: SignedJson, ) -> Result { let SignedJson { inner, relay } = body; - let project_cache = &state.project_cache().clone(); - - let no_cache = inner.no_cache; - let keys_len = inner.public_keys.len(); - - let mut futures: FuturesUnordered<_> = into_valid_keys(inner.public_keys, inner.revisions) - .map(|(project_key, revision)| async move { - let state_result = if version.version >= ENDPOINT_V3 && !no_cache { - project_cache - .send(GetCachedProjectState::new(project_key)) - .await - } else { - project_cache - .send(GetProjectState::new(project_key).no_cache(no_cache)) - .await - }; - - (project_key, revision, state_result) - }) - .collect(); let (global, global_status) = if inner.global { match state.global_config().send(global_config::Get).await? { @@ -178,12 +161,15 @@ async fn inner( (None, None) }; + let keys_len = inner.public_keys.len(); let mut pending = Vec::with_capacity(keys_len); let mut unchanged = Vec::with_capacity(keys_len); let mut configs = HashMap::with_capacity(keys_len); - while let Some((project_key, revision, state_result)) = futures.next().await { - let project_info = match state_result? { + for (project_key, revision) in into_valid_keys(inner.public_keys, inner.revisions) { + let project = state.project_cache_handle().get(project_key); + + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { // Don't insert project config. Downstream Relay will consider it disabled. @@ -196,7 +182,7 @@ async fn inner( }; // Only ever omit responses when there was a valid revision in the first place. - if revision.is_some() && project_info.rev == revision { + if project_info.rev == revision { unchanged.push(project_key); continue; } @@ -237,9 +223,14 @@ async fn inner( })) } +/// Returns `true` for all `?version` query parameters that are no longer supported by Relay and Sentry. +fn is_outdated(Query(query): Query) -> bool { + query.version < ENDPOINT_V3 +} + /// Returns `true` if the `?version` query parameter is compatible with this implementation. fn is_compatible(Query(query): Query) -> bool { - query.version >= ENDPOINT_V2 && query.version <= ENDPOINT_V3 + query.version == ENDPOINT_V3 } /// Endpoint handler for the project configs endpoint. @@ -255,9 +246,11 @@ fn is_compatible(Query(query): Query) -> bool { /// support old Relay versions. 
pub async fn handle(state: ServiceState, mut req: Request) -> Result { let data = req.extract_parts().await?; - Ok(if is_compatible(data) { - inner.call(req, state).await + if is_outdated(data) { + Err(VersionOutdatedError.into()) + } else if is_compatible(data) { + Ok(inner.call(req, state).await) } else { - forward::forward(state, req).await - }) + Ok(forward::forward(state, req).await) + } } diff --git a/relay-server/src/service.rs b/relay-server/src/service.rs index f4c8051284..ac953b13ff 100644 --- a/relay-server/src/service.rs +++ b/relay-server/src/service.rs @@ -1,5 +1,4 @@ use std::convert::Infallible; -use std::fmt; use std::sync::Arc; use std::time::Duration; @@ -8,11 +7,12 @@ use crate::services::buffer::{self, EnvelopeBufferService, ObservableEnvelopeBuf use crate::services::cogs::{CogsService, CogsServiceRecorder}; use crate::services::global_config::{GlobalConfigManager, GlobalConfigService}; use crate::services::health_check::{HealthCheck, HealthCheckService}; -use crate::services::metrics::{Aggregator, RouterService}; +use crate::services::metrics::RouterService; use crate::services::outcome::{OutcomeProducer, OutcomeProducerService, TrackOutcome}; use crate::services::outcome_aggregator::OutcomeAggregator; use crate::services::processor::{self, EnvelopeProcessor, EnvelopeProcessorService}; -use crate::services::projects::cache::{ProjectCache, ProjectCacheService, Services}; +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectCacheService}; +use crate::services::projects::source::ProjectSource; use crate::services::relays::{RelayCache, RelayCacheService}; use crate::services::stats::RelayStats; #[cfg(feature = "processing")] @@ -52,9 +52,8 @@ pub enum ServiceError { Redis, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Registry { - pub aggregator: Addr, pub health_check: Addr, pub outcome_producer: Addr, pub outcome_aggregator: Addr, @@ -62,21 +61,11 @@ pub struct Registry { pub test_store: Addr, pub relay_cache: Addr, pub global_config: Addr, - pub project_cache: Addr, + pub legacy_project_cache: Addr, pub upstream_relay: Addr, pub envelope_buffer: Option, -} -impl fmt::Debug for Registry { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Registry") - .field("aggregator", &self.aggregator) - .field("health_check", &self.health_check) - .field("outcome_producer", &self.outcome_producer) - .field("outcome_aggregator", &self.outcome_aggregator) - .field("processor", &format_args!("Addr")) - .finish() - } + pub project_cache_handle: ProjectCacheHandle, } /// Constructs a tokio [`Runtime`] configured for running [services](relay_system::Service). @@ -144,6 +133,7 @@ fn create_store_pool(config: &Config) -> Result { struct StateInner { config: Arc, memory_checker: MemoryChecker, + registry: Registry, } @@ -198,12 +188,23 @@ impl ServiceState { // service fail if the service is not running. 
let global_config = global_config.start(); - let (project_cache, project_cache_rx) = channel(ProjectCacheService::name()); + let (legacy_project_cache, legacy_project_cache_rx) = + channel(legacy::ProjectCacheService::name()); + + let project_source = ProjectSource::start( + Arc::clone(&config), + upstream_relay.clone(), + redis_pools + .as_ref() + .map(|pools| pools.project_configs.clone()), + ); + let project_cache_handle = + ProjectCacheService::new(Arc::clone(&config), project_source).start(); let aggregator = RouterService::new( config.default_aggregator_config().clone(), config.secondary_aggregator_configs().clone(), - Some(project_cache.clone().recipient()), + Some(legacy_project_cache.clone().recipient()), ); let aggregator_handle = aggregator.handle(); let aggregator = aggregator.start(); @@ -238,11 +239,11 @@ impl ServiceState { create_processor_pool(&config)?, config.clone(), global_config_handle, + project_cache_handle.clone(), cogs, #[cfg(feature = "processing")] redis_pools.clone(), processor::Addrs { - project_cache: project_cache.clone(), outcome_aggregator: outcome_aggregator.clone(), upstream_relay: upstream_relay.clone(), test_store: test_store.clone(), @@ -261,7 +262,7 @@ impl ServiceState { global_config_rx.clone(), buffer::Services { envelopes_tx, - project_cache: project_cache.clone(), + project_cache_handle: project_cache_handle.clone(), outcome_aggregator: outcome_aggregator.clone(), test_store: test_store.clone(), }, @@ -269,27 +270,24 @@ impl ServiceState { .map(|b| b.start_observable()); // Keep all the services in one context. - let project_cache_services = Services { + let project_cache_services = legacy::Services { envelope_buffer: envelope_buffer.as_ref().map(ObservableEnvelopeBuffer::addr), aggregator: aggregator.clone(), envelope_processor: processor.clone(), outcome_aggregator: outcome_aggregator.clone(), - project_cache: project_cache.clone(), + project_cache: legacy_project_cache.clone(), test_store: test_store.clone(), - upstream_relay: upstream_relay.clone(), }; - ProjectCacheService::new( + legacy::ProjectCacheService::new( config.clone(), MemoryChecker::new(memory_stat.clone(), config.clone()), + project_cache_handle.clone(), project_cache_services, global_config_rx, envelopes_rx, - redis_pools - .as_ref() - .map(|pools| pools.project_configs.clone()), ) - .spawn_handler(project_cache_rx); + .spawn_handler(legacy_project_cache_rx); let health_check = HealthCheckService::new( config.clone(), @@ -311,7 +309,6 @@ impl ServiceState { let relay_cache = RelayCacheService::new(config.clone(), upstream_relay.clone()).start(); let registry = Registry { - aggregator, processor, health_check, outcome_producer, @@ -319,7 +316,8 @@ impl ServiceState { test_store, relay_cache, global_config, - project_cache, + legacy_project_cache, + project_cache_handle, upstream_relay, envelope_buffer, }; @@ -352,9 +350,14 @@ impl ServiceState { self.inner.registry.envelope_buffer.as_ref() } - /// Returns the address of the [`ProjectCache`] service. - pub fn project_cache(&self) -> &Addr { - &self.inner.registry.project_cache + /// Returns the address of the [`legacy::ProjectCache`] service. + pub fn legacy_project_cache(&self) -> &Addr { + &self.inner.registry.legacy_project_cache + } + + /// Returns a [`ProjectCacheHandle`]. + pub fn project_cache_handle(&self) -> &ProjectCacheHandle { + &self.inner.registry.project_cache_handle } /// Returns the address of the [`RelayCache`] service. 
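The wiring above splits the old single `project_cache` actor address into two pieces: a `legacy::ProjectCache` address that keeps handling spool-v1 messages, and a shared `ProjectCacheHandle` that services clone and query directly. The buffer diff below then subscribes to `project_cache_handle.changes()` instead of receiving `UpdateProject` messages. Below is a minimal, self-contained sketch of that handle-plus-broadcast shape, assuming only the `tokio` runtime; every name in it is an illustrative stand-in, not the relay-server API.

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

use tokio::sync::broadcast;

type ProjectKey = String;
type ProjectState = String; // stand-in for the real project state enum

#[derive(Clone, Debug)]
enum ProjectChange {
    Ready(ProjectKey),
}

/// Cheaply cloneable handle: shared state for lookups, broadcast for updates.
#[derive(Clone)]
struct CacheHandle {
    shared: Arc<RwLock<HashMap<ProjectKey, ProjectState>>>,
    changes: broadcast::Sender<ProjectChange>,
}

impl CacheHandle {
    fn new() -> Self {
        Self {
            shared: Arc::default(),
            changes: broadcast::channel(16).0,
        }
    }

    /// Synchronous lookup; no message round-trip to a service.
    fn get(&self, key: &str) -> Option<ProjectState> {
        self.shared.read().unwrap().get(key).cloned()
    }

    /// Called by the cache service once a project state has been fetched.
    fn set(&self, key: ProjectKey, state: ProjectState) {
        self.shared.write().unwrap().insert(key.clone(), state);
        let _ = self.changes.send(ProjectChange::Ready(key));
    }

    /// Subscribers (like the envelope buffer) react to readiness events.
    fn changes(&self) -> broadcast::Receiver<ProjectChange> {
        self.changes.subscribe()
    }
}

#[tokio::main]
async fn main() {
    let handle = CacheHandle::new();
    let mut changes = handle.changes();

    handle.set("proj".to_owned(), "enabled".to_owned());

    assert!(matches!(changes.recv().await, Ok(ProjectChange::Ready(_))));
    assert_eq!(handle.get("proj"), Some("enabled".to_owned()));
}

The real `ProjectCacheHandle` in this diff additionally triggers a fetch on every `get` and emits `Evicted` events as well; the sketch only shows the shared-state-plus-subscription shape.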
diff --git a/relay-server/src/services/buffer/mod.rs b/relay-server/src/services/buffer/mod.rs index eeb561f16f..ff8d1cfd36 100644 --- a/relay-server/src/services/buffer/mod.rs +++ b/relay-server/src/services/buffer/mod.rs @@ -21,8 +21,7 @@ use crate::services::outcome::DiscardReason; use crate::services::outcome::Outcome; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{DequeuedEnvelope, ProjectCache, UpdateProject}; - +use crate::services::projects::cache::{legacy, ProjectCacheHandle, ProjectChange}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayHistograms}; use crate::utils::ManagedEnvelope; @@ -99,8 +98,8 @@ impl ObservableEnvelopeBuffer { pub struct Services { /// Bounded channel used exclusively to handle backpressure when sending envelopes to the /// project cache. - pub envelopes_tx: mpsc::Sender, - pub project_cache: Addr, + pub envelopes_tx: mpsc::Sender, + pub project_cache_handle: ProjectCacheHandle, pub outcome_aggregator: Addr, pub test_store: Addr, } @@ -157,7 +156,7 @@ impl EnvelopeBufferService { &mut self, buffer: &PolymorphicEnvelopeBuffer, dequeue: bool, - ) -> Option> { + ) -> Option> { relay_statsd::metric!( counter(RelayCounters::BufferReadyToPop) += 1, status = "checking" @@ -218,7 +217,7 @@ impl EnvelopeBufferService { config: &Config, buffer: &mut PolymorphicEnvelopeBuffer, services: &Services, - envelopes_tx_permit: Permit<'a, DequeuedEnvelope>, + envelopes_tx_permit: Permit<'a, legacy::DequeuedEnvelope>, ) -> Result { let sleep = match buffer.peek().await? { Peek::Empty => { @@ -251,7 +250,7 @@ impl EnvelopeBufferService { .pop() .await? .expect("Element disappeared despite exclusive excess"); - envelopes_tx_permit.send(DequeuedEnvelope(envelope)); + envelopes_tx_permit.send(legacy::DequeuedEnvelope(envelope)); Duration::ZERO // try next pop immediately } @@ -269,12 +268,12 @@ impl EnvelopeBufferService { relay_log::trace!("EnvelopeBufferService: requesting project(s) update"); let own_key = envelope.meta().public_key(); - services.project_cache.send(UpdateProject(own_key)); + services.project_cache_handle.fetch(own_key); match envelope.sampling_key() { None => {} Some(sampling_key) if sampling_key == own_key => {} // already sent. 
Some(sampling_key) => { - services.project_cache.send(UpdateProject(sampling_key)); + services.project_cache_handle.fetch(sampling_key); } } @@ -398,6 +397,7 @@ impl Service for EnvelopeBufferService { buffer.initialize().await; let mut shutdown = Controller::shutdown_handle(); + let mut project_events = self.services.project_cache_handle.changes(); relay_log::info!("EnvelopeBufferService: starting"); loop { @@ -427,6 +427,10 @@ impl Service for EnvelopeBufferService { } } } + Ok(ProjectChange::Ready(project_key)) = project_events.recv() => { + Self::handle_message(&mut buffer, EnvelopeBuffer::Ready(project_key)).await; + sleep = Duration::ZERO; + } Some(message) = rx.recv() => { Self::handle_message(&mut buffer, message).await; sleep = Duration::ZERO; @@ -483,8 +487,8 @@ mod tests { struct EnvelopeBufferServiceResult { service: EnvelopeBufferService, global_tx: watch::Sender, - envelopes_rx: mpsc::Receiver, - project_cache_rx: mpsc::UnboundedReceiver, + envelopes_rx: mpsc::Receiver, + project_cache_handle: ProjectCacheHandle, outcome_aggregator_rx: mpsc::UnboundedReceiver, } @@ -492,6 +496,8 @@ mod tests { config_json: Option, global_config_status: global_config::Status, ) -> EnvelopeBufferServiceResult { + relay_log::init_test!(); + let config_json = config_json.unwrap_or(serde_json::json!({ "spool": { "envelopes": { @@ -504,8 +510,8 @@ mod tests { let memory_stat = MemoryStat::default(); let (global_tx, global_rx) = watch::channel(global_config_status); let (envelopes_tx, envelopes_rx) = mpsc::channel(5); - let (project_cache, project_cache_rx) = Addr::custom(); let (outcome_aggregator, outcome_aggregator_rx) = Addr::custom(); + let project_cache_handle = ProjectCacheHandle::for_test(); let envelope_buffer_service = EnvelopeBufferService::new( config, @@ -513,7 +519,7 @@ mod tests { global_rx, Services { envelopes_tx, - project_cache, + project_cache_handle: project_cache_handle.clone(), outcome_aggregator, test_store: Addr::dummy(), }, @@ -524,7 +530,7 @@ mod tests { service: envelope_buffer_service, global_tx, envelopes_rx, - project_cache_rx, + project_cache_handle, outcome_aggregator_rx, } } @@ -537,7 +543,7 @@ mod tests { service, global_tx: _global_tx, envelopes_rx: _envelopes_rx, - project_cache_rx: _project_cache_rx, + project_cache_handle: _project_cache_handle, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service(None, global_config::Status::Pending); @@ -562,8 +568,8 @@ mod tests { service, global_tx, envelopes_rx, - project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, + .. } = envelope_buffer_service(None, global_config::Status::Pending); let addr = service.start(); @@ -576,7 +582,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); global_tx.send_replace(global_config::Status::Ready(Arc::new( GlobalConfig::default(), @@ -585,7 +590,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 1); - assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -595,9 +599,9 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, global_tx: _global_tx, + .. 
} = envelope_buffer_service( Some(serde_json::json!({ "spool": { @@ -623,7 +627,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(1000)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); } #[tokio::test] @@ -633,7 +636,7 @@ mod tests { let EnvelopeBufferServiceResult { service, envelopes_rx, - project_cache_rx, + project_cache_handle: _project_cache_handle, mut outcome_aggregator_rx, global_tx: _global_tx, } = envelope_buffer_service( @@ -661,7 +664,6 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; assert_eq!(envelopes_rx.len(), 0); - assert_eq!(project_cache_rx.len(), 0); let outcome = outcome_aggregator_rx.try_recv().unwrap(); assert_eq!(outcome.category, DataCategory::TransactionIndexed); @@ -675,7 +677,7 @@ mod tests { let EnvelopeBufferServiceResult { service, mut envelopes_rx, - mut project_cache_rx, + project_cache_handle, global_tx: _global_tx, outcome_aggregator_rx: _outcome_aggregator_rx, } = envelope_buffer_service( @@ -689,10 +691,10 @@ mod tests { let project_key = envelope.meta().public_key(); addr.send(EnvelopeBuffer::Push(envelope.clone())); + tokio::time::sleep(Duration::from_secs(3)).await; - tokio::time::sleep(Duration::from_secs(1)).await; - - let Some(DequeuedEnvelope(envelope)) = envelopes_rx.recv().await else { + let message = tokio::time::timeout(Duration::from_secs(5), envelopes_rx.recv()); + let Some(legacy::DequeuedEnvelope(envelope)) = message.await.unwrap() else { panic!(); }; @@ -700,20 +702,11 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; - assert_eq!(project_cache_rx.len(), 1); - let message = project_cache_rx.recv().await; - assert!(matches!( - message, - Some(ProjectCache::UpdateProject(key)) if key == project_key - )); + assert_eq!(project_cache_handle.test_num_fetches(), 1); - tokio::time::sleep(Duration::from_secs(1)).await; + tokio::time::sleep(Duration::from_millis(1300)).await; - assert_eq!(project_cache_rx.len(), 1); - assert!(matches!( - message, - Some(ProjectCache::UpdateProject(key)) if key == project_key - )) + assert_eq!(project_cache_handle.test_num_fetches(), 2); } #[tokio::test] @@ -724,8 +717,8 @@ mod tests { service, mut envelopes_rx, global_tx: _global_tx, - project_cache_rx: _project_cache_rx, outcome_aggregator_rx: _outcome_aggregator_rx, + .. 
} = envelope_buffer_service( None, global_config::Status::Ready(Arc::new(GlobalConfig::default())), @@ -748,7 +741,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, DequeuedEnvelope(..))) + .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) .count(), 5 ); @@ -761,7 +754,7 @@ mod tests { assert_eq!( messages .iter() - .filter(|message| matches!(message, DequeuedEnvelope(..))) + .filter(|message| matches!(message, legacy::DequeuedEnvelope(..))) .count(), 5 ); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index c528ac1643..d5f5ef58d9 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -72,9 +72,7 @@ use crate::services::global_config::GlobalConfigHandle; use crate::services::metrics::{Aggregator, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; use crate::services::processor::event::FiltersStatus; -use crate::services::projects::cache::{ - BucketSource, ProcessMetrics, ProjectCache, UpdateRateLimits, -}; +use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::{ProjectInfo, ProjectState}; use crate::services::test_store::{Capture, TestStore}; use crate::services::upstream::{ @@ -767,7 +765,7 @@ struct ProcessEnvelopeState<'a, Group> { /// Currently active cached rate limits of the project this envelope belongs to. #[cfg_attr(not(feature = "processing"), expect(dead_code))] - rate_limits: RateLimits, + rate_limits: Arc, /// The config of this Relay instance. config: Arc, @@ -884,7 +882,7 @@ pub struct ProcessEnvelope { /// The project info. pub project_info: Arc, /// Currently active cached rate limits for this project. - pub rate_limits: RateLimits, + pub rate_limits: Arc, /// Root sampling project info. pub sampling_project_info: Option>, /// Sampling reservoir counters. @@ -903,16 +901,7 @@ pub struct ProcessEnvelope { /// Additionally, processing applies clock drift correction using the system clock of this Relay, if /// the Envelope specifies the [`sent_at`](Envelope::sent_at) header. #[derive(Debug)] -pub struct ProcessProjectMetrics { - /// The project state the metrics belong to. - /// - /// The project state can be pending, in which case cached rate limits - /// and other project specific operations are skipped and executed once - /// the project state becomes available. - pub project_state: ProjectState, - /// Currently active cached rate limits for this project. - pub rate_limits: RateLimits, - +pub struct ProcessMetrics { /// A list of metric items. pub data: MetricData, /// The target project. @@ -1000,6 +989,32 @@ pub struct ProcessBatchedMetrics { pub sent_at: Option>, } +/// Source information where a metric bucket originates from. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum BucketSource { + /// The metric bucket originated from an internal Relay use case. + /// + /// The metric bucket originates either from within the same Relay + /// or was accepted coming from another Relay which is registered as + /// an internal Relay via Relay's configuration. + Internal, + /// The bucket source originated from an untrusted source. + /// + /// Managed Relays sending extracted metrics are considered external, + /// it's a project use case but it comes from an untrusted source. + External, +} + +impl BucketSource { + /// Infers the bucket source from [`RequestMeta::is_from_internal_relay`]. 
+ pub fn from_meta(meta: &RequestMeta) -> Self { + match meta.is_from_internal_relay() { + true => Self::Internal, + false => Self::External, + } + } +} + /// Metric buckets with additional project. #[derive(Debug, Clone)] pub struct ProjectMetrics { @@ -1008,7 +1023,7 @@ pub struct ProjectMetrics { /// Project info for extracting quotas. pub project_info: Arc, /// Currently cached rate limits. - pub rate_limits: RateLimits, + pub rate_limits: Arc, } /// Encodes metrics into an envelope ready to be sent upstream. @@ -1037,7 +1052,7 @@ pub struct SubmitClientReports { #[derive(Debug)] pub enum EnvelopeProcessor { ProcessEnvelope(Box), - ProcessProjectMetrics(Box), + ProcessProjectMetrics(Box), ProcessBatchedMetrics(Box), EncodeMetrics(Box), SubmitEnvelope(Box), @@ -1068,10 +1083,10 @@ impl FromMessage for EnvelopeProcessor { } } -impl FromMessage for EnvelopeProcessor { +impl FromMessage for EnvelopeProcessor { type Response = NoResponse; - fn from_message(message: ProcessProjectMetrics, _: ()) -> Self { + fn from_message(message: ProcessMetrics, _: ()) -> Self { Self::ProcessProjectMetrics(Box::new(message)) } } @@ -1118,7 +1133,6 @@ pub struct EnvelopeProcessorService { /// Contains the addresses of services that the processor publishes to. pub struct Addrs { - pub project_cache: Addr, pub outcome_aggregator: Addr, pub upstream_relay: Addr, pub test_store: Addr, @@ -1130,7 +1144,6 @@ pub struct Addrs { impl Default for Addrs { fn default() -> Self { Addrs { - project_cache: Addr::dummy(), outcome_aggregator: Addr::dummy(), upstream_relay: Addr::dummy(), test_store: Addr::dummy(), @@ -1145,6 +1158,7 @@ struct InnerProcessor { workers: WorkerGroup, config: Arc, global_config: GlobalConfigHandle, + project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] quotas_pool: Option, @@ -1159,10 +1173,12 @@ struct InnerProcessor { impl EnvelopeProcessorService { /// Creates a multi-threaded envelope processor. + #[cfg_attr(feature = "processing", expect(clippy::too_many_arguments))] pub fn new( pool: ThreadPool, config: Arc, global_config: GlobalConfigHandle, + project_cache: ProjectCacheHandle, cogs: Cogs, #[cfg(feature = "processing")] redis: Option, addrs: Addrs, @@ -1191,6 +1207,7 @@ impl EnvelopeProcessorService { let inner = InnerProcessor { workers: WorkerGroup::new(pool), global_config, + project_cache, cogs, #[cfg(feature = "processing")] quotas_pool: quotas.clone(), @@ -1256,7 +1273,7 @@ impl EnvelopeProcessorService { mut managed_envelope: TypedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: RateLimits, + rate_limits: Arc, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> ProcessEnvelopeState { @@ -1327,10 +1344,11 @@ impl EnvelopeProcessorService { // Update cached rate limits with the freshly computed ones. 
if !limits.is_empty() { - self.inner.addrs.project_cache.send(UpdateRateLimits::new( - state.managed_envelope.scoping().project_key, - limits, - )); + self.inner + .project_cache + .get(state.managed_envelope.scoping().project_key) + .rate_limits() + .merge(limits); } Ok(()) @@ -1866,7 +1884,7 @@ impl EnvelopeProcessorService { mut managed_envelope: ManagedEnvelope, project_id: ProjectId, project_info: Arc, - rate_limits: RateLimits, + rate_limits: Arc, sampling_project_info: Option>, reservoir_counters: Arc>>, ) -> Result { @@ -2084,10 +2102,8 @@ impl EnvelopeProcessorService { } } - fn handle_process_project_metrics(&self, cogs: &mut Token, message: ProcessProjectMetrics) { - let ProcessProjectMetrics { - project_state, - rate_limits, + fn handle_process_metrics(&self, cogs: &mut Token, message: ProcessMetrics) { + let ProcessMetrics { data, project_key, received_at, @@ -2126,13 +2142,16 @@ impl EnvelopeProcessorService { true }); + let project = self.inner.project_cache.get(project_key); + // Best effort check to filter and rate limit buckets, if there is no project state // available at the current time, we will check again after flushing. - let buckets = match project_state.enabled() { - Some(project_info) => { - self.check_buckets(project_key, &project_info, &rate_limits, buckets) + let buckets = match project.state() { + ProjectState::Enabled(project_info) => { + let rate_limits = project.rate_limits().current_limits(); + self.check_buckets(project_key, project_info, &rate_limits, buckets) } - None => buckets, + _ => buckets, }; relay_log::trace!("merging metric buckets into the aggregator"); @@ -2167,21 +2186,17 @@ impl EnvelopeProcessorService { } }; - let mut feature_weights = FeatureWeights::none(); for (project_key, buckets) in buckets { - feature_weights = feature_weights.merge(relay_metrics::cogs::BySize(&buckets).into()); - - self.inner.addrs.project_cache.send(ProcessMetrics { - data: MetricData::Parsed(buckets), - project_key, - source, - received_at, - sent_at, - }); - } - - if !feature_weights.is_empty() { - cogs.update(feature_weights); + self.handle_process_metrics( + cogs, + ProcessMetrics { + data: MetricData::Parsed(buckets), + project_key, + source, + received_at, + sent_at, + }, + ) } } @@ -2229,7 +2244,7 @@ impl EnvelopeProcessorService { envelope, body, http_encoding, - project_cache: self.inner.addrs.project_cache.clone(), + project_cache: self.inner.project_cache.clone(), })); } Err(error) => { @@ -2382,10 +2397,11 @@ impl EnvelopeProcessorService { Outcome::RateLimited(reason_code), ); - self.inner.addrs.project_cache.send(UpdateRateLimits::new( - item_scoping.scoping.project_key, - limits, - )); + self.inner + .project_cache + .get(item_scoping.scoping.project_key) + .rate_limits() + .merge(limits); } } @@ -2460,9 +2476,10 @@ impl EnvelopeProcessorService { if was_enforced { // Update the rate limits in the project cache. 
self.inner - .addrs .project_cache - .send(UpdateRateLimits::new(scoping.project_key, rate_limits)); + .get(scoping.project_key) + .rate_limits() + .merge(rate_limits); } } } @@ -2773,7 +2790,7 @@ impl EnvelopeProcessorService { match message { EnvelopeProcessor::ProcessEnvelope(m) => self.handle_process_envelope(*m), EnvelopeProcessor::ProcessProjectMetrics(m) => { - self.handle_process_project_metrics(&mut cogs, *m) + self.handle_process_metrics(&mut cogs, *m) } EnvelopeProcessor::ProcessBatchedMetrics(m) => { self.handle_process_batched_metrics(&mut cogs, *m) @@ -2919,7 +2936,7 @@ pub struct SendEnvelope { envelope: TypedEnvelope, body: Bytes, http_encoding: HttpEncoding, - project_cache: Addr, + project_cache: ProjectCacheHandle, } impl UpstreamRequest for SendEnvelope { @@ -2971,10 +2988,10 @@ impl UpstreamRequest for SendEnvelope { self.envelope.accept(); if let UpstreamRequestError::RateLimited(limits) = error { - self.project_cache.send(UpdateRateLimits::new( - scoping.project_key, - limits.scope(&scoping), - )); + self.project_cache + .get(scoping.project_key) + .rate_limits() + .merge(limits.scope(&scoping)); } } Err(error) => { @@ -3714,16 +3731,14 @@ mod tests { ), (BucketSource::Internal, None), ] { - let message = ProcessProjectMetrics { + let message = ProcessMetrics { data: MetricData::Raw(vec![item.clone()]), - project_state: ProjectState::Pending, - rate_limits: Default::default(), project_key, source, received_at, sent_at: Some(Utc::now()), }; - processor.handle_process_project_metrics(&mut token, message); + processor.handle_process_metrics(&mut token, message); let value = aggregator_rx.recv().await.unwrap(); let Aggregator::MergeBuckets(merge_buckets) = value else { @@ -3741,11 +3756,11 @@ mod tests { let received_at = Utc::now(); let config = Config::default(); - let (project_cache, mut project_cache_rx) = Addr::custom(); + let (aggregator, mut aggregator_rx) = Addr::custom(); let processor = create_test_processor_with_addrs( config, Addrs { - project_cache, + aggregator, ..Default::default() }, ); @@ -3788,78 +3803,72 @@ mod tests { }; processor.handle_process_batched_metrics(&mut token, message); - let value = project_cache_rx.recv().await.unwrap(); - let ProjectCache::ProcessMetrics(pm1) = value else { + let value = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb1) = value else { panic!() }; - let value = project_cache_rx.recv().await.unwrap(); - let ProjectCache::ProcessMetrics(pm2) = value else { + let value = aggregator_rx.recv().await.unwrap(); + let Aggregator::MergeBuckets(mb2) = value else { panic!() }; - let mut messages = vec![pm1, pm2]; + let mut messages = vec![mb1, mb2]; messages.sort_by_key(|pm| pm.project_key); let actual = messages .into_iter() - .map(|pm| (pm.project_key, pm.data, pm.source)) + .map(|pm| (pm.project_key, pm.buckets)) .collect::>(); assert_debug_snapshot!(actual, @r###" [ ( ProjectKey("11111111111111111111111111111111"), - Parsed( - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.response_time@millisecond", - ), - value: Distribution( - [ - 68.0, - ], - ), - tags: { - "route": "user_index", - }, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, - }, + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.response_time@millisecond", + ), + value: Distribution( + [ + 68.0, + ], + ), + tags: { + "route": "user_index", }, - ], - ), - Internal, + metadata: 
BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, + }, + }, + ], ), ( ProjectKey("22222222222222222222222222222222"), - Parsed( - [ - Bucket { - timestamp: UnixTimestamp(1615889440), - width: 0, - name: MetricName( - "d:custom/endpoint.cache_rate@none", - ), - value: Distribution( - [ - 36.0, - ], - ), - tags: {}, - metadata: BucketMetadata { - merges: 1, - received_at: None, - extracted_from_indexed: false, - }, + [ + Bucket { + timestamp: UnixTimestamp(1615889440), + width: 0, + name: MetricName( + "d:custom/endpoint.cache_rate@none", + ), + value: Distribution( + [ + 36.0, + ], + ), + tags: {}, + metadata: BucketMetadata { + merges: 1, + received_at: None, + extracted_from_indexed: false, }, - ], - ), - Internal, + }, + ], ), ] "###); diff --git a/relay-server/src/services/processor/metrics.rs b/relay-server/src/services/processor/metrics.rs index 0d28285e77..703898e425 100644 --- a/relay-server/src/services/processor/metrics.rs +++ b/relay-server/src/services/processor/metrics.rs @@ -5,7 +5,7 @@ use relay_quotas::Scoping; use crate::metrics::MetricOutcomes; use crate::services::outcome::Outcome; -use crate::services::projects::cache::BucketSource; +use crate::services::processor::BucketSource; use crate::services::projects::project::ProjectInfo; /// Checks if the namespace of the passed bucket is valid. diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 9ef4945a6e..a6452515b2 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -848,7 +848,7 @@ mod tests { extracted_metrics: ProcessingExtractedMetrics::new(), config: Arc::new(Config::default()), project_info, - rate_limits: RateLimits::default(), + rate_limits: Arc::new(RateLimits::default()), sampling_project_info: None, project_id: ProjectId::new(42), managed_envelope: managed_envelope.try_into().unwrap(), diff --git a/relay-server/src/services/projects/cache/handle.rs b/relay-server/src/services/projects/cache/handle.rs new file mode 100644 index 0000000000..415bad5b75 --- /dev/null +++ b/relay-server/src/services/projects/cache/handle.rs @@ -0,0 +1,104 @@ +use std::fmt; +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_system::Addr; +use tokio::sync::broadcast; + +use super::state::Shared; +use crate::services::projects::cache::service::ProjectChange; +use crate::services::projects::cache::{Project, ProjectCache}; + +/// A synchronous handle to the [`ProjectCache`]. +/// +/// The handle allows lock free access to cached projects. It also acts as an interface +/// to the [`ProjectCacheService`](super::ProjectCacheService). +#[derive(Clone)] +pub struct ProjectCacheHandle { + pub(super) shared: Arc, + pub(super) config: Arc, + pub(super) service: Addr, + pub(super) project_changes: broadcast::Sender, +} + +impl ProjectCacheHandle { + /// Returns the current project state for the `project_key`. + pub fn get(&self, project_key: ProjectKey) -> Project<'_> { + let project = self.shared.get_or_create(project_key); + // Always trigger a fetch after retrieving the project to make sure the state is up to date. + self.fetch(project_key); + + Project::new(project, &self.config) + } + + /// Triggers a fetch/update check in the project cache for the supplied project. 
+ pub fn fetch(&self, project_key: ProjectKey) { + self.service.send(ProjectCache::Fetch(project_key)); + } + + /// Returns a subscription to all [`ProjectChange`]'s. + /// + /// This stream notifies the subscriber about project state changes in the project cache. + /// Events may arrive in arbitrary order and be delivered multiple times. + pub fn changes(&self) -> broadcast::Receiver { + self.project_changes.subscribe() + } +} + +impl fmt::Debug for ProjectCacheHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ProjectCacheHandle") + .field("shared", &self.shared) + .finish() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::services::projects::project::ProjectState; + + impl ProjectCacheHandle { + /// Creates a new [`ProjectCacheHandle`] for testing only. + /// + /// A project cache handle created this way does not require a service to function. + pub fn for_test() -> Self { + Self { + shared: Default::default(), + config: Default::default(), + service: Addr::dummy(), + project_changes: broadcast::channel(999_999).0, + } + } + + /// Sets the project state for a project. + /// + /// This can be used to emulate a project cache update in tests. + pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) { + let is_pending = state.is_pending(); + self.shared.test_set_project_state(project_key, state); + if is_pending { + let _ = self + .project_changes + .send(ProjectChange::Evicted(project_key)); + } else { + let _ = self.project_changes.send(ProjectChange::Ready(project_key)); + } + } + + /// Returns `true` if there is a project created for this `project_key`. + /// + /// A project is automatically created on access via [`Self::get`]. + pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool { + self.shared.test_has_project_created(project_key) + } + + /// The amount of fetches triggered for projects. + /// + /// A fetch is triggered for both [`Self::get`] and [`Self::fetch`]. 
+ pub fn test_num_fetches(&self) -> u64 { + self.service.len() + } + } +} diff --git a/relay-server/src/services/projects/cache.rs b/relay-server/src/services/projects/cache/legacy.rs similarity index 52% rename from relay-server/src/services/projects/cache.rs rename to relay-server/src/services/projects/cache/legacy.rs index d337ee63e9..0dc95464c1 100644 --- a/relay-server/src/services/projects/cache.rs +++ b/relay-server/src/services/projects/cache/legacy.rs @@ -3,146 +3,40 @@ use std::error::Error; use std::sync::Arc; use std::time::Duration; -use crate::extractors::RequestMeta; use crate::services::buffer::{EnvelopeBuffer, EnvelopeBufferError}; use crate::services::global_config; use crate::services::processor::{ - EncodeMetrics, EnvelopeProcessor, MetricData, ProcessEnvelope, ProcessingGroup, ProjectMetrics, + EncodeMetrics, EnvelopeProcessor, ProcessEnvelope, ProcessingGroup, ProjectMetrics, }; +use crate::services::projects::cache::{CheckedEnvelope, ProjectCacheHandle, ProjectChange}; use crate::Envelope; -use chrono::{DateTime, Utc}; use hashbrown::HashSet; use relay_base_schema::project::ProjectKey; use relay_config::Config; -use relay_metrics::Bucket; -use relay_quotas::RateLimits; -use relay_redis::RedisPool; use relay_statsd::metric; -use relay_system::{Addr, FromMessage, Interface, Sender, Service}; +use relay_system::{Addr, FromMessage, Interface, Service}; use tokio::sync::{mpsc, watch}; -use tokio::time::Instant; -use crate::services::metrics::{Aggregator, FlushBuckets}; +use crate::services::metrics::{Aggregator, FlushBuckets, MergeBuckets}; use crate::services::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::services::projects::project::{Project, ProjectFetchState, ProjectSender, ProjectState}; -use crate::services::projects::source::ProjectSource; +use crate::services::projects::project::ProjectState; use crate::services::spooler::{ self, Buffer, BufferService, DequeueMany, Enqueue, QueueKey, RemoveMany, RestoreIndex, UnspooledEnvelope, BATCH_KEY_COUNT, }; use crate::services::test_store::TestStore; -use crate::services::upstream::UpstreamRelay; -use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; -use crate::utils::{GarbageDisposal, ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; - -/// Requests a refresh of a project state from one of the available sources. -/// -/// The project state is resolved in the following precedence: -/// -/// 1. Local file system -/// 2. Redis cache (processing mode only) -/// 3. Upstream (managed and processing mode only) -/// -/// Requests to the upstream are performed via `UpstreamProjectSource`, which internally batches -/// individual requests. -#[derive(Clone, Debug)] -pub struct RequestUpdate { - /// The public key to fetch the project by. - pub project_key: ProjectKey, - /// If true, all caches should be skipped and a fresh state should be computed. - pub no_cache: bool, - /// Previously cached fetch state, if available. - /// - /// The upstream request will include the revision of the currently cached state, - /// if the upstream does not have a different revision, this cached - /// state is re-used and its expiry bumped. - pub cached_state: ProjectFetchState, -} - -/// Returns the project state. -/// -/// The project state is fetched if it is missing or outdated. If `no_cache` is specified, then the -/// state is always refreshed. 
-#[derive(Debug)] -pub struct GetProjectState { - project_key: ProjectKey, - no_cache: bool, -} - -impl GetProjectState { - /// Fetches the project state and uses the cached version if up-to-date. - pub fn new(project_key: ProjectKey) -> Self { - Self { - project_key, - no_cache: false, - } - } - - /// Fetches the project state and conditionally skips the cache. - pub fn no_cache(mut self, no_cache: bool) -> Self { - self.no_cache = no_cache; - self - } -} - -/// Returns the project state if it is already cached. -/// -/// This is used for cases when we only want to perform operations that do -/// not require waiting for network requests. -#[derive(Debug)] -pub struct GetCachedProjectState { - project_key: ProjectKey, -} - -impl GetCachedProjectState { - pub fn new(project_key: ProjectKey) -> Self { - Self { project_key } - } -} - -/// A checked envelope and associated rate limits. -/// -/// Items violating the rate limits have been removed from the envelope. If all items are removed -/// from the envelope, `None` is returned in place of the envelope. -#[derive(Debug)] -pub struct CheckedEnvelope { - pub envelope: Option, - pub rate_limits: RateLimits, -} - -/// Checks the envelope against project configuration and rate limits. -/// -/// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated -/// project state may be used, or otherwise the envelope is passed through unaltered. -/// -/// To check the envelope, this runs: -/// - Validate origins and public keys -/// - Quotas with a limit of `0` -/// - Cached rate limits -#[derive(Debug)] -pub struct CheckEnvelope { - envelope: ManagedEnvelope, -} - -impl CheckEnvelope { - /// Uses a cached project state and checks the envelope. - pub fn new(envelope: ManagedEnvelope) -> Self { - Self { envelope } - } -} +use crate::statsd::{RelayCounters, RelayGauges, RelayTimers}; +use crate::utils::{ManagedEnvelope, MemoryChecker, RetryBackoff, SleepHandle}; /// Validates the envelope against project configuration and rate limits. /// -/// This ensures internally that the project state is up to date and then runs the same checks as -/// [`CheckEnvelope`]. Once the envelope has been validated, remaining items are forwarded to the -/// next stage: +/// This ensures internally that the project state is up to date . +/// Once the envelope has been validated, remaining items are forwarded to the next stage: /// /// - If the envelope needs dynamic sampling, and the project state is not cached or out of the /// date, the envelopes is spooled and we continue when the state is fetched. /// - Otherwise, the envelope is directly submitted to the [`EnvelopeProcessor`]. -/// -/// [`EnvelopeProcessor`]: crate::services::processor::EnvelopeProcessor #[derive(Debug)] pub struct ValidateEnvelope { envelope: ManagedEnvelope, @@ -154,66 +48,6 @@ impl ValidateEnvelope { } } -#[derive(Debug)] -pub struct UpdateRateLimits { - project_key: ProjectKey, - rate_limits: RateLimits, -} - -impl UpdateRateLimits { - pub fn new(project_key: ProjectKey, rate_limits: RateLimits) -> UpdateRateLimits { - Self { - project_key, - rate_limits, - } - } -} - -/// Source information where a metric bucket originates from. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum BucketSource { - /// The metric bucket originated from an internal Relay use case. 
- /// - /// The metric bucket originates either from within the same Relay - /// or was accepted coming from another Relay which is registered as - /// an internal Relay via Relay's configuration. - Internal, - /// The bucket source originated from an untrusted source. - /// - /// Managed Relays sending extracted metrics are considered external, - /// it's a project use case but it comes from an untrusted source. - External, -} - -impl From<&RequestMeta> for BucketSource { - fn from(value: &RequestMeta) -> Self { - if value.is_from_internal_relay() { - Self::Internal - } else { - Self::External - } - } -} - -/// Starts the processing flow for received metrics. -/// -/// Enriches the raw data with projcet information and forwards -/// the metrics using [`ProcessProjectMetrics`](crate::services::processor::ProcessProjectMetrics). -#[derive(Debug)] -pub struct ProcessMetrics { - /// A list of metric items. - pub data: MetricData, - /// The target project. - pub project_key: ProjectKey, - /// Whether to keep or reset the metric metadata. - pub source: BucketSource, - /// The wall clock time at which the request was received. - pub received_at: DateTime, - /// The value of the Envelope's [`sent_at`](crate::envelope::Envelope::sent_at) - /// header for clock drift correction. - pub sent_at: Option>, -} - /// Updates the buffer index for [`ProjectKey`] with the [`QueueKey`] keys. /// /// This message is sent from the project buffer in case of the error while fetching the data from @@ -239,59 +73,24 @@ pub struct RefreshIndexCache(pub HashSet); #[derive(Debug)] pub struct DequeuedEnvelope(pub Box); -/// A request to update a project, typically sent by the envelope buffer. -/// -/// This message is similar to [`GetProjectState`], except it has no `no_cache` option -/// and it does not send a response, but sends a signal back to the buffer instead. -pub struct UpdateProject(pub ProjectKey); - -/// A cache for [`ProjectState`]s. -/// -/// The project maintains information about organizations, projects, and project keys along with -/// settings required to ingest traffic to Sentry. Internally, it tries to keep information -/// up-to-date and automatically retires unused old data. -/// -/// To retrieve information from the cache, use [`GetProjectState`] for guaranteed up-to-date -/// information, or [`GetCachedProjectState`] for immediately available but potentially older -/// information. +/// The legacy project cache. /// -/// There are also higher-level operations, such as [`CheckEnvelope`] and [`ValidateEnvelope`] that -/// inspect contents of envelopes for ingestion, as well as [`ProcessMetrics`] to aggregate metrics -/// associated with a project. -/// -/// See the enumerated variants for a full list of available messages for this service. +/// It manages spool v1 and some remaining messages which handle project state. 
#[derive(Debug)] pub enum ProjectCache { - RequestUpdate(RequestUpdate), - Get(GetProjectState, ProjectSender), - GetCached(GetCachedProjectState, Sender), - CheckEnvelope( - CheckEnvelope, - Sender>, - ), ValidateEnvelope(ValidateEnvelope), - UpdateRateLimits(UpdateRateLimits), - ProcessMetrics(ProcessMetrics), FlushBuckets(FlushBuckets), UpdateSpoolIndex(UpdateSpoolIndex), RefreshIndexCache(RefreshIndexCache), - UpdateProject(ProjectKey), } impl ProjectCache { pub fn variant(&self) -> &'static str { match self { - Self::RequestUpdate(_) => "RequestUpdate", - Self::Get(_, _) => "Get", - Self::GetCached(_, _) => "GetCached", - Self::CheckEnvelope(_, _) => "CheckEnvelope", Self::ValidateEnvelope(_) => "ValidateEnvelope", - Self::UpdateRateLimits(_) => "UpdateRateLimits", - Self::ProcessMetrics(_) => "ProcessMetrics", Self::FlushBuckets(_) => "FlushBuckets", Self::UpdateSpoolIndex(_) => "UpdateSpoolIndex", Self::RefreshIndexCache(_) => "RefreshIndexCache", - Self::UpdateProject(_) => "UpdateProject", } } } @@ -314,41 +113,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: RequestUpdate, _: ()) -> Self { - Self::RequestUpdate(message) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::BroadcastResponse; - - fn from_message(message: GetProjectState, sender: ProjectSender) -> Self { - Self::Get(message, sender) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::AsyncResponse; - - fn from_message(message: GetCachedProjectState, sender: Sender) -> Self { - Self::GetCached(message, sender) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::AsyncResponse>; - - fn from_message( - message: CheckEnvelope, - sender: Sender>, - ) -> Self { - Self::CheckEnvelope(message, sender) - } -} - impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -357,22 +121,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: UpdateRateLimits, _: ()) -> Self { - Self::UpdateRateLimits(message) - } -} - -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: ProcessMetrics, _: ()) -> Self { - Self::ProcessMetrics(message) - } -} - impl FromMessage for ProjectCache { type Response = relay_system::NoResponse; @@ -381,27 +129,6 @@ impl FromMessage for ProjectCache { } } -impl FromMessage for ProjectCache { - type Response = relay_system::NoResponse; - - fn from_message(message: UpdateProject, _: ()) -> Self { - let UpdateProject(project_key) = message; - Self::UpdateProject(project_key) - } -} - -/// Updates the cache with new project state information. -struct UpdateProjectState { - /// The public key to fetch the project by. - project_key: ProjectKey, - - /// New project state information. - state: ProjectFetchState, - - /// If true, all caches should be skipped and a fresh state should be computed. - no_cache: bool, -} - /// Holds the addresses of all services required for [`ProjectCache`]. #[derive(Debug, Clone)] pub struct Services { @@ -411,7 +138,6 @@ pub struct Services { pub outcome_aggregator: Addr, pub project_cache: Addr, pub test_store: Addr, - pub upstream_relay: Addr, } /// Main broker of the [`ProjectCacheService`]. 
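The hunks above remove the request/response messages from the legacy `ProjectCache` interface and hand the broker a `ProjectCacheHandle` instead. A minimal sketch of how a caller might use the new handle, based only on the API visible in this diff (`get`, `fetch`, `changes`, `ProjectChange`); the `wait_for_project` helper and its surrounding setup are illustrative assumptions, not part of the change.

// Sketch only: assumes a `handle: ProjectCacheHandle` obtained from
// `ProjectCacheService::start()`; paths follow the re-exports in cache/mod.rs.
use relay_base_schema::project::ProjectKey;

use crate::services::projects::cache::{ProjectCacheHandle, ProjectChange};

async fn wait_for_project(handle: &ProjectCacheHandle, project_key: ProjectKey) {
    // Subscribe before triggering the fetch so a `Ready` notification cannot be missed.
    let mut changes = handle.changes();

    // Ask the cache service to fetch or refresh the project in the background.
    handle.fetch(project_key);

    loop {
        // Projects start out pending; re-check the shared state on every wake-up.
        if !handle.get(project_key).state().is_pending() {
            return;
        }

        match changes.recv().await {
            Ok(ProjectChange::Ready(key)) if key == project_key => return,
            // Changes for other projects and evictions are not interesting here.
            Ok(_) => continue,
            // The channel lagged or closed; give up and let the caller retry.
            Err(_) => return,
        }
    }
}

This is the same pattern the legacy broker follows below: it subscribes via `changes()` and reacts to `ProjectChange::Ready` by scheduling an unspool, rather than receiving project state updates over a dedicated channel.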
@@ -423,14 +149,7 @@ struct ProjectCacheBroker { config: Arc, memory_checker: MemoryChecker, services: Services, - // Need hashbrown because extract_if is not stable in std yet. - projects: hashbrown::HashMap, - /// Utility for disposing of expired project data in a background thread. - garbage_disposal: GarbageDisposal, - /// Source for fetching project states from the upstream or from disk. - source: ProjectSource, - /// Tx channel used to send the updated project state whenever requested. - state_tx: mpsc::UnboundedSender, + projects: ProjectCacheHandle, /// Handle to schedule periodic unspooling of buffered envelopes (spool V1). spool_v1_unspool_handle: SleepHandle, @@ -493,194 +212,35 @@ impl ProjectCacheBroker { .send(DequeueMany::new(keys, spool_v1.buffer_tx.clone())) } - /// Evict projects that are over its expiry date. - /// - /// Ideally, we would use `check_expiry` to determine expiry here. - /// However, for eviction, we want to add an additional delay, such that we do not delete - /// a project that has expired recently and for which a fetch is already underway in - /// [`super::source::upstream`]. - fn evict_stale_project_caches(&mut self) { - let eviction_start = Instant::now(); - let delta = 2 * self.config.project_cache_expiry() + self.config.project_grace_period(); - - let expired = self - .projects - .extract_if(|_, entry| entry.last_updated_at() + delta <= eviction_start); - - // Defer dropping the projects to a dedicated thread: - let mut count = 0; - for (project_key, project) in expired { - if let Some(spool_v1) = self.spool_v1.as_mut() { - let keys = spool_v1 - .index - .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) - .collect::>(); - - if !keys.is_empty() { - spool_v1.buffer.send(RemoveMany::new(project_key, keys)) - } - } - - self.garbage_disposal.dispose(project); - count += 1; - } - metric!(counter(RelayCounters::EvictingStaleProjectCaches) += count); - - // Log garbage queue size: - let queue_size = self.garbage_disposal.queue_size() as f64; - metric!(gauge(RelayGauges::ProjectCacheGarbageQueueSize) = queue_size); - - metric!(timer(RelayTimers::ProjectStateEvictionDuration) = eviction_start.elapsed()); - } - - fn get_or_create_project(&mut self, project_key: ProjectKey) -> &mut Project { - metric!(histogram(RelayHistograms::ProjectStateCacheSize) = self.projects.len() as u64); - - let config = self.config.clone(); - - self.projects - .entry(project_key) - .and_modify(|_| { - metric!(counter(RelayCounters::ProjectCacheHit) += 1); - }) - .or_insert_with(move || { - metric!(counter(RelayCounters::ProjectCacheMiss) += 1); - Project::new(project_key, config) - }) - } - - /// Updates the [`Project`] with received [`ProjectState`]. - /// - /// If the project state is valid we also send the message to the buffer service to dequeue the - /// envelopes for this project. - fn merge_state(&mut self, message: UpdateProjectState) { - let UpdateProjectState { - project_key, - state, - no_cache, - } = message; - - let project_cache = self.services.project_cache.clone(); - - let old_state = - self.get_or_create_project(project_key) - .update_state(&project_cache, state, no_cache); - - if let Some(old_state) = old_state { - self.garbage_disposal.dispose(old_state); - } - - // Try to schedule unspool if it's not scheduled yet. 
- self.schedule_unspool(); - - if let Some(envelope_buffer) = self.services.envelope_buffer.as_ref() { - envelope_buffer.send(EnvelopeBuffer::Ready(project_key)) + fn evict_project(&mut self, project_key: ProjectKey) { + let Some(ref mut spool_v1) = self.spool_v1 else { + return; }; - } - - fn handle_request_update(&mut self, message: RequestUpdate) { - let RequestUpdate { - project_key, - no_cache, - cached_state, - } = message; - // Bump the update time of the project in our hashmap to evade eviction. - let project = self.get_or_create_project(project_key); - project.refresh_updated_timestamp(); - let next_attempt = project.next_fetch_attempt(); + let keys = spool_v1 + .index + .extract_if(|key| key.own_key == project_key || key.sampling_key == project_key) + .collect::>(); - let source = self.source.clone(); - let sender = self.state_tx.clone(); - - tokio::spawn(async move { - // Wait on the new attempt time when set. - if let Some(next_attempt) = next_attempt { - tokio::time::sleep_until(next_attempt).await; - } - let state = source - .fetch(project_key, no_cache, cached_state) - .await - .unwrap_or_else(|e| { - relay_log::error!( - error = &e as &dyn Error, - tags.project_key = %project_key, - "Failed to fetch project from source" - ); - // TODO: change this to ProjectFetchState::pending() once we consider it safe to do so. - // see https://github.com/getsentry/relay/pull/4140. - ProjectFetchState::disabled() - }); - - let message = UpdateProjectState { - project_key, - no_cache, - state, - }; - - sender.send(message).ok(); - }); - } - - fn handle_get(&mut self, message: GetProjectState, sender: ProjectSender) { - let GetProjectState { - project_key, - no_cache, - } = message; - let project_cache = self.services.project_cache.clone(); - let project = self.get_or_create_project(project_key); - - project.get_state(project_cache, sender, no_cache); - } - - fn handle_get_cached(&mut self, message: GetCachedProjectState) -> ProjectState { - let project_cache = self.services.project_cache.clone(); - self.get_or_create_project(message.project_key) - .get_cached_state(project_cache, false) + if !keys.is_empty() { + spool_v1.buffer.send(RemoveMany::new(project_key, keys)) + } } - fn handle_check_envelope( - &mut self, - message: CheckEnvelope, - ) -> Result { - let CheckEnvelope { envelope: context } = message; - let project_cache = self.services.project_cache.clone(); - let project_key = context.envelope().meta().public_key(); - if let Some(sampling_key) = context.envelope().sampling_key() { - if sampling_key != project_key { - let sampling_project = self.get_or_create_project(sampling_key); - sampling_project.prefetch(project_cache.clone(), false); - } + fn handle_project_event(&mut self, event: ProjectChange) { + match event { + ProjectChange::Ready(_) => self.schedule_unspool(), + ProjectChange::Evicted(project_key) => self.evict_project(project_key), } - let project = self.get_or_create_project(project_key); - - // Preload the project cache so that it arrives a little earlier in processing. However, - // do not pass `no_cache`. In case the project is rate limited, we do not want to force - // a full reload. Fetching must not block the store request. - project.prefetch(project_cache, false); - - project.check_envelope(context) } /// Handles the processing of the provided envelope. 
- fn handle_processing(&mut self, key: QueueKey, mut managed_envelope: ManagedEnvelope) { + fn handle_processing(&mut self, mut managed_envelope: ManagedEnvelope) { let project_key = managed_envelope.envelope().meta().public_key(); - let Some(project) = self.projects.get_mut(&project_key) else { - relay_log::error!( - tags.project_key = %project_key, - "project could not be found in the cache", - ); - - let mut project = Project::new(project_key, self.config.clone()); - project.prefetch(self.services.project_cache.clone(), false); - self.projects.insert(project_key, project); - self.enqueue(key, managed_envelope); - return; - }; + let project = self.projects.get(project_key); - let project_cache = self.services.project_cache.clone(); - let project_info = match project.get_cached_state(project_cache.clone(), false) { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -707,20 +267,19 @@ impl ProjectCacheBroker { .. }) = project.check_envelope(managed_envelope) { - let rate_limits = project.current_rate_limits().clone(); - - let reservoir_counters = project.reservoir_counters(); + let rate_limits = project.rate_limits().current_limits(); + let reservoir_counters = project.reservoir_counters().clone(); let sampling_project_info = managed_envelope .envelope() .sampling_key() - .and_then(|key| self.projects.get_mut(&key)) - .and_then(|p| p.get_cached_state(project_cache, false).enabled()) - .filter(|state| state.organization_id == project_info.organization_id); + .map(|key| self.projects.get(key)) + .and_then(|p| p.state().clone().enabled()) + .filter(|info| info.organization_id == project_info.organization_id); let process = ProcessEnvelope { envelope: managed_envelope, - project_info, + project_info: Arc::clone(project_info), rate_limits, sampling_project_info, reservoir_counters, @@ -748,17 +307,13 @@ impl ProjectCacheBroker { envelope: mut managed_envelope, } = message; - let project_cache = self.services.project_cache.clone(); let envelope = managed_envelope.envelope(); // Fetch the project state for our key and make sure it's not invalid. let own_key = envelope.meta().public_key(); - let project = self.get_or_create_project(own_key); + let project = self.projects.get(own_key); - let project_state = - project.get_cached_state(project_cache.clone(), envelope.meta().no_cache()); - - let project_state = match project_state { + let project_state = match project.state() { ProjectState::Enabled(state) => Some(state), ProjectState::Disabled => { managed_envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); @@ -770,12 +325,10 @@ impl ProjectCacheBroker { // Also, fetch the project state for sampling key and make sure it's not invalid. let sampling_key = envelope.sampling_key(); let mut requires_sampling_state = sampling_key.is_some(); - let sampling_state = if let Some(sampling_key) = sampling_key { - let state = self - .get_or_create_project(sampling_key) - .get_cached_state(project_cache, envelope.meta().no_cache()); - match state { - ProjectState::Enabled(state) => Some(state), + let sampling_info = if let Some(sampling_key) = sampling_key { + let sampling_project = self.projects.get(sampling_key); + match sampling_project.state() { + ProjectState::Enabled(info) => Some(Arc::clone(info)), ProjectState::Disabled => { relay_log::trace!("Sampling state is disabled ({sampling_key})"); // We accept events even if its root project has been disabled. 
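Both `handle_processing` and `handle_validate_envelope` above now resolve the sampling project through the same handle and keep its info only when it is enabled and belongs to the same organization as the envelope's own project. A condensed, illustrative sketch of that pattern; the `sampling_info` helper is an assumption for illustration, and the real code additionally tracks whether a pending sampling state must be awaited before processing.

use std::sync::Arc;

use crate::services::projects::cache::ProjectCacheHandle;
use crate::services::projects::project::{ProjectInfo, ProjectState};
use crate::Envelope;

// Sketch only: mirrors the `.filter(|info| info.organization_id == ...)` logic above.
fn sampling_info(
    projects: &ProjectCacheHandle,
    envelope: &Envelope,
    own_info: &Arc<ProjectInfo>,
) -> Option<Arc<ProjectInfo>> {
    let sampling_key = envelope.sampling_key()?;

    match projects.get(sampling_key).state() {
        // Clone the `Arc`, not the project info itself.
        ProjectState::Enabled(info) if info.organization_id == own_info.organization_id => {
            Some(Arc::clone(info))
        }
        // Disabled or still-pending sampling projects yield no sampling info here.
        _ => None,
    }
}

Returning `None` for a disabled sampling project matches the comment above: such envelopes are still accepted, just without a sampling project attached.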
@@ -796,50 +349,33 @@ impl ProjectCacheBroker { // Trigger processing once we have a project state and we either have a sampling project // state or we do not need one. if project_state.is_some() - && (sampling_state.is_some() || !requires_sampling_state) + && (sampling_info.is_some() || !requires_sampling_state) && self.memory_checker.check_memory().has_capacity() && self.global_config.is_ready() { // TODO: Add ready project infos to the processing message. relay_log::trace!("Sending envelope to processor"); - return self.handle_processing(key, managed_envelope); + return self.handle_processing(managed_envelope); } relay_log::trace!("Enqueueing envelope"); self.enqueue(key, managed_envelope); } - fn handle_rate_limits(&mut self, message: UpdateRateLimits) { - self.get_or_create_project(message.project_key) - .merge_rate_limits(message.rate_limits); - } - - fn handle_process_metrics(&mut self, message: ProcessMetrics) { - let project_cache = self.services.project_cache.clone(); - - let message = self - .get_or_create_project(message.project_key) - .prefetch(project_cache, false) - .process_metrics(message); - - self.services.envelope_processor.send(message); - } - fn handle_flush_buckets(&mut self, message: FlushBuckets) { let aggregator = self.services.aggregator.clone(); - let project_cache = self.services.project_cache.clone(); let mut no_project = 0; let mut scoped_buckets = BTreeMap::new(); for (project_key, buckets) in message.buckets { - let project = self.get_or_create_project(project_key); + let project = self.projects.get(project_key); - let project_info = match project.current_state() { + let project_info = match project.state() { ProjectState::Pending => { no_project += 1; - // Schedule an update for the project just in case. - project.prefetch(project_cache.clone(), false); - project.return_buckets(&aggregator, buckets); + + // Return the buckets to the aggregator. + aggregator.send(MergeBuckets::new(project_key, buckets)); continue; } ProjectState::Disabled => { @@ -848,13 +384,12 @@ impl ProjectCacheBroker { // Ideally we log outcomes for the metrics here, but currently for metric // outcomes we need a valid scoping, which we cannot construct for disabled // projects. 
- self.garbage_disposal.dispose(buckets); continue; } ProjectState::Enabled(project_info) => project_info, }; - let Some(scoping) = project.scoping() else { + let Some(scoping) = project_info.scoping(project_key) else { relay_log::error!( tags.project_key = project_key.as_str(), "there is no scoping: dropping {} buckets", @@ -867,8 +402,8 @@ impl ProjectCacheBroker { match scoped_buckets.entry(scoping) { Vacant(entry) => { entry.insert(ProjectMetrics { - project_info, - rate_limits: project.current_rate_limits().clone(), + project_info: Arc::clone(project_info), + rate_limits: project.rate_limits().current_limits(), buckets, }); } @@ -895,16 +430,14 @@ impl ProjectCacheBroker { fn handle_refresh_index_cache(&mut self, message: RefreshIndexCache) { let RefreshIndexCache(index) = message; - let project_cache = self.services.project_cache.clone(); for key in index { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); spool_v1.index.insert(key); - self.get_or_create_project(key.own_key) - .prefetch(project_cache.clone(), false); + + self.projects.fetch(key.own_key); if key.own_key != key.sampling_key { - self.get_or_create_project(key.sampling_key) - .prefetch(project_cache.clone(), false); + self.projects.fetch(key.sampling_key); } } } @@ -918,11 +451,10 @@ impl ProjectCacheBroker { let services = self.services.clone(); let own_key = envelope.meta().public_key(); - let project = self.get_or_create_project(own_key); - let project_state = project.get_cached_state(services.project_cache.clone(), false); + let project = self.projects.get(own_key); // Check if project config is enabled. - let project_info = match project_state { + let project_info = match project.state() { ProjectState::Enabled(info) => info, ProjectState::Disabled => { let mut managed_envelope = ManagedEnvelope::new( @@ -944,8 +476,7 @@ impl ProjectCacheBroker { let sampling_project_info = match sampling_key.map(|sampling_key| { ( sampling_key, - self.get_or_create_project(sampling_key) - .get_cached_state(services.project_cache, false), + self.projects.get(sampling_key).state().clone(), ) }) { Some((_, ProjectState::Enabled(info))) => { @@ -964,8 +495,6 @@ impl ProjectCacheBroker { None => None, }; - let project = self.get_or_create_project(own_key); - // Reassign processing groups and proceed to processing. for (group, envelope) in ProcessingGroup::split_envelope(*envelope) { let managed_envelope = ManagedEnvelope::new( @@ -983,11 +512,11 @@ impl ProjectCacheBroker { continue; // Outcomes are emitted by check_envelope }; - let reservoir_counters = project.reservoir_counters(); + let reservoir_counters = project.reservoir_counters().clone(); services.envelope_processor.send(ProcessEnvelope { envelope: managed_envelope, project_info: project_info.clone(), - rate_limits: project.current_rate_limits().clone(), + rate_limits: project.rate_limits().current_limits(), sampling_project_info: sampling_project_info.clone(), reservoir_counters, }); @@ -996,22 +525,6 @@ impl ProjectCacheBroker { Ok(()) } - fn handle_update_project(&mut self, project_key: ProjectKey) { - let project_cache = self.services.project_cache.clone(); - let envelope_buffer = self.services.envelope_buffer.clone(); - let project = self.get_or_create_project(project_key); - - // If the project is already loaded, inform the envelope buffer. 
- if !project.current_state().is_pending() { - if let Some(envelope_buffer) = envelope_buffer { - envelope_buffer.send(EnvelopeBuffer::Ready(project_key)); - } - } - - let no_cache = false; - project.prefetch(project_cache, no_cache); - } - /// Returns backoff timeout for an unspool attempt. fn next_unspool_attempt(&mut self) -> Duration { let spool_v1 = self.spool_v1.as_mut().expect("no V1 spool configured"); @@ -1036,15 +549,9 @@ impl ProjectCacheBroker { /// Which includes the own key and the sampling key for the project. /// Note: this function will trigger [`ProjectState`] refresh if it's already expired. fn is_state_cached(&mut self, key: &QueueKey) -> bool { - key.unique_keys().iter().all(|key| { - self.projects.get_mut(key).is_some_and(|project| { - // Returns `Some` if the project is cached otherwise None and also triggers refresh - // in background. - !project - .get_cached_state(self.services.project_cache.clone(), false) - .is_pending() - }) - }) + key.unique_keys() + .iter() + .all(|key| !self.projects.get(*key).state().is_pending()) } /// Iterates the buffer index and tries to unspool the envelopes for projects with a valid @@ -1104,29 +611,18 @@ impl ProjectCacheBroker { fn handle_message(&mut self, message: ProjectCache) { let ty = message.variant(); metric!( - timer(RelayTimers::ProjectCacheMessageDuration), + timer(RelayTimers::LegacyProjectCacheMessageDuration), message = ty, { match message { - ProjectCache::RequestUpdate(message) => self.handle_request_update(message), - ProjectCache::Get(message, sender) => self.handle_get(message, sender), - ProjectCache::GetCached(message, sender) => { - sender.send(self.handle_get_cached(message)) - } - ProjectCache::CheckEnvelope(message, sender) => { - sender.send(self.handle_check_envelope(message)) - } ProjectCache::ValidateEnvelope(message) => { self.handle_validate_envelope(message) } - ProjectCache::UpdateRateLimits(message) => self.handle_rate_limits(message), - ProjectCache::ProcessMetrics(message) => self.handle_process_metrics(message), ProjectCache::FlushBuckets(message) => self.handle_flush_buckets(message), ProjectCache::UpdateSpoolIndex(message) => self.handle_buffer_index(message), ProjectCache::RefreshIndexCache(message) => { self.handle_refresh_index_cache(message) } - ProjectCache::UpdateProject(project) => self.handle_update_project(project), } } ) @@ -1153,11 +649,11 @@ impl ProjectCacheBroker { pub struct ProjectCacheService { config: Arc, memory_checker: MemoryChecker, + project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, /// Bounded channel used exclusively to receive envelopes from the envelope buffer. 
envelopes_rx: mpsc::Receiver, - redis: Option, } impl ProjectCacheService { @@ -1165,18 +661,18 @@ impl ProjectCacheService { pub fn new( config: Arc, memory_checker: MemoryChecker, + project_cache_handle: ProjectCacheHandle, services: Services, global_config_rx: watch::Receiver, envelopes_rx: mpsc::Receiver, - redis: Option, ) -> Self { Self { config, memory_checker, + project_cache_handle, services, global_config_rx, envelopes_rx, - redis, } } } @@ -1188,22 +684,19 @@ impl Service for ProjectCacheService { let Self { config, memory_checker, + project_cache_handle, services, mut global_config_rx, mut envelopes_rx, - redis, } = self; + let mut project_events = project_cache_handle.changes(); let project_cache = services.project_cache.clone(); let outcome_aggregator = services.outcome_aggregator.clone(); let test_store = services.test_store.clone(); tokio::spawn(async move { - let mut ticker = tokio::time::interval(config.cache_eviction_interval()); relay_log::info!("project cache started"); - // Channel for async project state responses back into the project cache. - let (state_tx, mut state_rx) = mpsc::unbounded_channel(); - let global_config = match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => { relay_log::info!("global config received"); @@ -1256,20 +749,11 @@ impl Service for ProjectCacheService { }), }; - // Main broker that serializes public and internal messages, and triggers project state - // fetches via the project source. let mut broker = ProjectCacheBroker { config: config.clone(), memory_checker, - projects: hashbrown::HashMap::new(), - garbage_disposal: GarbageDisposal::new(), - source: ProjectSource::start( - config.clone(), - services.upstream_relay.clone(), - redis, - ), + projects: project_cache_handle, services, - state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1, global_config, @@ -1280,7 +764,7 @@ impl Service for ProjectCacheService { biased; Ok(()) = global_config_rx.changed() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "update_global_config", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "update_global_config", { match global_config_rx.borrow().clone() { global_config::Status::Ready(_) => broker.set_global_config_ready(), // The watch should only be updated if it gets a new value. @@ -1289,35 +773,30 @@ impl Service for ProjectCacheService { } }) }, - Some(message) = state_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "merge_state", { - broker.merge_state(message) + Ok(project_event) = project_events.recv() => { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_project_event", { + broker.handle_project_event(project_event); }) } // Buffer will not dequeue the envelopes from the spool if there is not enough // permits in `BufferGuard` available. Currently this is 50%. - Some(UnspooledEnvelope { managed_envelope, key }) = buffer_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_processing", { - broker.handle_processing(key, managed_envelope) + Some(UnspooledEnvelope { managed_envelope, .. 
}) = buffer_rx.recv() => { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_processing", { + broker.handle_processing(managed_envelope) }) }, - _ = ticker.tick() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "evict_project_caches", { - broker.evict_stale_project_caches() - }) - } () = &mut broker.spool_v1_unspool_handle => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "periodic_unspool", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "periodic_unspool", { broker.handle_periodic_unspool() }) } Some(message) = rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_message", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_message", { broker.handle_message(message) }) } Some(message) = envelopes_rx.recv() => { - metric!(timer(RelayTimers::ProjectCacheTaskDuration), task = "handle_envelope", { + metric!(timer(RelayTimers::LegacyProjectCacheTaskDuration), task = "handle_envelope", { broker.handle_envelope(message) }) } @@ -1330,33 +809,6 @@ impl Service for ProjectCacheService { } } -/// Sum type for all objects which need to be discareded through the [`GarbageDisposal`]. -#[derive(Debug)] -#[allow(dead_code)] // Fields are never read, only used for discarding/dropping data. -enum ProjectGarbage { - Project(Project), - ProjectFetchState(ProjectFetchState), - Metrics(Vec), -} - -impl From for ProjectGarbage { - fn from(value: Project) -> Self { - Self::Project(value) - } -} - -impl From for ProjectGarbage { - fn from(value: ProjectFetchState) -> Self { - Self::ProjectFetchState(value) - } -} - -impl From> for ProjectGarbage { - fn from(value: Vec) -> Self { - Self::Metrics(value) - } -} - #[cfg(test)] mod tests { use relay_test::mock_service; @@ -1375,7 +827,6 @@ mod tests { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); - let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); Services { envelope_buffer: None, @@ -1384,13 +835,11 @@ mod tests { project_cache, outcome_aggregator, test_store, - upstream_relay, } } async fn project_cache_broker_setup( services: Services, - state_tx: mpsc::UnboundedSender, buffer_tx: mpsc::UnboundedSender, ) -> (ProjectCacheBroker, Addr) { let config: Arc<_> = Config::from_json_value(serde_json::json!({ @@ -1429,11 +878,8 @@ mod tests { ProjectCacheBroker { config: config.clone(), memory_checker, - projects: hashbrown::HashMap::new(), - garbage_disposal: GarbageDisposal::new(), - source: ProjectSource::start(config, services.upstream_relay.clone(), None), + projects: ProjectCacheHandle::for_test(), services, - state_tx, spool_v1_unspool_handle: SleepHandle::idle(), spool_v1: Some(SpoolV1 { buffer_tx, @@ -1452,13 +898,13 @@ mod tests { relay_log::init_test!(); let services = mocked_services(); - let (state_tx, _) = mpsc::unbounded_channel(); let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); let (mut broker, _buffer_svc) = - project_cache_broker_setup(services.clone(), state_tx, buffer_tx).await; + project_cache_broker_setup(services.clone(), buffer_tx).await; + let projects = broker.projects.clone(); + let mut project_events = projects.changes(); broker.global_config = GlobalConfigStatus::Ready; - let (tx_update, mut rx_update) = mpsc::unbounded_channel(); let (tx_assert, mut rx_assert) = 
mpsc::unbounded_channel(); let dsn1 = "111d836b15bb49d7bbf99e64295d995b"; @@ -1485,11 +931,12 @@ mod tests { tokio::task::spawn(async move { loop { select! { - + Ok(project_event) = project_events.recv() => { + broker.handle_project_event(project_event); + } Some(assert) = rx_assert.recv() => { assert_eq!(broker.spool_v1.as_ref().unwrap().index.len(), assert); }, - Some(update) = rx_update.recv() => broker.merge_state(update), () = &mut broker.spool_v1_unspool_handle => broker.handle_periodic_unspool(), } } @@ -1498,13 +945,11 @@ mod tests { // Before updating any project states. tx_assert.send(2).unwrap(); - let update_dsn1_project_state = UpdateProjectState { - project_key: ProjectKey::parse(dsn1).unwrap(), - state: ProjectFetchState::allowed(), - no_cache: false, - }; + projects.test_set_project_state( + ProjectKey::parse(dsn1).unwrap(), + ProjectState::new_allowed(), + ); - tx_update.send(update_dsn1_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // One of the project should be unspooled. tx_assert.send(1).unwrap(); @@ -1512,59 +957,15 @@ mod tests { // Schedule some work... tokio::time::sleep(Duration::from_secs(2)).await; - let update_dsn2_project_state = UpdateProjectState { - project_key: ProjectKey::parse(dsn2).unwrap(), - state: ProjectFetchState::allowed(), - no_cache: false, - }; + projects.test_set_project_state( + ProjectKey::parse(dsn2).unwrap(), + ProjectState::new_allowed(), + ); - tx_update.send(update_dsn2_project_state).unwrap(); assert!(buffer_rx.recv().await.is_some()); // The last project should be unspooled. tx_assert.send(0).unwrap(); // Make sure the last assert is tested. tokio::time::sleep(Duration::from_millis(100)).await; } - - #[tokio::test] - async fn handle_processing_without_project() { - let services = mocked_services(); - let (state_tx, _) = mpsc::unbounded_channel(); - let (buffer_tx, mut buffer_rx) = mpsc::unbounded_channel(); - let (mut broker, buffer_svc) = - project_cache_broker_setup(services.clone(), state_tx, buffer_tx.clone()).await; - - let dsn = "111d836b15bb49d7bbf99e64295d995b"; - let project_key = ProjectKey::parse(dsn).unwrap(); - let key = QueueKey { - own_key: project_key, - sampling_key: project_key, - }; - let envelope = ManagedEnvelope::new( - empty_envelope_with_dsn(dsn), - services.outcome_aggregator.clone(), - services.test_store.clone(), - ProcessingGroup::Ungrouped, - ); - - // Index and projects are empty. - assert!(broker.projects.is_empty()); - assert!(broker.spool_v1.as_mut().unwrap().index.is_empty()); - - // Since there is no project we should not process anything but create a project and spool - // the envelope. - broker.handle_processing(key, envelope); - - // Assert that we have a new project and also added an index. - assert!(broker.projects.get(&project_key).is_some()); - assert!(broker.spool_v1.as_mut().unwrap().index.contains(&key)); - - // Check is we actually spooled anything. - buffer_svc.send(DequeueMany::new([key].into(), buffer_tx.clone())); - let UnspooledEnvelope { - managed_envelope, .. 
- } = buffer_rx.recv().await.unwrap(); - - assert_eq!(key, QueueKey::from_envelope(managed_envelope.envelope())); - } } diff --git a/relay-server/src/services/projects/cache/mod.rs b/relay-server/src/services/projects/cache/mod.rs new file mode 100644 index 0000000000..35ee749ecc --- /dev/null +++ b/relay-server/src/services/projects/cache/mod.rs @@ -0,0 +1,10 @@ +mod handle; +mod project; +mod service; +mod state; + +pub mod legacy; + +pub use self::handle::ProjectCacheHandle; +pub use self::project::{CheckedEnvelope, Project}; +pub use self::service::{ProjectCache, ProjectCacheService, ProjectChange}; diff --git a/relay-server/src/services/projects/cache/project.rs b/relay-server/src/services/projects/cache/project.rs new file mode 100644 index 0000000000..4b9e95a2be --- /dev/null +++ b/relay-server/src/services/projects/cache/project.rs @@ -0,0 +1,299 @@ +use std::sync::Arc; + +use relay_config::Config; +use relay_dynamic_config::Feature; +use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits}; +use relay_sampling::evaluation::ReservoirCounters; + +use crate::envelope::ItemType; +use crate::services::outcome::{DiscardReason, Outcome}; +use crate::services::projects::cache::state::SharedProject; +use crate::services::projects::project::ProjectState; +use crate::utils::{CheckLimits, Enforcement, EnvelopeLimiter, ManagedEnvelope}; + +/// A loaded project. +pub struct Project<'a> { + shared: SharedProject, + config: &'a Config, +} + +impl<'a> Project<'a> { + pub(crate) fn new(shared: SharedProject, config: &'a Config) -> Self { + Self { shared, config } + } + + /// Returns a reference to the currently cached project state. + pub fn state(&self) -> &ProjectState { + self.shared.project_state() + } + + /// Returns a reference to the currently cached rate limits. + pub fn rate_limits(&self) -> &CachedRateLimits { + self.shared.cached_rate_limits() + } + + /// Returns a reference to the currently reservoir counters. + pub fn reservoir_counters(&self) -> &ReservoirCounters { + self.shared.reservoir_counters() + } + + /// Checks the envelope against project configuration and rate limits. + /// + /// When `fetched`, then the project state is ensured to be up to date. When `cached`, an outdated + /// project state may be used, or otherwise the envelope is passed through unaltered. + /// + /// To check the envelope, this runs: + /// - Validate origins and public keys + /// - Quotas with a limit of `0` + /// - Cached rate limits + pub fn check_envelope( + &self, + mut envelope: ManagedEnvelope, + ) -> Result { + let state = match self.state() { + ProjectState::Enabled(state) => Some(Arc::clone(state)), + ProjectState::Disabled => { + // TODO(jjbayer): We should refactor this function to either return a Result or + // handle envelope rejections internally, but not both. 
+ envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); + return Err(DiscardReason::ProjectId); + } + ProjectState::Pending => None, + }; + + let mut scoping = envelope.scoping(); + + if let Some(ref state) = state { + scoping = state.scope_request(envelope.envelope().meta()); + envelope.scope(scoping); + + if let Err(reason) = state.check_envelope(envelope.envelope(), self.config) { + envelope.reject(Outcome::Invalid(reason)); + return Err(reason); + } + } + + let current_limits = self.rate_limits().current_limits(); + + let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); + let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { + Ok(current_limits.check_with_quotas(quotas, item_scoping)) + }); + + let (mut enforcement, mut rate_limits) = + envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; + + let check_nested_spans = state + .as_ref() + .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); + + // If we can extract spans from the event, we want to try and count the number of nested + // spans to correctly emit negative outcomes in case the transaction itself is dropped. + if check_nested_spans { + sync_spans_to_enforcement(&envelope, &mut enforcement); + } + + enforcement.apply_with_outcomes(&mut envelope); + + envelope.update(); + + // Special case: Expose active rate limits for all metric namespaces if there is at least + // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary + // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. + if envelope.envelope().items().any(|i| i.ty().is_metrics()) { + let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); + metrics_scoping.namespace = MetricNamespaceScoping::Any; + rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); + } + + let envelope = if envelope.envelope().is_empty() { + // Individual rate limits have already been issued above + envelope.reject(Outcome::RateLimited(None)); + None + } else { + Some(envelope) + }; + + Ok(CheckedEnvelope { + envelope, + rate_limits, + }) + } +} + +/// A checked envelope and associated rate limits. +/// +/// Items violating the rate limits have been removed from the envelope. If all items are removed +/// from the envelope, `None` is returned in place of the envelope. +#[derive(Debug)] +pub struct CheckedEnvelope { + pub envelope: Option, + pub rate_limits: RateLimits, +} + +/// Adds category limits for the nested spans inside a transaction. +/// +/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted +/// as top-level spans, thus if we limited a transaction, we want to count and emit negative +/// outcomes for each of the spans nested inside that transaction. +fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { + if !enforcement.is_event_active() { + return; + } + + let spans_count = count_nested_spans(envelope); + if spans_count == 0 { + return; + } + + if enforcement.event.is_active() { + enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); + } + + if enforcement.event_indexed.is_active() { + enforcement.spans_indexed = enforcement + .event_indexed + .clone_for(DataCategory::SpanIndexed, spans_count); + } +} + +/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). 
+fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { + #[derive(Debug, serde::Deserialize)] + struct PartialEvent { + spans: crate::utils::SeqCount, + } + + envelope + .envelope() + .items() + .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) + .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) + // We do + 1, since we count the transaction itself because it will be extracted + // as a span and counted during the slow path of rate limiting. + .map_or(0, |event| event.spans.0 + 1) +} + +#[cfg(test)] +mod tests { + use crate::envelope::{ContentType, Envelope, Item}; + use crate::extractors::RequestMeta; + use crate::services::processor::ProcessingGroup; + use crate::services::projects::project::{ProjectInfo, PublicKeyConfig}; + use relay_base_schema::project::{ProjectId, ProjectKey}; + use relay_event_schema::protocol::EventId; + use serde_json::json; + use smallvec::smallvec; + + use super::*; + + fn create_project(config: &Config, data: Option) -> Project<'_> { + let mut project_info = ProjectInfo { + project_id: Some(ProjectId::new(42)), + ..Default::default() + }; + project_info.public_keys = smallvec![PublicKeyConfig { + public_key: ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(), + numeric_id: None, + }]; + + if let Some(data) = data { + project_info.config = serde_json::from_value(data).unwrap(); + } + + Project::new( + SharedProject::for_test(ProjectState::Enabled(project_info.into())), + config, + ) + } + + fn request_meta() -> RequestMeta { + let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" + .parse() + .unwrap(); + + RequestMeta::new(dsn) + } + + #[test] + fn test_track_nested_spans_outcomes() { + let config = Default::default(); + let project = create_project( + &config, + Some(json!({ + "features": [ + "organizations:indexed-spans-extraction" + ], + "quotas": [{ + "id": "foo", + "categories": ["transaction"], + "window": 3600, + "limit": 0, + "reasonCode": "foo", + }] + })), + ); + + let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); + + let mut transaction = Item::new(ItemType::Transaction); + transaction.set_payload( + ContentType::Json, + r#"{ + "event_id": "52df9022835246eeb317dbd739ccd059", + "type": "transaction", + "transaction": "I have a stale timestamp, but I'm recent!", + "start_timestamp": 1, + "timestamp": 2, + "contexts": { + "trace": { + "trace_id": "ff62a8b040f340bda5d830223def1d81", + "span_id": "bd429c44b67a3eb4" + } + }, + "spans": [ + { + "span_id": "bd429c44b67a3eb4", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + }, + { + "span_id": "bd429c44b67a3eb5", + "start_timestamp": 1, + "timestamp": null, + "trace_id": "ff62a8b040f340bda5d830223def1d81" + } + ] +}"#, + ); + + envelope.add_item(transaction); + + let (outcome_aggregator, mut outcome_aggregator_rx) = relay_system::Addr::custom(); + let (test_store, _) = relay_system::Addr::custom(); + + let managed_envelope = ManagedEnvelope::new( + envelope, + outcome_aggregator.clone(), + test_store, + ProcessingGroup::Transaction, + ); + + project.check_envelope(managed_envelope).unwrap(); + drop(outcome_aggregator); + + let expected = [ + (DataCategory::Transaction, 1), + (DataCategory::TransactionIndexed, 1), + (DataCategory::Span, 3), + (DataCategory::SpanIndexed, 3), + ]; + + for (expected_category, expected_quantity) in expected { + let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); + assert_eq!(outcome.category, expected_category); + 
assert_eq!(outcome.quantity, expected_quantity); + } + } +} diff --git a/relay-server/src/services/projects/cache/service.rs b/relay-server/src/services/projects/cache/service.rs new file mode 100644 index 0000000000..23a21e553c --- /dev/null +++ b/relay-server/src/services/projects/cache/service.rs @@ -0,0 +1,225 @@ +use std::sync::Arc; + +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_statsd::metric; +use relay_system::Service; +use tokio::sync::{broadcast, mpsc}; + +use crate::services::projects::cache::handle::ProjectCacheHandle; +use crate::services::projects::cache::state::{CompletedFetch, Fetch, ProjectStore}; +use crate::services::projects::project::ProjectState; +use crate::services::projects::source::ProjectSource; +use crate::statsd::{RelayGauges, RelayTimers}; + +/// Size of the broadcast channel for project events. +/// +/// This is set to a value which theoretically should never be reachable, +/// the number of events is approximately bounded by the amount of projects +/// receiving events. +/// +/// It is set to such a large amount because receivers of events currently +/// do not deal with lags in the channel gracefully. +const PROJECT_EVENTS_CHANNEL_SIZE: usize = 512_000; + +/// A cache for projects, which allows concurrent access to the cached projects. +#[derive(Debug)] +pub enum ProjectCache { + /// Schedules another fetch or update for the specified project. + /// + /// A project which is not fetched will eventually expire and be evicted + /// from the cache. Fetches for an already cached project ensure the project + /// is always up to date and not evicted. + Fetch(ProjectKey), +} + +impl ProjectCache { + fn variant(&self) -> &'static str { + match self { + Self::Fetch(_) => "fetch", + } + } +} + +impl relay_system::Interface for ProjectCache {} + +impl relay_system::FromMessage for ProjectCache { + type Response = relay_system::NoResponse; + + fn from_message(message: Self, _: ()) -> Self { + message + } +} + +/// Project life-cycle changes produced by the project cache. +#[derive(Debug, Copy, Clone)] +pub enum ProjectChange { + /// A project was successfully fetched and is now ready to use. + Ready(ProjectKey), + /// A project expired from the cache and was evicted. + Evicted(ProjectKey), +} + +/// A service implementing the [`ProjectCache`] interface. +pub struct ProjectCacheService { + store: ProjectStore, + source: ProjectSource, + config: Arc, + + project_update_rx: mpsc::UnboundedReceiver, + project_update_tx: mpsc::UnboundedSender, + + project_events_tx: broadcast::Sender, +} + +impl ProjectCacheService { + /// Creates a new [`ProjectCacheService`]. + pub fn new(config: Arc, source: ProjectSource) -> Self { + let (project_update_tx, project_update_rx) = mpsc::unbounded_channel(); + let project_events_tx = broadcast::channel(PROJECT_EVENTS_CHANNEL_SIZE).0; + + Self { + store: ProjectStore::default(), + source, + config, + project_update_rx, + project_update_tx, + project_events_tx, + } + } + + /// Consumes and starts a [`ProjectCacheService`]. + /// + /// Returns a [`ProjectCacheHandle`] to access the cache concurrently. 
+ pub fn start(self) -> ProjectCacheHandle { + let (addr, addr_rx) = relay_system::channel(Self::name()); + + let handle = ProjectCacheHandle { + shared: self.store.shared(), + config: Arc::clone(&self.config), + service: addr, + project_changes: self.project_events_tx.clone(), + }; + + self.spawn_handler(addr_rx); + + handle + } + + /// Schedules a new [`Fetch`] and delivers the result to the [`Self::project_update_tx`] channel. + fn schedule_fetch(&self, fetch: Fetch) { + let source = self.source.clone(); + let project_updates = self.project_update_tx.clone(); + + tokio::spawn(async move { + tokio::time::sleep_until(fetch.when()).await; + + let state = match source + .fetch(fetch.project_key(), false, fetch.revision()) + .await + { + Ok(result) => result, + Err(err) => { + relay_log::error!( + tags.project_key = fetch.project_key().as_str(), + error = &err as &dyn std::error::Error, + "failed to fetch project from source: {fetch:?}" + ); + + // TODO: change this to ProjectState::Pending once we consider it safe to do so. + // see https://github.com/getsentry/relay/pull/4140. + ProjectState::Disabled.into() + } + }; + + let _ = project_updates.send(fetch.complete(state)); + }); + } +} + +/// All [`ProjectCacheService`] message handlers. +impl ProjectCacheService { + fn handle_fetch(&mut self, project_key: ProjectKey) { + if let Some(fetch) = self.store.try_begin_fetch(project_key, &self.config) { + self.schedule_fetch(fetch); + } + } + + fn handle_completed_fetch(&mut self, fetch: CompletedFetch) { + let project_key = fetch.project_key(); + + if let Some(fetch) = self.store.complete_fetch(fetch, &self.config) { + relay_log::trace!( + project_key = fetch.project_key().as_str(), + "re-scheduling project fetch: {fetch:?}" + ); + self.schedule_fetch(fetch); + return; + } + + let _ = self + .project_events_tx + .send(ProjectChange::Ready(project_key)); + + metric!( + gauge(RelayGauges::ProjectCacheNotificationChannel) = + self.project_events_tx.len() as u64 + ); + } + + fn handle_evict_stale_projects(&mut self) { + let on_evict = |project_key| { + let _ = self + .project_events_tx + .send(ProjectChange::Evicted(project_key)); + }; + + self.store.evict_stale_projects(&self.config, on_evict); + } + + fn handle_message(&mut self, message: ProjectCache) { + match message { + ProjectCache::Fetch(project_key) => self.handle_fetch(project_key), + } + } +} + +impl relay_system::Service for ProjectCacheService { + type Interface = ProjectCache; + + fn spawn_handler(mut self, mut rx: relay_system::Receiver) { + macro_rules! timed { + ($task:expr, $body:expr) => {{ + let task_name = $task; + metric!( + timer(RelayTimers::ProjectCacheTaskDuration), + task = task_name, + { $body } + ) + }}; + } + + tokio::spawn(async move { + let mut eviction_ticker = tokio::time::interval(self.config.cache_eviction_interval()); + + loop { + tokio::select! 
{ + biased; + + Some(update) = self.project_update_rx.recv() => timed!( + "project_update", + self.handle_completed_fetch(update) + ), + Some(message) = rx.recv() => timed!( + message.variant(), + self.handle_message(message) + ), + _ = eviction_ticker.tick() => timed!( + "evict_stale_projects", + self.handle_evict_stale_projects() + ), + } + } + }); + } +} diff --git a/relay-server/src/services/projects/cache/state.rs b/relay-server/src/services/projects/cache/state.rs new file mode 100644 index 0000000000..ada692cef0 --- /dev/null +++ b/relay-server/src/services/projects/cache/state.rs @@ -0,0 +1,769 @@ +use std::fmt; +use std::sync::Arc; +use tokio::time::Instant; + +use arc_swap::ArcSwap; +use relay_base_schema::project::ProjectKey; +use relay_config::Config; +use relay_quotas::CachedRateLimits; +use relay_sampling::evaluation::ReservoirCounters; +use relay_statsd::metric; + +use crate::services::projects::project::{ProjectState, Revision}; +use crate::services::projects::source::SourceProjectState; +use crate::statsd::{RelayCounters, RelayHistograms}; +use crate::utils::RetryBackoff; + +/// The backing storage for a project cache. +/// +/// Exposes the only interface to delete from [`Shared`], guaranteed by +/// requiring exclusive/mutable access to [`ProjectStore`]. +/// +/// [`Shared`] can be extended through [`Shared::get_or_create`], in which case +/// the private state is missing. Users of [`Shared::get_or_create`] *must* trigger +/// a fetch to create the private state and keep it updated. +/// This guarantees that eventually the project state is populated, but for a undetermined, +/// time it is possible that shared state exists without the respective private state. +#[derive(Default)] +pub struct ProjectStore { + /// The shared state, which can be accessed concurrently. + shared: Arc, + /// The private, mutably exclusive state, used to maintain the project state. + private: hashbrown::HashMap, +} + +impl ProjectStore { + /// Retrieves a [`Shared`] handle which can be freely shared with multiple consumers. + pub fn shared(&self) -> Arc { + Arc::clone(&self.shared) + } + + /// Tries to begin a new fetch for the passed `project_key`. + /// + /// Returns `None` if no fetch is necessary or there is already a fetch ongoing. + /// A returned [`Fetch`] must be scheduled and completed with [`Fetch::complete`] and + /// [`Self::complete_fetch`]. + pub fn try_begin_fetch(&mut self, project_key: ProjectKey, config: &Config) -> Option { + self.get_or_create(project_key, config) + .try_begin_fetch(config) + } + + /// Completes a [`CompletedFetch`] started with [`Self::try_begin_fetch`]. + /// + /// Returns a new [`Fetch`] if another fetch must be scheduled. This happens when the fetched + /// [`ProjectState`] is still pending or already deemed expired. + #[must_use = "an incomplete fetch must be retried"] + pub fn complete_fetch(&mut self, fetch: CompletedFetch, config: &Config) -> Option { + // Eviction is not possible for projects which are currently being fetched. + // Hence if there was a started fetch, the project state must always exist at this stage. + debug_assert!(self.shared.projects.pin().get(&fetch.project_key).is_some()); + debug_assert!(self.private.get(&fetch.project_key).is_some()); + + let mut project = self.get_or_create(fetch.project_key, config); + project.complete_fetch(fetch); + // Schedule another fetch if necessary, usually should only happen if + // the completed fetch is pending. 
+
+    /// Evicts all stale, expired projects from the cache.
+    ///
+    /// Evicted projects are passed to the `on_evict` callback. Returns the total number of
+    /// evicted projects.
+    pub fn evict_stale_projects<F>(&mut self, config: &Config, mut on_evict: F) -> usize
+    where
+        F: FnMut(ProjectKey),
+    {
+        let eviction_start = Instant::now();
+
+        let expired = self.private.extract_if(|_, private| {
+            // We only evict projects which have fully expired and are not currently being fetched.
+            //
+            // This relies on the fact that a project should never remain in the `pending` state
+            // for long and is either always being fetched or successfully fetched.
+            private.last_fetch().map_or(false, |ts| {
+                ts.check_expiry(eviction_start, config).is_expired()
+            })
+        });
+
+        let mut evicted = 0;
+
+        let shared = self.shared.projects.pin();
+        for (project_key, _) in expired {
+            let _removed = shared.remove(&project_key);
+            debug_assert!(
+                _removed.is_some(),
+                "an expired project must exist in the shared state"
+            );
+            relay_log::trace!(tags.project_key = project_key.as_str(), "project evicted");
+
+            evicted += 1;
+
+            on_evict(project_key);
+        }
+        drop(shared);
+
+        metric!(
+            histogram(RelayHistograms::ProjectStateCacheSize) = self.shared.projects.len() as u64,
+            storage = "shared"
+        );
+        metric!(
+            histogram(RelayHistograms::ProjectStateCacheSize) = self.private.len() as u64,
+            storage = "private"
+        );
+        metric!(counter(RelayCounters::EvictingStaleProjectCaches) += evicted as u64);
+
+        evicted
+    }
+
+    /// Get a reference to the current project or create a new project.
+    ///
+    /// For internal use only; a created project must always be fetched immediately.
+    fn get_or_create(&mut self, project_key: ProjectKey, config: &Config) -> ProjectRef<'_> {
+        #[cfg(debug_assertions)]
+        if self.private.contains_key(&project_key) {
+            // We have exclusive access to the private part, there are no concurrent deletions
+            // hence if we have a private state there must always be a shared state as well.
+            //
+            // The opposite is not true, the shared state may have been created concurrently
+            // through the shared access.
+            debug_assert!(self.shared.projects.pin().contains_key(&project_key));
+        }
+
+        let private = self
+            .private
+            .entry(project_key)
+            .and_modify(|_| {
+                metric!(counter(RelayCounters::ProjectCacheHit) += 1);
+            })
+            .or_insert_with(|| {
+                metric!(counter(RelayCounters::ProjectCacheMiss) += 1);
+                PrivateProjectState::new(project_key, config)
+            });
+
+        let shared = self
+            .shared
+            .projects
+            .pin()
+            .get_or_insert_with(project_key, Default::default)
+            .clone();
+
+        ProjectRef { private, shared }
+    }
+}
+
+/// The shared and concurrently accessible handle to the project cache.
+#[derive(Default)]
+pub struct Shared {
+    projects: papaya::HashMap<ProjectKey, SharedProjectState>,
+}
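The `Shared` map builds on `papaya`'s pinned guards for concurrent reads. A minimal, standalone sketch of the fast-path/slow-path lookup that `Shared::get_or_create` uses below, with illustrative types rather than Relay's:

use papaya::HashMap;

// Readers pin the map, try the fast path first, and on a miss race the insert,
// falling back to whichever value won.
fn get_or_create(map: &HashMap<u32, String>, key: u32) -> String {
    let pinned = map.pin();

    // Fast path: the entry usually exists already.
    if let Some(value) = pinned.get(&key) {
        return value.clone();
    }

    // Slow path: attempt the insert; a concurrent writer may have been faster.
    match pinned.try_insert(key, format!("project-{key}")) {
        Ok(inserted) => inserted.clone(),
        Err(occupied) => occupied.current.clone(),
    }
}

fn main() {
    let map = HashMap::new();
    assert_eq!(get_or_create(&map, 1), "project-1");
    assert_eq!(get_or_create(&map, 1), "project-1");
}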
+
+impl Shared {
+    /// Returns the existing project state or creates a new one.
+    ///
+    /// The caller must ensure that the project cache is instructed to
+    /// [`super::ProjectCache::Fetch`] the retrieved project.
+    pub fn get_or_create(&self, project_key: ProjectKey) -> SharedProject {
+        // The fast path: we expect the project to exist.
+        let projects = self.projects.pin();
+        if let Some(project) = projects.get(&project_key) {
+            return project.to_shared_project();
+        }
+
+        // The slow path: attempt the insert; somebody else may have been faster, but that's okay.
+        match projects.try_insert(project_key, Default::default()) {
+            Ok(inserted) => inserted.to_shared_project(),
+            Err(occupied) => occupied.current.to_shared_project(),
+        }
+    }
+}
+
+/// TEST ONLY bypass to make the project cache mockable.
+#[cfg(test)]
+impl Shared {
+    /// Updates the project state for a project.
+    ///
+    /// TEST ONLY!
+    pub fn test_set_project_state(&self, project_key: ProjectKey, state: ProjectState) {
+        self.projects
+            .pin()
+            .get_or_insert_with(project_key, Default::default)
+            .set_project_state(state);
+    }
+
+    /// Returns `true` if there exists a shared state for the passed `project_key`.
+    pub fn test_has_project_created(&self, project_key: ProjectKey) -> bool {
+        self.projects.pin().contains_key(&project_key)
+    }
+}
+
+impl fmt::Debug for Shared {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Shared")
+            .field("num_projects", &self.projects.len())
+            .finish()
+    }
+}
+
+/// A single project from the [`Shared`] project cache.
+pub struct SharedProject(Arc<SharedProjectStateInner>);
+
+impl SharedProject {
+    /// Returns a reference to the contained [`ProjectState`].
+    pub fn project_state(&self) -> &ProjectState {
+        &self.0.state
+    }
+
+    /// Returns a reference to the contained [`CachedRateLimits`].
+    pub fn cached_rate_limits(&self) -> &CachedRateLimits {
+        // Exposing cached rate limits may be a bad idea: it allows mutation
+        // and caching of rate limits for pending projects, which may or may not be fine.
+        // Although, for now this is fine.
+        //
+        // Read-only access is easily achievable if we return only the current rate limits.
+        &self.0.rate_limits
+    }
+
+    /// Returns a reference to the contained [`ReservoirCounters`].
+    pub fn reservoir_counters(&self) -> &ReservoirCounters {
+        &self.0.reservoir_counters
+    }
+}
+
+/// TEST ONLY bypass to make the project cache mockable.
+#[cfg(test)]
+impl SharedProject {
+    /// Creates a new [`SharedProject`] for testing only.
+    pub fn for_test(state: ProjectState) -> Self {
+        Self(Arc::new(SharedProjectStateInner {
+            state,
+            ..Default::default()
+        }))
+    }
+}
+
+/// Reference to a full project wrapping shared and private state.
+struct ProjectRef<'a> {
+    shared: SharedProjectState,
+    private: &'a mut PrivateProjectState,
+}
+
+impl ProjectRef<'_> {
+    fn try_begin_fetch(&mut self, config: &Config) -> Option<Fetch> {
+        let now = Instant::now();
+        self.private
+            .try_begin_fetch(now, config)
+            .map(|fetch| fetch.with_revision(self.shared.revision()))
+    }
+
+    fn complete_fetch(&mut self, fetch: CompletedFetch) {
+        let now = Instant::now();
+        self.private.complete_fetch(&fetch, now);
+
+        // Keep the old state around if the current fetch is pending.
+        // It may still be useful to callers.
+        match fetch.state {
+            SourceProjectState::New(state) if !state.is_pending() => {
+                self.shared.set_project_state(state);
+            }
+            _ => {}
+        }
+    }
+}
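The `Fetch` token below carries a `Revision`, whose comparison rule (defined later in this patch in `project/info.rs`) is easy to trip over: a missing revision never compares equal, not even to another missing one, so a project whose revision is unknown is always treated as changed. A small standalone replica, not the crate type, to illustrate:

use std::sync::Arc;

#[derive(Debug, Default)]
struct Revision(Option<Arc<str>>);

impl PartialEq for Revision {
    fn eq(&self, other: &Self) -> bool {
        match (&self.0, &other.0) {
            (Some(left), Some(right)) => left == right,
            // A missing revision never matches anything, including another missing revision.
            _ => false,
        }
    }
}

fn main() {
    assert_ne!(Revision::default(), Revision::default());
    assert_eq!(
        Revision(Some("8d331dd8".into())),
        Revision(Some("8d331dd8".into()))
    );
}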
+
+/// A [`Fetch`] token.
+///
+/// When returned it must be executed and completed using [`Self::complete`].
+#[must_use = "a fetch must be executed"]
+#[derive(Debug)]
+pub struct Fetch {
+    project_key: ProjectKey,
+    when: Instant,
+    revision: Revision,
+}
+
+impl Fetch {
+    /// Returns the [`ProjectKey`] of the project to fetch.
+    pub fn project_key(&self) -> ProjectKey {
+        self.project_key
+    }
+
+    /// Returns when the fetch for the project should be scheduled.
+    ///
+    /// This can be now (as soon as possible) or a later point in time, if the project is currently
+    /// in a backoff.
+    pub fn when(&self) -> Instant {
+        self.when
+    }
+
+    /// Returns the revision of the currently cached project.
+    ///
+    /// If the upstream indicates it does not have a different version of this project,
+    /// we do not need to update the local state.
+    pub fn revision(&self) -> Revision {
+        self.revision.clone()
+    }
+
+    /// Completes the fetch with a result and returns a [`CompletedFetch`].
+    pub fn complete(self, state: SourceProjectState) -> CompletedFetch {
+        CompletedFetch {
+            project_key: self.project_key,
+            state,
+        }
+    }
+
+    fn with_revision(mut self, revision: Revision) -> Self {
+        self.revision = revision;
+        self
+    }
+}
+
+/// The result of an executed [`Fetch`].
+#[must_use = "a completed fetch must be acted upon"]
+#[derive(Debug)]
+pub struct CompletedFetch {
+    project_key: ProjectKey,
+    state: SourceProjectState,
+}
+
+impl CompletedFetch {
+    /// Returns the [`ProjectKey`] of the project which was fetched.
+    pub fn project_key(&self) -> ProjectKey {
+        self.project_key
+    }
+
+    /// Returns `true` if the fetch completed with a pending status.
+    fn is_pending(&self) -> bool {
+        match &self.state {
+            SourceProjectState::New(state) => state.is_pending(),
+            SourceProjectState::NotModified => false,
+        }
+    }
+}
+
+/// The state of a project contained in the [`Shared`] project cache.
+///
+/// This state is interior mutable and allows updates to the project.
+#[derive(Debug, Default, Clone)]
+struct SharedProjectState(Arc<ArcSwap<SharedProjectStateInner>>);
+
+impl SharedProjectState {
+    /// Updates the project state.
+    fn set_project_state(&self, state: ProjectState) {
+        let prev = self.0.rcu(|stored| SharedProjectStateInner {
+            state: state.clone(),
+            rate_limits: Arc::clone(&stored.rate_limits),
+            reservoir_counters: Arc::clone(&stored.reservoir_counters),
+        });
+
+        // Try to clean expired reservoir counters.
+        //
+        // We do it after the `rcu`, to not re-run this more often than necessary.
+        if let Some(state) = state.enabled() {
+            let config = state.config.sampling.as_ref();
+            if let Some(config) = config.and_then(|eb| eb.as_ref().ok()) {
+                // We can safely use the previous value here; the `rcu` just replaced the state,
+                // and the reservoir counters did not change.
+                //
+                // `try_lock` to avoid blocking; this is a best-effort cleanup.
+                if let Ok(mut counters) = prev.reservoir_counters.try_lock() {
+                    counters.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key));
+                }
+            }
+        }
+    }
+
+    /// Extracts and clones the revision from the contained project state.
+    fn revision(&self) -> Revision {
+        self.0.as_ref().load().state.revision().clone()
+    }
+
+    /// Transforms this interior mutable handle to an immutable [`SharedProject`].
+    fn to_shared_project(&self) -> SharedProject {
+        SharedProject(self.0.as_ref().load_full())
+    }
+}
+
+/// The data contained in a [`SharedProjectState`].
+///
+/// All fields must be cheap to clone and are ideally just a single `Arc`.
+/// Partial updates to [`SharedProjectState`] are performed using `rcu`, cloning all fields.
+#[derive(Debug, Default)]
+struct SharedProjectStateInner {
+    state: ProjectState,
+    rate_limits: Arc<CachedRateLimits>,
+    reservoir_counters: ReservoirCounters,
+}
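`SharedProjectState::set_project_state` above leans on `arc-swap`'s `rcu` to swap in a freshly built `SharedProjectStateInner` while readers keep cheap snapshots. A standalone sketch of that pattern with illustrative names, not Relay code:

use std::sync::Arc;

use arc_swap::ArcSwap;

#[derive(Debug)]
struct Inner {
    state: &'static str,
    generation: u64,
}

fn main() {
    let shared = ArcSwap::new(Arc::new(Inner { state: "pending", generation: 0 }));

    // Readers take a cheap snapshot; it stays valid even across later updates.
    let before: Arc<Inner> = shared.load_full();

    // `rcu` builds a fresh inner value from the stored one, swaps it in,
    // and hands back the previously stored `Arc`.
    let prev = shared.rcu(|stored| Inner {
        state: "enabled",
        generation: stored.generation + 1,
    });

    assert_eq!(prev.state, "pending");
    assert_eq!(before.generation, 0);
    assert_eq!(shared.load().state, "enabled");
}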
+
+/// Current fetch state for a project.
+#[derive(Debug)]
+enum FetchState {
+    /// There is a fetch currently in progress.
+    InProgress,
+    /// A successful fetch is pending.
+    ///
+    /// This is essentially only the initial state; for the most part, a project
+    /// should always have a fetch in progress or be in the non-pending state.
+    Pending {
+        /// Time when the next fetch should be attempted.
+        ///
+        /// `None` means as soon as possible.
+        next_fetch_attempt: Option<Instant>,
+    },
+    /// There was a successful non-pending fetch.
+    Complete {
+        /// Time when the fetch was completed.
+        last_fetch: LastFetch,
+    },
+}
+
+/// Contains all mutable state necessary to maintain the project cache.
+struct PrivateProjectState {
+    /// Project key this state belongs to.
+    project_key: ProjectKey,
+
+    /// The current fetch state.
+    state: FetchState,
+    /// The current backoff used for calculating the next fetch attempt.
+    ///
+    /// The backoff is reset after a successful, non-pending fetch.
+    backoff: RetryBackoff,
+}
+
+impl PrivateProjectState {
+    fn new(project_key: ProjectKey, config: &Config) -> Self {
+        Self {
+            project_key,
+            state: FetchState::Pending {
+                next_fetch_attempt: None,
+            },
+            backoff: RetryBackoff::new(config.http_max_retry_interval()),
+        }
+    }
+
+    /// Returns the [`LastFetch`] if there is currently no fetch in progress and the project
+    /// was fetched successfully before.
+    fn last_fetch(&self) -> Option<LastFetch> {
+        match &self.state {
+            FetchState::Complete { last_fetch } => Some(*last_fetch),
+            _ => None,
+        }
+    }
+
+    fn try_begin_fetch(&mut self, now: Instant, config: &Config) -> Option<Fetch> {
+        let when = match &self.state {
+            FetchState::InProgress {} => {
+                relay_log::trace!(
+                    tags.project_key = self.project_key.as_str(),
+                    "project fetch skipped, fetch in progress"
+                );
+                return None;
+            }
+            FetchState::Pending { next_fetch_attempt } => {
+                // Schedule a new fetch; even if there is a backoff, it will just sleep for a while.
+                next_fetch_attempt.unwrap_or(now)
+            }
+            FetchState::Complete { last_fetch } => {
+                if last_fetch.check_expiry(now, config).is_fresh() {
+                    // The current state is up to date, no need to start another fetch.
+                    relay_log::trace!(
+                        tags.project_key = self.project_key.as_str(),
+                        "project fetch skipped, already up to date"
+                    );
+                    return None;
+                }
+                now
+            }
+        };
+
+        // Mark a current fetch in progress.
+        self.state = FetchState::InProgress {};
+
+        relay_log::trace!(
+            tags.project_key = &self.project_key.as_str(),
+            attempts = self.backoff.attempt() + 1,
+            "project state fetch scheduled in {:?}",
+            when.saturating_duration_since(Instant::now()),
+        );
+
+        Some(Fetch {
+            project_key: self.project_key,
+            when,
+            revision: Revision::default(),
+        })
+    }
+
+    fn complete_fetch(&mut self, fetch: &CompletedFetch, now: Instant) {
+        debug_assert!(
+            matches!(self.state, FetchState::InProgress),
+            "fetch completed while there was no current fetch registered"
+        );
+
+        if fetch.is_pending() {
+            self.state = FetchState::Pending {
+                next_fetch_attempt: now.checked_add(self.backoff.next_backoff()),
+            };
+            relay_log::trace!(
+                tags.project_key = &self.project_key.as_str(),
+                "project state fetch completed but still pending"
+            );
+        } else {
+            relay_log::trace!(
+                tags.project_key = &self.project_key.as_str(),
+                "project state fetch completed with non-pending config"
+            );
+            self.backoff.reset();
+            self.state = FetchState::Complete {
+                last_fetch: LastFetch(now),
+            };
+        }
+    }
+}
+
+/// Newtype containing the last successful fetch time as an [`Instant`].
+#[derive(Debug, Copy, Clone)]
+struct LastFetch(Instant);
+
+impl LastFetch {
+    /// Returns the [`Expiry`] of the last fetch in relation to `now`.
+    fn check_expiry(&self, now: Instant, config: &Config) -> Expiry {
+        let expiry = config.project_cache_expiry();
+        let elapsed = now.saturating_duration_since(self.0);
+
+        if elapsed >= expiry + config.project_grace_period() {
+            Expiry::Expired
+        } else if elapsed >= expiry {
+            Expiry::Stale
+        } else {
+            Expiry::Fresh
+        }
+    }
+}
+
+/// Expiry state of a project.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +enum Expiry { + /// The project state is perfectly up to date. + Fresh, + /// The project state is outdated but events depending on this project state can still be + /// processed. The state should be refreshed in the background though. + Stale, + /// The project state is completely outdated and events need to be buffered up until the new + /// state has been fetched. + Expired, +} + +impl Expiry { + /// Returns `true` if the project is up-to-date and does not need to be fetched. + fn is_fresh(&self) -> bool { + matches!(self, Self::Fresh) + } + + /// Returns `true` if the project is expired and can be evicted. + fn is_expired(&self) -> bool { + matches!(self, Self::Expired) + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + fn collect_evicted(store: &mut ProjectStore, config: &Config) -> Vec { + let mut evicted = Vec::new(); + let num_evicted = store.evict_stale_projects(config, |pk| evicted.push(pk)); + assert_eq!(evicted.len(), num_evicted); + evicted + } + + macro_rules! assert_state { + ($store:ident, $project_key:ident, $state:pat) => { + assert!(matches!( + $store.shared().get_or_create($project_key).project_state(), + $state + )); + }; + } + + #[test] + fn test_store_fetch() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Default::default(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + assert!(fetch.when() < Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Fetch already in progress, nothing to do. + assert!(store.try_begin_fetch(project_key, &config).is_none()); + + // A pending fetch should trigger a new fetch immediately. + let fetch = fetch.complete(ProjectState::Pending.into()); + let fetch = store.complete_fetch(fetch, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + // First backoff is still immediately. + assert!(fetch.when() < Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Pending again. + let fetch = fetch.complete(ProjectState::Pending.into()); + let fetch = store.complete_fetch(fetch, &config).unwrap(); + assert_eq!(fetch.project_key(), project_key); + // This time it needs to be in the future (backoff). + assert!(fetch.when() > Instant::now()); + assert_eq!(fetch.revision().as_str(), None); + assert_state!(store, project_key, ProjectState::Pending); + + // Now complete with disabled. + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Disabled); + + // A new fetch is not yet necessary. 
+ assert!(store.try_begin_fetch(project_key, &config).is_none()); + } + + #[tokio::test(start_paused = true)] + async fn test_store_fetch_pending_does_not_replace_state() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 5, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Disabled); + + tokio::time::advance(Duration::from_secs(6)).await; + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Pending.into()); + // We're returned a new fetch, because the current one completed pending. + let fetch = store.complete_fetch(fetch, &config).unwrap(); + // The old cached state is still available and not replaced. + assert_state!(store, project_key, ProjectState::Disabled); + + let fetch = fetch.complete(ProjectState::new_allowed().into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + assert_state!(store, project_key, ProjectState::Enabled(_)); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects() { + let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 0, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + // 3 seconds is not enough to expire any project. + tokio::time::advance(Duration::from_secs(3)).await; + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + let fetch = store.try_begin_fetch(project_key2, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // A total of 6 seconds should expire the first project. + tokio::time::advance(Duration::from_secs(3)).await; + + assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); + assert_state!(store, project_key1, ProjectState::Pending); + assert_state!(store, project_key2, ProjectState::Disabled); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects_pending_not_expired() { + let project_key1 = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let project_key2 = ProjectKey::parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 0, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key1, &config).unwrap(); + // Create a new project in a pending state, but never fetch it, this should also never expire. 
+ store.shared().get_or_create(project_key2); + + tokio::time::advance(Duration::from_secs(6)).await; + + // No evictions, project is pending. + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + + // Complete the project. + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // Still should not be evicted, because we do have 5 seconds to expire since completion. + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + tokio::time::advance(Duration::from_secs(4)).await; + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key1, ProjectState::Disabled); + + // Just enough to expire the project. + tokio::time::advance(Duration::from_millis(1001)).await; + assert_eq!(collect_evicted(&mut store, &config), vec![project_key1]); + assert_state!(store, project_key1, ProjectState::Pending); + assert_state!(store, project_key2, ProjectState::Pending); + } + + #[tokio::test(start_paused = true)] + async fn test_store_evict_projects_stale() { + let project_key = ProjectKey::parse("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(); + let mut store = ProjectStore::default(); + let config = Config::from_json_value(serde_json::json!({ + "cache": { + "project_expiry": 5, + "project_grace_period": 5, + } + })) + .unwrap(); + + let fetch = store.try_begin_fetch(project_key, &config).unwrap(); + let fetch = fetch.complete(ProjectState::Disabled.into()); + assert!(store.complete_fetch(fetch, &config).is_none()); + + // This is in the grace period, but not yet expired. + tokio::time::advance(Duration::from_millis(9500)).await; + + assert_eq!(collect_evicted(&mut store, &config), Vec::new()); + assert_state!(store, project_key, ProjectState::Disabled); + + // Now it's expired. + tokio::time::advance(Duration::from_secs(1)).await; + + assert_eq!(collect_evicted(&mut store, &config), vec![project_key]); + assert_state!(store, project_key, ProjectState::Pending); + } +} diff --git a/relay-server/src/services/projects/project/state/info.rs b/relay-server/src/services/projects/project/info.rs similarity index 86% rename from relay-server/src/services/projects/project/state/info.rs rename to relay-server/src/services/projects/project/info.rs index eab9b8fb62..d6fb7cfaab 100644 --- a/relay-server/src/services/projects/project/state/info.rs +++ b/relay-server/src/services/projects/project/info.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use chrono::{DateTime, Utc}; use relay_base_schema::project::{ProjectId, ProjectKey}; @@ -16,7 +18,6 @@ use url::Url; use crate::envelope::Envelope; use crate::extractors::RequestMeta; use crate::services::outcome::DiscardReason; -use crate::services::projects::project::PublicKeyConfig; /// Information about an enabled project. /// @@ -32,7 +33,7 @@ pub struct ProjectInfo { /// are faked locally. pub last_change: Option>, /// The revision id of the project config. - pub rev: Option, + pub rev: Revision, /// Indicates that the project is disabled. /// A container of known public keys in the project. /// @@ -57,7 +58,7 @@ pub struct ProjectInfo { pub struct LimitedProjectInfo { pub project_id: Option, pub last_change: Option>, - pub rev: Option, + pub rev: Revision, pub public_keys: SmallVec<[PublicKeyConfig; 1]>, pub slug: Option, #[serde(with = "LimitedProjectConfig")] @@ -187,9 +188,6 @@ impl ProjectInfo { /// This scoping amends `RequestMeta::get_partial_scoping` by adding organization and key info. 
/// The processor must fetch the full scoping before attempting to rate limit with partial /// scoping. - /// - /// To get the own scoping of this ProjectKey without amending request information, use - /// [`Project::scoping`](crate::services::projects::project::Project::scoping) instead. pub fn scope_request(&self, meta: &RequestMeta) -> Scoping { let mut scoping = meta.get_partial_scoping(); @@ -248,3 +246,51 @@ impl ProjectInfo { self.config.features.has(feature) } } + +/// Represents a public key received from the projectconfig endpoint. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PublicKeyConfig { + /// Public part of key (random hash). + pub public_key: ProjectKey, + + /// The primary key of the DSN in Sentry's main database. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub numeric_id: Option, +} + +/// Represents a project info revision. +/// +/// A revision can be missing, a missing revision never compares equal +/// to any other revision. +/// +/// Revisions are internally reference counted and cheap to create. +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Revision(Option>); + +impl Revision { + /// Returns the revision as a string reference. + /// + /// `None` is a revision that does not match any other revision, + /// not even another revision which is represented as `None`. + pub fn as_str(&self) -> Option<&str> { + self.0.as_deref() + } +} + +impl PartialEq for Revision { + fn eq(&self, other: &Self) -> bool { + match (&self.0, &other.0) { + (None, _) => false, + (_, None) => false, + (Some(left), Some(right)) => left == right, + } + } +} + +impl From<&str> for Revision { + fn from(value: &str) -> Self { + Self(Some(value.into())) + } +} diff --git a/relay-server/src/services/projects/project/mod.rs b/relay-server/src/services/projects/project/mod.rs index 460bff2659..5e23327370 100644 --- a/relay-server/src/services/projects/project/mod.rs +++ b/relay-server/src/services/projects/project/mod.rs @@ -1,753 +1,127 @@ +//! Types that represent the current project state. use std::sync::Arc; -use std::time::Duration; -use relay_base_schema::project::ProjectKey; -use relay_config::Config; -use relay_dynamic_config::{ErrorBoundary, Feature}; -use relay_metrics::Bucket; -use relay_quotas::{CachedRateLimits, DataCategory, MetricNamespaceScoping, RateLimits, Scoping}; -use relay_sampling::evaluation::ReservoirCounters; -use relay_statsd::metric; -use relay_system::{Addr, BroadcastChannel}; use serde::{Deserialize, Serialize}; -use tokio::time::Instant; - -use crate::envelope::ItemType; -use crate::services::metrics::{Aggregator, MergeBuckets}; -use crate::services::outcome::{DiscardReason, Outcome}; -use crate::services::processor::ProcessProjectMetrics; -use crate::services::projects::cache::{ - CheckedEnvelope, ProcessMetrics, ProjectCache, RequestUpdate, -}; -use crate::utils::{Enforcement, SeqCount}; - -use crate::statsd::RelayCounters; -use crate::utils::{CheckLimits, EnvelopeLimiter, ManagedEnvelope, RetryBackoff}; - -pub mod state; - -pub use state::{ - ExpiryState, LimitedParsedProjectState, ParsedProjectState, ProjectFetchState, ProjectInfo, - ProjectState, -}; - -/// Sender type for messages that respond with project states. -pub type ProjectSender = relay_system::BroadcastSender; - -/// Represents a public key received from the projectconfig endpoint. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct PublicKeyConfig { - /// Public part of key (random hash). - pub public_key: ProjectKey, - - /// The primary key of the DSN in Sentry's main database. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub numeric_id: Option, -} - -/// Channel used to respond to state requests (e.g. by project config endpoint). -#[derive(Debug)] -struct StateChannel { - inner: BroadcastChannel, - no_cache: bool, -} - -impl StateChannel { - pub fn new() -> Self { - Self { - inner: BroadcastChannel::new(), - no_cache: false, - } - } - - pub fn no_cache(&mut self, no_cache: bool) -> &mut Self { - self.no_cache = no_cache; - self - } -} - -#[derive(Debug)] -enum GetOrFetch<'a> { - Cached(ProjectState), - Scheduled(&'a mut StateChannel), -} -/// Structure representing organization and project configuration for a project key. -/// -/// This structure no longer uniquely identifies a project. Instead, it identifies a project key. -/// Projects can define multiple keys, in which case this structure is duplicated for each instance. -#[derive(Debug)] -pub struct Project { - backoff: RetryBackoff, - next_fetch_attempt: Option, - last_updated_at: Instant, - project_key: ProjectKey, - config: Arc, - state: ProjectFetchState, - state_channel: Option, - rate_limits: CachedRateLimits, - last_no_cache: Instant, - reservoir_counters: ReservoirCounters, +use relay_base_schema::project::ProjectKey; +use relay_quotas::Scoping; + +mod info; + +pub use self::info::*; + +/// Representation of a project's current state. +#[derive(Clone, Debug, Default)] +pub enum ProjectState { + /// A valid project that is not disabled. + Enabled(Arc), + /// A project that was marked as "gone" by the upstream. This variant does not expose + /// any other project information. + Disabled, + /// A project to which one of the following conditions apply: + /// - The project has not yet been fetched. + /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). + /// - The upstream returned an unparsable project so we have to try again. + /// - The project has expired and must be treated as "has not been fetched". + #[default] + Pending, } -impl Project { - /// Creates a new `Project`. - pub fn new(key: ProjectKey, config: Arc) -> Self { - Project { - backoff: RetryBackoff::new(config.http_max_retry_interval()), - next_fetch_attempt: None, - last_updated_at: Instant::now(), - project_key: key, - state: ProjectFetchState::expired(), - state_channel: None, - rate_limits: CachedRateLimits::new(), - last_no_cache: Instant::now(), - reservoir_counters: Arc::default(), - config, - } - } - - /// Returns the [`ReservoirCounters`] for the project. - pub fn reservoir_counters(&self) -> ReservoirCounters { - self.reservoir_counters.clone() - } - - /// Returns the current [`ProjectState`] attached to the project. - pub fn current_state(&self) -> ProjectState { - self.state.current_state(&self.config) - } - - /// Returns the currently active cached rate limits. - pub fn current_rate_limits(&mut self) -> &RateLimits { - self.rate_limits.current_limits() - } - - /// If a reservoir rule is no longer in the sampling config, we will remove those counters. 
- fn remove_expired_reservoir_rules(&self) { - let Some(state) = self.current_state().enabled() else { - return; - }; - - let Some(ErrorBoundary::Ok(config)) = state.config.sampling.as_ref() else { - return; - }; - - // Using try_lock to not slow down the project cache service. - if let Ok(mut guard) = self.reservoir_counters.try_lock() { - guard.retain(|key, _| config.rules.iter().any(|rule| rule.id == *key)); - } - } - - pub fn merge_rate_limits(&mut self, rate_limits: RateLimits) { - self.rate_limits.merge(rate_limits); - } - - /// Returns the next attempt `Instant` if backoff is initiated, or None otherwise. - pub fn next_fetch_attempt(&self) -> Option { - self.next_fetch_attempt - } - - /// The last time the project state was updated - pub fn last_updated_at(&self) -> Instant { - self.last_updated_at - } - - /// Refresh the update time of the project in order to delay eviction. - /// - /// Called by the project cache when the project state is refreshed. - pub fn refresh_updated_timestamp(&mut self) { - self.last_updated_at = Instant::now(); - } - - /// Collects internal project state and assembles a [`ProcessProjectMetrics`] message. - pub fn process_metrics(&mut self, message: ProcessMetrics) -> ProcessProjectMetrics { - let project_state = self.current_state(); - let rate_limits = self.rate_limits.current_limits().clone(); - - ProcessProjectMetrics { - project_state, - rate_limits, - - data: message.data, - project_key: message.project_key, - source: message.source, - received_at: message.received_at, - sent_at: message.sent_at, - } - } - - /// Returns a list of buckets back to the aggregator. +impl ProjectState { + /// Project state for an unknown but allowed project. /// - /// This is used to return flushed buckets back to the aggregator if the project has not been - /// loaded at the time of flush. - /// - /// Buckets at this stage are expected to be validated already. - pub fn return_buckets(&self, aggregator: &Addr, buckets: Vec) { - aggregator.send(MergeBuckets::new( - self.project_key, - buckets.into_iter().collect(), - )); - } - - /// Returns `true` if backoff expired and new attempt can be triggered. - fn can_fetch(&self) -> bool { - self.next_fetch_attempt - .map(|next_attempt_at| next_attempt_at <= Instant::now()) - .unwrap_or(true) - } - - /// Triggers a debounced refresh of the project state. - /// - /// If the state is already being updated in the background, this method checks if the request - /// needs to be upgraded with the `no_cache` flag to ensure a more recent update. - fn fetch_state( - &mut self, - project_cache: Addr, - no_cache: bool, - ) -> &mut StateChannel { - // If there is a running request and we do not need to upgrade it to no_cache, or if the - // backoff is started and the new attempt is still somewhere in the future, skip - // scheduling a new fetch. - let should_fetch = !matches!(self.state_channel, Some(ref channel) if channel.no_cache || !no_cache) - && self.can_fetch(); - - let channel = self.state_channel.get_or_insert_with(StateChannel::new); - - if should_fetch { - channel.no_cache(no_cache); - let attempts = self.backoff.attempt() + 1; - relay_log::debug!( - "project {} state requested {attempts} times", - self.project_key - ); - project_cache.send(RequestUpdate { - project_key: self.project_key, - no_cache, - cached_state: self.state.clone(), - }); - } - - channel + /// This state is used for forwarding in Proxy mode. 
+ pub fn new_allowed() -> Self { + Self::Enabled(Arc::new(ProjectInfo { + project_id: None, + last_change: None, + rev: Default::default(), + public_keys: Default::default(), + slug: None, + config: Default::default(), + organization_id: None, + })) } - fn get_or_fetch_state( - &mut self, - project_cache: Addr, - mut no_cache: bool, - ) -> GetOrFetch<'_> { - // count number of times we are looking for the project state - metric!(counter(RelayCounters::ProjectStateGet) += 1); - - // Allow at most 1 no_cache request per second. Gracefully degrade to cached requests. - if no_cache { - if self.last_no_cache.elapsed() < Duration::from_secs(1) { - no_cache = false; - } else { - metric!(counter(RelayCounters::ProjectStateNoCache) += 1); - self.last_no_cache = Instant::now(); - } - } - - let cached_state = match self.state.expiry_state(&self.config) { - // Never use the cached state if `no_cache` is set. - _ if no_cache => None, - // There is no project state that can be used, fetch a state and return it. - ExpiryState::Expired => None, - // The project is semi-outdated, fetch new state but return old one. - ExpiryState::Stale(state) => Some(state.clone()), - // The project is not outdated, return early here to jump over fetching logic below. - ExpiryState::Updated(state) => return GetOrFetch::Cached(state.clone()), - }; - - let channel = self.fetch_state(project_cache, no_cache); - - match cached_state { - Some(state) => GetOrFetch::Cached(state), - None => GetOrFetch::Scheduled(channel), + /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). + pub fn sanitized(self) -> Self { + match self { + Self::Enabled(state) => Self::Enabled(Arc::new(state.as_ref().clone().sanitized())), + Self::Disabled => Self::Disabled, + Self::Pending => Self::Pending, } } - /// Returns the cached project state if it is valid. - /// - /// Depending on the state of the cache, this method takes different action: - /// - /// - If the cached state is up-to-date, this method simply returns `Some`. - /// - If the cached state is stale, this method triggers a refresh in the background and - /// returns `Some`. The stale period can be configured through - /// [`Config::project_grace_period`]. - /// - If there is no cached state or the cached state is fully outdated, this method triggers a - /// refresh in the background and returns `None`. - /// - /// If `no_cache` is set to true, this method always returns `None` and always triggers a - /// background refresh. - /// - /// To wait for a valid state instead, use [`get_state`](Self::get_state). - pub fn get_cached_state( - &mut self, - project_cache: Addr, - no_cache: bool, - ) -> ProjectState { - match self.get_or_fetch_state(project_cache, no_cache) { - GetOrFetch::Cached(state) => state, - GetOrFetch::Scheduled(_) => ProjectState::Pending, - } + /// Whether or not this state is pending. + pub fn is_pending(&self) -> bool { + matches!(self, Self::Pending) } - /// Obtains a valid project state and passes it to the sender once ready. - /// - /// This first checks if the state needs to be updated. This is the case if the project state - /// has passed its cache timeout. The `no_cache` flag forces an update. This does nothing if an - /// update is already running in the background. - /// - /// Independent of updating, _stale_ states are passed to the sender immediately as long as they - /// are in the [grace period](Config::project_grace_period). 
- pub fn get_state( - &mut self, - project_cache: Addr, - sender: ProjectSender, - no_cache: bool, - ) { - match self.get_or_fetch_state(project_cache, no_cache) { - GetOrFetch::Cached(state) => { - sender.send(state); - } - - GetOrFetch::Scheduled(channel) => { - channel.inner.attach(sender); - } + /// Utility function that returns the project config if enabled. + pub fn enabled(self) -> Option> { + match self { + Self::Enabled(info) => Some(info), + Self::Disabled | Self::Pending => None, } } - /// Ensures the project state gets updated. - /// - /// This first checks if the state needs to be updated. This is the case if the project state - /// has passed its cache timeout. The `no_cache` flag forces another update unless one is - /// already running in the background. - /// - /// If an update is required, the update will start in the background and complete at a later - /// point. Therefore, this method is useful to trigger an update early if it is already clear - /// that the project state will be needed soon. To retrieve an updated state, use - /// [`Project::get_state`] instead. - pub fn prefetch(&mut self, project_cache: Addr, no_cache: bool) -> &mut Self { - self.get_cached_state(project_cache, no_cache); - self - } - - /// Replaces the internal project state with a new one and triggers pending actions. - /// - /// Returns the *old* project state if it was replaced. + /// Returns the revision of the contained project info. /// - /// This flushes pending envelopes from [`ValidateEnvelope`] and - /// notifies all pending receivers from [`get_state`](Self::get_state). - /// - /// `no_cache` should be passed from the requesting call. Updates with `no_cache` will always - /// take precedence. - /// - /// [`ValidateEnvelope`]: crate::services::projects::cache::ValidateEnvelope - pub fn update_state( - &mut self, - project_cache: &Addr, - state: ProjectFetchState, - no_cache: bool, - ) -> Option { - // Initiate the backoff if the incoming state is invalid. Reset it otherwise. - if state.is_pending() { - self.next_fetch_attempt = Instant::now().checked_add(self.backoff.next_backoff()); - } else { - self.next_fetch_attempt = None; - self.backoff.reset(); - } - - let Some(channel) = self.state_channel.take() else { - relay_log::error!(tags.project_key = %self.project_key, "channel is missing for the state update"); - return None; - }; - - // If the channel has `no_cache` set but we are not a `no_cache` request, we have - // been superseeded. Put it back and let the other request take precedence. - if channel.no_cache && !no_cache { - self.state_channel = Some(channel); - return None; + /// `None` if the revision is missing or not available. + pub fn revision(&self) -> Revision { + match &self { + Self::Enabled(info) => info.rev.clone(), + Self::Disabled | Self::Pending => Revision::default(), } - - // If the state is pending, return back the taken channel and schedule state update. 
- if state.is_pending() { - // Only overwrite if the old state is expired: - let is_expired = matches!(self.state.expiry_state(&self.config), ExpiryState::Expired); - let old_state = match is_expired { - true => Some(std::mem::replace(&mut self.state, state)), - false => None, - }; - - self.state_channel = Some(channel); - let attempts = self.backoff.attempt() + 1; - relay_log::debug!( - "project {} state requested {attempts} times", - self.project_key - ); - - project_cache.send(RequestUpdate { - project_key: self.project_key, - no_cache, - cached_state: self.state.clone(), - }); - return old_state; - } - - let old_state = std::mem::replace(&mut self.state, state); - - // Flush all waiting recipients. - relay_log::debug!("project state {} updated", self.project_key); - channel.inner.send(self.state.current_state(&self.config)); - - self.after_state_updated(); - - Some(old_state) - } - - /// Called after all state validations and after the project state is updated. - /// - /// See also: [`Self::update_state`]. - fn after_state_updated(&mut self) { - // Check if the new sampling config got rid of any reservoir rules we have counters for. - self.remove_expired_reservoir_rules(); } /// Creates `Scoping` for this project if the state is loaded. /// /// Returns `Some` if the project state has been fetched and contains a project identifier, /// otherwise `None`. - pub fn scoping(&self) -> Option { - self.current_state().scoping(self.project_key) - } - - /// Runs the checks on incoming envelopes. - /// - /// See, [`crate::services::projects::cache::CheckEnvelope`] for more information - /// - /// * checks the rate limits - /// * validates the envelope meta in `check_request` - determines whether the given request - /// should be accepted or discarded - /// - /// IMPORTANT: If the [`ProjectState`] is invalid, the `check_request` will be skipped and only - /// rate limits will be validated. This function **must not** be called in the main processing - /// pipeline. - pub fn check_envelope( - &mut self, - mut envelope: ManagedEnvelope, - ) -> Result { - let state = match self.current_state() { - ProjectState::Enabled(state) => Some(state.clone()), - ProjectState::Disabled => { - // TODO(jjbayer): We should refactor this function to either return a Result or - // handle envelope rejections internally, but not both. - envelope.reject(Outcome::Invalid(DiscardReason::ProjectId)); - return Err(DiscardReason::ProjectId); - } - ProjectState::Pending => None, - }; - let mut scoping = envelope.scoping(); - - if let Some(ref state) = state { - scoping = state.scope_request(envelope.envelope().meta()); - envelope.scope(scoping); - - if let Err(reason) = state.check_envelope(envelope.envelope(), &self.config) { - envelope.reject(Outcome::Invalid(reason)); - return Err(reason); - } - } - - let current_limits = self.rate_limits.current_limits(); - - let quotas = state.as_deref().map(|s| s.get_quotas()).unwrap_or(&[]); - let envelope_limiter = EnvelopeLimiter::new(CheckLimits::NonIndexed, |item_scoping, _| { - Ok(current_limits.check_with_quotas(quotas, item_scoping)) - }); - - let (mut enforcement, mut rate_limits) = - envelope_limiter.compute(envelope.envelope_mut(), &scoping)?; - - let check_nested_spans = state - .as_ref() - .is_some_and(|s| s.has_feature(Feature::ExtractSpansFromEvent)); - - // If we can extract spans from the event, we want to try and count the number of nested - // spans to correctly emit negative outcomes in case the transaction itself is dropped. 
- if check_nested_spans { - sync_spans_to_enforcement(&envelope, &mut enforcement); + pub fn scoping(&self, project_key: ProjectKey) -> Option { + match self { + Self::Enabled(info) => info.scoping(project_key), + _ => None, } - - enforcement.apply_with_outcomes(&mut envelope); - - envelope.update(); - - // Special case: Expose active rate limits for all metric namespaces if there is at least - // one metrics item in the Envelope to communicate backoff to SDKs. This is necessary - // because `EnvelopeLimiter` cannot not check metrics without parsing item contents. - if envelope.envelope().items().any(|i| i.ty().is_metrics()) { - let mut metrics_scoping = scoping.item(DataCategory::MetricBucket); - metrics_scoping.namespace = MetricNamespaceScoping::Any; - rate_limits.merge(current_limits.check_with_quotas(quotas, metrics_scoping)); - } - - let envelope = if envelope.envelope().is_empty() { - // Individual rate limits have already been issued above - envelope.reject(Outcome::RateLimited(None)); - None - } else { - Some(envelope) - }; - - Ok(CheckedEnvelope { - envelope, - rate_limits, - }) } } -/// Adds category limits for the nested spans inside a transaction. -/// -/// On the fast path of rate limiting, we do not have nested spans of a transaction extracted -/// as top-level spans, thus if we limited a transaction, we want to count and emit negative -/// outcomes for each of the spans nested inside that transaction. -fn sync_spans_to_enforcement(envelope: &ManagedEnvelope, enforcement: &mut Enforcement) { - if !enforcement.is_event_active() { - return; - } - - let spans_count = count_nested_spans(envelope); - if spans_count == 0 { - return; - } - - if enforcement.event.is_active() { - enforcement.spans = enforcement.event.clone_for(DataCategory::Span, spans_count); - } - - if enforcement.event_indexed.is_active() { - enforcement.spans_indexed = enforcement - .event_indexed - .clone_for(DataCategory::SpanIndexed, spans_count); +impl From for ProjectState { + fn from(value: ParsedProjectState) -> Self { + let ParsedProjectState { disabled, info } = value; + match disabled { + true => Self::Disabled, + false => Self::Enabled(Arc::new(info)), + } } } -/// Counts the nested spans inside the first transaction envelope item inside the [`Envelope`](crate::envelope::Envelope). -fn count_nested_spans(envelope: &ManagedEnvelope) -> usize { - #[derive(Debug, Deserialize)] - struct PartialEvent { - spans: SeqCount, - } - - envelope - .envelope() - .items() - .find(|item| *item.ty() == ItemType::Transaction && !item.spans_extracted()) - .and_then(|item| serde_json::from_slice::(&item.payload()).ok()) - // We do + 1, since we count the transaction itself because it will be extracted - // as a span and counted during the slow path of rate limiting. - .map_or(0, |event| event.spans.0 + 1) +/// Project state as used in serialization / deserialization. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParsedProjectState { + /// Whether the project state is disabled. + #[serde(default)] + pub disabled: bool, + /// Project info. + /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). 
+ #[serde(flatten)] + pub info: ProjectInfo, } -#[cfg(test)] -mod tests { - use crate::envelope::{ContentType, Envelope, Item}; - use crate::extractors::RequestMeta; - use crate::services::processor::ProcessingGroup; - use relay_base_schema::project::ProjectId; - use relay_event_schema::protocol::EventId; - use relay_test::mock_service; - use serde_json::json; - use smallvec::SmallVec; - - use super::*; - - #[test] - fn get_state_expired() { - for expiry in [9999, 0] { - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": expiry, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - // Initialize project with a state - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let project_info = ProjectInfo { - project_id: Some(ProjectId::new(123)), - ..Default::default() - }; - let mut project = Project::new(project_key, config.clone()); - project.state = ProjectFetchState::enabled(project_info); - - if expiry > 0 { - // With long expiry, should get a state - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - } else { - // With 0 expiry, project should expire immediately. No state can be set. - assert!(matches!(project.current_state(), ProjectState::Pending)); - } - } - } - - #[tokio::test] - async fn test_stale_cache() { - let (addr, _) = mock_service("project_cache", (), |&mut (), _| {}); - - let config = Arc::new( - Config::from_json_value(json!( - { - "cache": { - "project_expiry": 100, - "project_grace_period": 0, - "eviction_interval": 9999 // do not evict - } - } - )) - .unwrap(), - ); - - let channel = StateChannel::new(); - - // Initialize project with a state. - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - let mut project = Project::new(project_key, config); - project.state_channel = Some(channel); - project.state = ProjectFetchState::allowed(); - - assert!(project.next_fetch_attempt.is_none()); - // Try to update project with errored project state. - project.update_state(&addr, ProjectFetchState::pending(), false); - // Since we got invalid project state we still keep the old one meaning there - // still must be the project id set. - assert!(matches!(project.current_state(), ProjectState::Enabled(_))); - assert!(project.next_fetch_attempt.is_some()); - - // This tests that we actually initiate the backoff and the backoff mechanism works: - // * first call to `update_state` with invalid ProjectState starts the backoff, but since - // it's the first attemt, we get Duration of 0. 
- // * second call to `update_state` here will bumpt the `next_backoff` Duration to somehing - // like ~ 1s - // * and now, by calling `fetch_state` we test that it's a noop, since if backoff is active - // we should never fetch - // * without backoff it would just panic, not able to call the ProjectCache service - let channel = StateChannel::new(); - project.state_channel = Some(channel); - project.update_state(&addr, ProjectFetchState::pending(), false); - project.fetch_state(addr, false); - } - - fn create_project(config: Option) -> Project { - let project_key = ProjectKey::parse("e12d836b15bb49d7bbf99e64295d995b").unwrap(); - let mut project = Project::new(project_key, Arc::new(Config::default())); - let mut project_info = ProjectInfo { - project_id: Some(ProjectId::new(42)), - ..Default::default() - }; - let mut public_keys = SmallVec::new(); - public_keys.push(PublicKeyConfig { - public_key: project_key, - numeric_id: None, - }); - project_info.public_keys = public_keys; - if let Some(config) = config { - project_info.config = serde_json::from_value(config).unwrap(); - } - project.state = ProjectFetchState::enabled(project_info); - project - } - - fn request_meta() -> RequestMeta { - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - RequestMeta::new(dsn) - } - - #[test] - fn test_track_nested_spans_outcomes() { - let mut project = create_project(Some(json!({ - "features": [ - "organizations:indexed-spans-extraction" - ], - "quotas": [{ - "id": "foo", - "categories": ["transaction"], - "window": 3600, - "limit": 0, - "reasonCode": "foo", - }] - }))); - - let mut envelope = Envelope::from_request(Some(EventId::new()), request_meta()); - - let mut transaction = Item::new(ItemType::Transaction); - transaction.set_payload( - ContentType::Json, - r#"{ - "event_id": "52df9022835246eeb317dbd739ccd059", - "type": "transaction", - "transaction": "I have a stale timestamp, but I'm recent!", - "start_timestamp": 1, - "timestamp": 2, - "contexts": { - "trace": { - "trace_id": "ff62a8b040f340bda5d830223def1d81", - "span_id": "bd429c44b67a3eb4" - } - }, - "spans": [ - { - "span_id": "bd429c44b67a3eb4", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - }, - { - "span_id": "bd429c44b67a3eb5", - "start_timestamp": 1, - "timestamp": null, - "trace_id": "ff62a8b040f340bda5d830223def1d81" - } - ] -}"#, - ); - - envelope.add_item(transaction); - - let (outcome_aggregator, mut outcome_aggregator_rx) = Addr::custom(); - let (test_store, _) = Addr::custom(); - - let managed_envelope = ManagedEnvelope::new( - envelope, - outcome_aggregator.clone(), - test_store, - ProcessingGroup::Transaction, - ); - - let _ = project.check_envelope(managed_envelope); - drop(outcome_aggregator); - - let expected = [ - (DataCategory::Transaction, 1), - (DataCategory::TransactionIndexed, 1), - (DataCategory::Span, 3), - (DataCategory::SpanIndexed, 3), - ]; - - for (expected_category, expected_quantity) in expected { - let outcome = outcome_aggregator_rx.blocking_recv().unwrap(); - assert_eq!(outcome.category, expected_category); - assert_eq!(outcome.quantity, expected_quantity); - } - } +/// Limited project state for external Relays. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] +pub struct LimitedParsedProjectState { + /// Whether the project state is disabled. + pub disabled: bool, + /// Limited project info for external Relays. 
+ /// + /// This contains no information when `disabled` is `true`, except for + /// public keys in static project configs (see [`crate::services::projects::source::local`]). + #[serde(with = "LimitedProjectInfo")] + #[serde(flatten)] + pub info: ProjectInfo, } diff --git a/relay-server/src/services/projects/project/state/fetch_state.rs b/relay-server/src/services/projects/project/state/fetch_state.rs deleted file mode 100644 index 020150f303..0000000000 --- a/relay-server/src/services/projects/project/state/fetch_state.rs +++ /dev/null @@ -1,162 +0,0 @@ -use std::sync::Arc; - -use tokio::time::Instant; - -use relay_config::Config; -use relay_dynamic_config::ProjectConfig; - -use crate::services::projects::project::state::info::ProjectInfo; -use crate::services::projects::project::ProjectState; - -/// Hides a cached project state and only exposes it if it has not expired. -#[derive(Clone, Debug)] -pub struct ProjectFetchState { - /// The time at which this project state was last updated. - last_fetch: Option, - state: ProjectState, -} - -impl ProjectFetchState { - /// Takes a [`ProjectState`] and sets it's last fetch to the current time. - pub fn new(state: ProjectState) -> Self { - Self { - last_fetch: Some(Instant::now()), - state, - } - } - - /// Refreshes the expiry of the fetch state. - pub fn refresh(old: ProjectFetchState) -> Self { - Self { - last_fetch: Some(Instant::now()), - state: old.state, - } - } - - /// Project state for an unknown but allowed project. - /// - /// This state is used for forwarding in Proxy mode. - pub fn allowed() -> Self { - Self::enabled(ProjectInfo { - project_id: None, - last_change: None, - rev: None, - public_keys: Default::default(), - slug: None, - config: ProjectConfig::default(), - organization_id: None, - }) - } - - /// An enabled project state created from a project info. - pub fn enabled(project_info: ProjectInfo) -> Self { - Self::new(ProjectState::Enabled(Arc::new(project_info))) - } - - // Returns a disabled state. - pub fn disabled() -> Self { - Self::new(ProjectState::Disabled) - } - - /// Returns a pending or invalid state. - pub fn pending() -> Self { - Self::new(ProjectState::Pending) - } - - /// Create a config that immediately counts as expired. - /// - /// This is what [`Project`](crate::services::projects::project::Project) initializes itself with. - pub fn expired() -> Self { - Self { - // Make sure the state immediately qualifies as expired: - last_fetch: None, - state: ProjectState::Pending, - } - } - - /// Sanitizes the contained project state. See [`ProjectState::sanitized`]. - pub fn sanitized(self) -> Self { - let Self { last_fetch, state } = self; - Self { - last_fetch, - state: state.sanitized(), - } - } - - /// Returns `true` if the contained state is pending. - pub fn is_pending(&self) -> bool { - matches!(self.state, ProjectState::Pending) - } - - /// Returns information about the expiry of a project state. - /// - /// If no detailed information is needed, use [`Self::current_state`] instead. - pub fn expiry_state(&self, config: &Config) -> ExpiryState { - match self.check_expiry(config) { - Expiry::Updated => ExpiryState::Updated(&self.state), - Expiry::Stale => ExpiryState::Stale(&self.state), - Expiry::Expired => ExpiryState::Expired, - } - } - - /// Returns the current project state, if it has not yet expired. 
- pub fn current_state(&self, config: &Config) -> ProjectState { - match self.expiry_state(config) { - ExpiryState::Updated(state) | ExpiryState::Stale(state) => state.clone(), - ExpiryState::Expired => ProjectState::Pending, - } - } - - /// Returns whether this state is outdated and needs to be refetched. - fn check_expiry(&self, config: &Config) -> Expiry { - let Some(last_fetch) = self.last_fetch else { - return Expiry::Expired; - }; - let expiry = match &self.state { - ProjectState::Enabled(info) if info.project_id.is_some() => { - config.project_cache_expiry() - } - _ => config.cache_miss_expiry(), - }; - - let elapsed = last_fetch.elapsed(); - if elapsed >= expiry + config.project_grace_period() { - Expiry::Expired - } else if elapsed >= expiry { - Expiry::Stale - } else { - Expiry::Updated - } - } - - /// Returns the revision of the contained project state. - /// - /// See: [`ProjectState::revision`]. - pub fn revision(&self) -> Option<&str> { - self.state.revision() - } -} - -/// Wrapper for a project state, with expiry information. -#[derive(Clone, Copy, Debug)] -pub enum ExpiryState<'a> { - /// An up-to-date project state. See [`Expiry::Updated`]. - Updated(&'a ProjectState), - /// A stale project state that can still be used. See [`Expiry::Stale`]. - Stale(&'a ProjectState), - /// An expired project state that should not be used. See [`Expiry::Expired`]. - Expired, -} - -/// The expiry status of a project state. Return value of [`ProjectFetchState::check_expiry`]. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -enum Expiry { - /// The project state is perfectly up to date. - Updated, - /// The project state is outdated but events depending on this project state can still be - /// processed. The state should be refreshed in the background though. - Stale, - /// The project state is completely outdated and events need to be buffered up until the new - /// state has been fetched. - Expired, -} diff --git a/relay-server/src/services/projects/project/state/mod.rs b/relay-server/src/services/projects/project/state/mod.rs deleted file mode 100644 index 868ad5ed4a..0000000000 --- a/relay-server/src/services/projects/project/state/mod.rs +++ /dev/null @@ -1,126 +0,0 @@ -//! Types that represent the current project state. -use std::sync::Arc; - -use serde::{Deserialize, Serialize}; - -use relay_base_schema::project::ProjectKey; -use relay_quotas::Scoping; - -mod fetch_state; -mod info; - -pub use self::fetch_state::{ExpiryState, ProjectFetchState}; -pub use self::info::{LimitedProjectInfo, ProjectInfo}; - -/// Representation of a project's current state. -#[derive(Clone, Debug)] -pub enum ProjectState { - /// A valid project that is not disabled. - Enabled(Arc), - /// A project that was marked as "gone" by the upstream. This variant does not expose - /// any other project information. - Disabled, - /// A project to which one of the following conditions apply: - /// - The project has not yet been fetched. - /// - The upstream returned "pending" for this project (see [`crate::services::projects::source::upstream`]). - /// - The upstream returned an unparsable project so we have to try again. - /// - The project has expired and must be treated as "has not been fetched". - Pending, -} - -impl ProjectState { - /// Runs a post-deserialization step to normalize the project config (e.g. legacy fields). 
- pub fn sanitized(self) -> Self { - match self { - ProjectState::Enabled(state) => { - ProjectState::Enabled(Arc::new(state.as_ref().clone().sanitized())) - } - ProjectState::Disabled => ProjectState::Disabled, - ProjectState::Pending => ProjectState::Pending, - } - } - - /// Whether or not this state is pending. - pub fn is_pending(&self) -> bool { - matches!(self, ProjectState::Pending) - } - - /// Utility function that returns the project config if enabled. - pub fn enabled(self) -> Option> { - match self { - ProjectState::Enabled(info) => Some(info), - ProjectState::Disabled | ProjectState::Pending => None, - } - } - - /// Returns the revision of the contained project info. - /// - /// `None` if the revision is missing or not available. - pub fn revision(&self) -> Option<&str> { - match &self { - ProjectState::Enabled(info) => info.rev.as_deref(), - ProjectState::Disabled => None, - ProjectState::Pending => None, - } - } - - /// Creates `Scoping` for this project if the state is loaded. - /// - /// Returns `Some` if the project state has been fetched and contains a project identifier, - /// otherwise `None`. - pub fn scoping(&self, project_key: ProjectKey) -> Option { - match self { - Self::Enabled(info) => info.scoping(project_key), - _ => None, - } - } -} - -impl From for ProjectState { - fn from(value: ParsedProjectState) -> Self { - let ParsedProjectState { disabled, info } = value; - match disabled { - true => ProjectState::Disabled, - false => ProjectState::Enabled(Arc::new(info)), - } - } -} - -/// Project state as used in serialization / deserialization. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParsedProjectState { - /// Whether the project state is disabled. - #[serde(default)] - pub disabled: bool, - /// Project info. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(flatten)] - pub info: ProjectInfo, -} - -/// Limited project state for external Relays. -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase", remote = "ParsedProjectState")] -pub struct LimitedParsedProjectState { - /// Whether the project state is disabled. - pub disabled: bool, - /// Limited project info for external Relays. - /// - /// This contains no information when `disabled` is `true`, except for - /// public keys in static project configs (see [`crate::services::projects::source::local`]). - #[serde(with = "LimitedProjectInfo")] - #[serde(flatten)] - pub info: ProjectInfo, -} - -/// Response indicating whether a project state needs to be updated -/// or the upstream does not have a newer version. -#[derive(Debug, Clone)] -pub enum UpstreamProjectState { - /// The upstream sent a [`ProjectState`]. - New(ProjectState), - /// The upstream indicated that there is no newer version of the state available. 
- NotModified, -} diff --git a/relay-server/src/services/projects/source/mod.rs b/relay-server/src/services/projects/source/mod.rs index 5b208ef3ae..1311acfc40 100644 --- a/relay-server/src/services/projects/source/mod.rs +++ b/relay-server/src/services/projects/source/mod.rs @@ -15,8 +15,7 @@ pub mod local; pub mod redis; pub mod upstream; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::ProjectFetchState; +use crate::services::projects::project::{ProjectState, Revision}; use crate::services::upstream::UpstreamRelay; use self::local::{LocalProjectSource, LocalProjectSourceService}; @@ -91,53 +90,55 @@ impl ProjectSource { } } + /// Fetches a project with `project_key` from the configured sources. + /// + /// Returns a fully sanitized project. pub async fn fetch( self, project_key: ProjectKey, no_cache: bool, - cached_state: ProjectFetchState, - ) -> Result { + current_revision: Revision, + ) -> Result { let state_opt = self .local_source .send(FetchOptionalProjectState { project_key }) .await?; if let Some(state) = state_opt { - return Ok(ProjectFetchState::new(state)); + return Ok(state.into()); } match self.config.relay_mode() { - RelayMode::Proxy => return Ok(ProjectFetchState::allowed()), - RelayMode::Static => return Ok(ProjectFetchState::disabled()), - RelayMode::Capture => return Ok(ProjectFetchState::allowed()), + RelayMode::Proxy => return Ok(ProjectState::new_allowed().into()), + RelayMode::Static => return Ok(ProjectState::Disabled.into()), + RelayMode::Capture => return Ok(ProjectState::new_allowed().into()), RelayMode::Managed => (), // Proceed with loading the config from redis or upstream } - let current_revision = cached_state.revision().map(String::from); #[cfg(feature = "processing")] if let Some(redis_source) = self.redis_source { let current_revision = current_revision.clone(); let redis_permit = self.redis_semaphore.acquire().await?; let state_fetch_result = tokio::task::spawn_blocking(move || { - redis_source.get_config_if_changed(project_key, current_revision.as_deref()) + redis_source.get_config_if_changed(project_key, current_revision) }) .await?; drop(redis_permit); match state_fetch_result { // New state fetched from Redis, possibly pending. - Ok(UpstreamProjectState::New(state)) => { + // + // If it is pending, we must fallback to fetching from the upstream. + Ok(SourceProjectState::New(state)) => { let state = state.sanitized(); if !state.is_pending() { - return Ok(ProjectFetchState::new(state)); + return Ok(state.into()); } } // Redis reported that we're holding an up-to-date version of the state already, // refresh the state and return the old cached state again. 
- Ok(UpstreamProjectState::NotModified) => { - return Ok(ProjectFetchState::refresh(cached_state)) - } + Ok(SourceProjectState::NotModified) => return Ok(SourceProjectState::NotModified), Err(error) => { relay_log::error!( error = &error as &dyn std::error::Error, @@ -156,10 +157,10 @@ impl ProjectSource { }) .await?; - match state { - UpstreamProjectState::New(state) => Ok(ProjectFetchState::new(state.sanitized())), - UpstreamProjectState::NotModified => Ok(ProjectFetchState::refresh(cached_state)), - } + Ok(match state { + SourceProjectState::New(state) => SourceProjectState::New(state.sanitized()), + SourceProjectState::NotModified => SourceProjectState::NotModified, + }) } } @@ -189,10 +190,7 @@ pub struct FetchProjectState { /// The upstream is allowed to omit full project configs /// for requests for which the requester already has the most /// recent revision. - /// - /// Settings this to `None` will essentially always re-fetch - /// the project config. - pub current_revision: Option<String>, + pub current_revision: Revision, /// If true, all caches should be skipped and a fresh state should be computed. pub no_cache: bool, @@ -208,3 +206,19 @@ impl FetchOptionalProjectState { self.project_key } } + +/// Response indicating whether a project state needs to be updated +/// or the upstream does not have a newer version. +#[derive(Debug, Clone)] +pub enum SourceProjectState { +  /// The upstream sent a [`ProjectState`]. +  New(ProjectState), +  /// The upstream indicated that there is no newer version of the state available. +  NotModified, +} + +impl From<ProjectState> for SourceProjectState { + fn from(value: ProjectState) -> Self { + Self::New(value) + } +} diff --git a/relay-server/src/services/projects/source/redis.rs index e2a520f4e7..7109c408cd 100644 --- a/relay-server/src/services/projects/source/redis.rs +++ b/relay-server/src/services/projects/source/redis.rs @@ -5,8 +5,8 @@ use relay_config::Config; use relay_redis::{RedisError, RedisPool}; use relay_statsd::metric; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::{ParsedProjectState, ProjectState}; +use crate::services::projects::project::{ParsedProjectState, ProjectState, Revision}; +use crate::services::projects::source::SourceProjectState; use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; #[derive(Debug, Clone)] @@ -60,13 +60,13 @@ impl RedisProjectSource { pub fn get_config_if_changed( &self, key: ProjectKey, - revision: Option<&str>, - ) -> Result { + revision: Revision, + ) -> Result { let mut client = self.redis.client()?; let mut connection = client.connection()?; // Only check for the revision if we were passed a revision.
- if let Some(revision) = revision { + if let Some(revision) = revision.as_str() { let current_revision: Option = relay_redis::redis::cmd("GET") .arg(self.get_redis_rev_key(key)) .query(&mut connection) @@ -80,7 +80,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "revision", ); - return Ok(UpstreamProjectState::NotModified); + return Ok(SourceProjectState::NotModified); } } @@ -94,7 +94,7 @@ impl RedisProjectSource { counter(RelayCounters::ProjectStateRedis) += 1, hit = "false" ); - return Ok(UpstreamProjectState::New(ProjectState::Pending)); + return Ok(SourceProjectState::New(ProjectState::Pending)); }; let response = ProjectState::from(parse_redis_response(response.as_slice())?); @@ -106,18 +106,18 @@ impl RedisProjectSource { // // While this is theoretically possible this should always been handled using the above revision // check using the additional Redis key. - if revision.is_some() && response.revision() == revision { + if response.revision() == revision { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config_revision" ); - Ok(UpstreamProjectState::NotModified) + Ok(SourceProjectState::NotModified) } else { metric!( counter(RelayCounters::ProjectStateRedis) += 1, hit = "project_config" ); - Ok(UpstreamProjectState::New(response)) + Ok(SourceProjectState::New(response)) } } diff --git a/relay-server/src/services/projects/source/upstream.rs b/relay-server/src/services/projects/source/upstream.rs index bcbfd5a279..6f051c5ec4 100644 --- a/relay-server/src/services/projects/source/upstream.rs +++ b/relay-server/src/services/projects/source/upstream.rs @@ -18,10 +18,9 @@ use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::Instant; -use crate::services::projects::project::state::UpstreamProjectState; -use crate::services::projects::project::ParsedProjectState; -use crate::services::projects::project::ProjectState; -use crate::services::projects::source::FetchProjectState; +use crate::services::projects::project::Revision; +use crate::services::projects::project::{ParsedProjectState, ProjectState}; +use crate::services::projects::source::{FetchProjectState, SourceProjectState}; use crate::services::upstream::{ Method, RequestPriority, SendQuery, UpstreamQuery, UpstreamRelay, UpstreamRequestError, }; @@ -41,7 +40,7 @@ pub struct GetProjectStates { /// /// The revisions are mapped by index to the project key, /// this is a separate field to keep the API compatible. - revisions: Vec>, + revisions: Vec, /// If `true` the upstream should return a full configuration. /// /// Upstreams will ignore this for non-internal Relays. @@ -99,10 +98,10 @@ impl UpstreamQuery for GetProjectStates { #[derive(Debug)] struct ProjectStateChannel { // Main broadcast channel. - channel: BroadcastChannel, + channel: BroadcastChannel, // Additional broadcast channels tracked from merge operations. - merged: Vec>, - revision: Option, + merged: Vec>, + revision: Revision, deadline: Instant, no_cache: bool, attempts: u64, @@ -114,8 +113,8 @@ struct ProjectStateChannel { impl ProjectStateChannel { pub fn new( - sender: BroadcastSender, - revision: Option, + sender: BroadcastSender, + revision: Revision, timeout: Duration, no_cache: bool, ) -> Self { @@ -142,18 +141,14 @@ impl ProjectStateChannel { /// If the new revision is different from the contained revision this clears the revision. 
/// To not have multiple fetches per revision per batch, we need to find a common denominator /// for requests with different revisions, which is always to fetch the full project config. - pub fn attach( - &mut self, - sender: BroadcastSender, - revision: Option, - ) { + pub fn attach(&mut self, sender: BroadcastSender, revision: Revision) { self.channel.attach(sender); if self.revision != revision { - self.revision = None; + self.revision = Revision::default(); } } - pub fn send(self, state: UpstreamProjectState) { + pub fn send(self, state: SourceProjectState) { for channel in self.merged { channel.send(state.clone()); } @@ -179,7 +174,7 @@ impl ProjectStateChannel { self.merged.push(channel); self.merged.extend(merged); if self.revision != revision { - self.revision = None; + self.revision = Revision::default(); } self.deadline = self.deadline.max(deadline); self.no_cache |= no_cache; @@ -198,16 +193,16 @@ type ProjectStateChannels = HashMap; /// Internally it maintains the buffer queue of the incoming requests, which got scheduled to fetch the /// state and takes care of the backoff in case there is a problem with the requests. #[derive(Debug)] -pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); +pub struct UpstreamProjectSource(FetchProjectState, BroadcastSender); impl Interface for UpstreamProjectSource {} impl FromMessage for UpstreamProjectSource { - type Response = BroadcastResponse; + type Response = BroadcastResponse; fn from_message( message: FetchProjectState, - sender: BroadcastSender, + sender: BroadcastSender, ) -> Self { Self(message, sender) } @@ -467,7 +462,7 @@ impl UpstreamProjectSourceService { let mut result = "ok"; let state = if response.unchanged.contains(&key) { result = "ok_unchanged"; - UpstreamProjectState::NotModified + SourceProjectState::NotModified } else { let state = response .configs @@ -485,7 +480,7 @@ impl UpstreamProjectSourceService { ErrorBoundary::Ok(Some(state)) => state.into(), }; - UpstreamProjectState::New(state) + SourceProjectState::New(state) }; metric!( @@ -697,7 +692,7 @@ mod tests { let mut response1 = service.send(FetchProjectState { project_key, - current_revision: Some("123".to_owned()), + current_revision: "123".into(), no_cache: false, }); @@ -708,7 +703,7 @@ mod tests { // request, after responding to the first inflight request. let mut response2 = service.send(FetchProjectState { project_key, - current_revision: None, + current_revision: Revision::default(), no_cache: false, }); @@ -732,8 +727,8 @@ mod tests { .await; let (response1, response2) = futures::future::join(response1, response2).await; - assert!(matches!(response1, Ok(UpstreamProjectState::NotModified))); - assert!(matches!(response2, Ok(UpstreamProjectState::NotModified))); + assert!(matches!(response1, Ok(SourceProjectState::NotModified))); + assert!(matches!(response2, Ok(SourceProjectState::NotModified))); // No more messages to upstream expected. 
assert!(upstream_rx.try_recv().is_err()); diff --git a/relay-server/src/services/spooler/mod.rs b/relay-server/src/services/spooler/mod.rs index 8aa90b7b2c..6201ff9b7a 100644 --- a/relay-server/src/services/spooler/mod.rs +++ b/relay-server/src/services/spooler/mod.rs @@ -54,7 +54,7 @@ use tokio::sync::mpsc; use crate::envelope::{Envelope, EnvelopeError}; use crate::services::outcome::TrackOutcome; use crate::services::processor::ProcessingGroup; -use crate::services::projects::cache::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; +use crate::services::projects::cache::legacy::{ProjectCache, RefreshIndexCache, UpdateSpoolIndex}; use crate::services::test_store::TestStore; use crate::statsd::{RelayCounters, RelayGauges, RelayHistograms, RelayTimers}; use crate::utils::{ManagedEnvelope, MemoryChecker}; @@ -156,7 +156,6 @@ impl QueueKey { /// It's sent in response to [`DequeueMany`] message from the [`ProjectCache`]. #[derive(Debug)] pub struct UnspooledEnvelope { - pub key: QueueKey, pub managed_envelope: ManagedEnvelope, } @@ -375,7 +374,6 @@ impl InMemory { self.envelope_count = self.envelope_count.saturating_sub(1); sender .send(UnspooledEnvelope { - key, managed_envelope: envelope, }) .ok(); @@ -585,14 +583,9 @@ impl OnDisk { }; match self.extract_envelope(envelope, services) { - Ok((key, managed_envelopes)) => { + Ok((_, managed_envelopes)) => { for managed_envelope in managed_envelopes { - sender - .send(UnspooledEnvelope { - key, - managed_envelope, - }) - .ok(); + sender.send(UnspooledEnvelope { managed_envelope }).ok(); } } Err(err) => relay_log::error!( @@ -1436,10 +1429,7 @@ mod tests { sender: tx.clone(), }); - let UnspooledEnvelope { - key: _, - managed_envelope, - } = rx.recv().await.unwrap(); + let UnspooledEnvelope { managed_envelope } = rx.recv().await.unwrap(); let received_at_received = managed_envelope.received_at(); // Check if the original start time elapsed to the same second as the restored one. diff --git a/relay-server/src/statsd.rs b/relay-server/src/statsd.rs index 94cd59f771..3e5ed3ec48 100644 --- a/relay-server/src/statsd.rs +++ b/relay-server/src/statsd.rs @@ -7,8 +7,6 @@ pub enum RelayGauges { /// The state of Relay with respect to the upstream connection. /// Possible values are `0` for normal operations and `1` for a network outage. NetworkOutage, - /// The number of items currently in the garbage disposal queue. - ProjectCacheGarbageQueueSize, /// The number of envelopes waiting for project states in memory. /// /// This number is always <= `EnvelopeQueueSize`. @@ -46,13 +44,14 @@ pub enum RelayGauges { /// The number of idle connections in the Redis Pool. #[cfg(feature = "processing")] RedisPoolIdleConnections, + /// The number of notifications in the broadcast channel of the project cache. 
+ ProjectCacheNotificationChannel, } impl GaugeMetric for RelayGauges { fn name(&self) -> &'static str { match self { RelayGauges::NetworkOutage => "upstream.network_outage", - RelayGauges::ProjectCacheGarbageQueueSize => "project_cache.garbage.queue_size", RelayGauges::BufferEnvelopesMemoryCount => "buffer.envelopes_mem_count", RelayGauges::BufferEnvelopesDiskCount => "buffer.envelopes_disk_count", RelayGauges::BufferPeriodicUnspool => "buffer.unspool.periodic", @@ -64,6 +63,9 @@ impl GaugeMetric for RelayGauges { RelayGauges::RedisPoolConnections => "redis.pool.connections", #[cfg(feature = "processing")] RelayGauges::RedisPoolIdleConnections => "redis.pool.idle_connections", + RelayGauges::ProjectCacheNotificationChannel => { + "project_cache.notification_channel.size" + } } } } @@ -395,8 +397,6 @@ pub enum RelayTimers { /// Total time in milliseconds an envelope spends in Relay from the time it is received until it /// finishes processing and has been submitted to the upstream. EnvelopeTotalTime, - /// Total time in milliseconds spent evicting outdated and unused projects happens. - ProjectStateEvictionDuration, /// Total time in milliseconds spent fetching queued project configuration updates requests to /// resolve. /// @@ -487,25 +487,30 @@ pub enum RelayTimers { /// /// - `message`: The type of message that was processed. ProcessMessageDuration, - /// Timing in milliseconds for handling a project cache message. + /// Timing in milliseconds for processing a task in the project cache service. /// /// This metric is tagged with: - /// - `message`: The type of message that was processed. - ProjectCacheMessageDuration, - /// Timing in milliseconds for processing a message in the buffer service. + /// - `task`: The type of the task the project cache does. + ProjectCacheTaskDuration, + /// Timing in milliseconds for handling a legacy project cache message. /// /// This metric is tagged with: - /// /// - `message`: The type of message that was processed. - BufferMessageProcessDuration, - /// Timing in milliseconds for processing a task in the project cache service. + LegacyProjectCacheMessageDuration, + /// Timing in milliseconds for processing a task in the legacy project cache service. /// /// A task is a unit of work the service does. Each branch of the /// `tokio::select` is a different task type. /// /// This metric is tagged with: - /// - `task`: The type of the task the processor does. - ProjectCacheTaskDuration, + /// - `task`: The type of the task the project cache does. + LegacyProjectCacheTaskDuration, + /// Timing in milliseconds for processing a message in the buffer service. + /// + /// This metric is tagged with: + /// + /// - `message`: The type of message that was processed. + BufferMessageProcessDuration, /// Timing in milliseconds for handling and responding to a health check request. 
/// /// This metric is tagged with: @@ -572,7 +577,6 @@ impl TimerMetric for RelayTimers { RelayTimers::EnvelopeWaitTime => "event.wait_time", RelayTimers::EnvelopeProcessingTime => "event.processing_time", RelayTimers::EnvelopeTotalTime => "event.total_time", - RelayTimers::ProjectStateEvictionDuration => "project_state.eviction.duration", RelayTimers::ProjectStateRequestDuration => "project_state.request.duration", #[cfg(feature = "processing")] RelayTimers::ProjectStateDecompression => "project_state.decompression", @@ -585,9 +589,12 @@ impl TimerMetric for RelayTimers { RelayTimers::ReplayRecordingProcessing => "replay.recording.process", RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration", RelayTimers::ProcessMessageDuration => "processor.message.duration", - RelayTimers::ProjectCacheMessageDuration => "project_cache.message.duration", RelayTimers::BufferMessageProcessDuration => "buffer.message.duration", RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration", + RelayTimers::LegacyProjectCacheMessageDuration => { + "legacy_project_cache.message.duration" + } + RelayTimers::LegacyProjectCacheTaskDuration => "legacy_project_cache.task.duration", RelayTimers::HealthCheckDuration => "health.message.duration", #[cfg(feature = "processing")] RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets", @@ -683,15 +690,6 @@ pub enum RelayCounters { /// - `invalid`: Data was considered invalid and could not be recovered. The reason indicates /// the validation that failed. Outcomes, - /// Number of times a project state is looked up from the cache. - /// - /// This includes lookups for both cached and new projects. As part of this, updates for - /// outdated or expired project caches are triggered. - /// - /// Related metrics: - /// - `project_cache.hit`: For successful cache lookups, even for outdated projects. - /// - `project_cache.miss`: For failed lookups resulting in an update. - ProjectStateGet, /// Number of project state HTTP requests. /// /// Relay updates projects in batches. Every update cycle, Relay requests @@ -701,15 +699,6 @@ pub enum RelayCounters { /// Note that after an update loop has completed, there may be more projects pending updates. /// This is indicated by `project_state.pending`. ProjectStateRequest, - /// Number of times a project config was requested with `.no-cache`. - /// - /// This effectively counts the number of envelopes or events that have been sent with a - /// corresponding DSN. Actual queries to the upstream may still be deduplicated for these - /// project state requests. - /// - /// A maximum of 1 such requests per second is allowed per project key. This metric counts only - /// permitted requests. - ProjectStateNoCache, /// Number of times a project state is requested from the central Redis cache. 
/// /// This metric is tagged with: @@ -881,9 +870,7 @@ impl CounterMetric for RelayCounters { RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes", RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes", RelayCounters::Outcomes => "events.outcomes", - RelayCounters::ProjectStateGet => "project_state.get", RelayCounters::ProjectStateRequest => "project_state.request", - RelayCounters::ProjectStateNoCache => "project_state.no_cache", #[cfg(feature = "processing")] RelayCounters::ProjectStateRedis => "project_state.redis.requests", RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed", diff --git a/relay-server/src/testutils.rs b/relay-server/src/testutils.rs index 9f4c255d35..56e9742925 100644 --- a/relay-server/src/testutils.rs +++ b/relay-server/src/testutils.rs @@ -20,6 +20,7 @@ use crate::service::create_redis_pools; use crate::services::global_config::GlobalConfigHandle; use crate::services::outcome::TrackOutcome; use crate::services::processor::{self, EnvelopeProcessorService}; +use crate::services::projects::cache::ProjectCacheHandle; use crate::services::projects::project::ProjectInfo; use crate::services::test_store::TestStore; use crate::utils::{ThreadPool, ThreadPoolBuilder}; @@ -117,7 +118,6 @@ pub fn empty_envelope_with_dsn(dsn: &str) -> Box { pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); - let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); let (aggregator, _) = mock_service("aggregator", (), |&mut (), _| {}); let (upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); @@ -132,12 +132,12 @@ pub fn create_test_processor(config: Config) -> EnvelopeProcessorService { create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), + ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, processor::Addrs { outcome_aggregator, - project_cache, upstream_relay, test_store, #[cfg(feature = "processing")] @@ -162,6 +162,7 @@ pub fn create_test_processor_with_addrs( create_processor_pool(), Arc::clone(&config), GlobalConfigHandle::fixed(Default::default()), + ProjectCacheHandle::for_test(), Cogs::noop(), #[cfg(feature = "processing")] redis_pools, diff --git a/relay-server/src/utils/garbage.rs b/relay-server/src/utils/garbage.rs deleted file mode 100644 index 5b86cc80cd..0000000000 --- a/relay-server/src/utils/garbage.rs +++ /dev/null @@ -1,113 +0,0 @@ -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::{mpsc, Arc}; -use std::thread::JoinHandle; - -/// Garbage disposal agent. -/// -/// Spawns a background thread which drops items sent to it via [`GarbageDisposal::dispose`]. -#[derive(Debug)] -pub struct GarbageDisposal { - tx: mpsc::Sender, - queue_size: Arc, -} - -impl GarbageDisposal { - /// Returns a new instance plus a handle to join on the background thread. - /// Currently only used in tests. 
- fn new_joinable() -> (Self, JoinHandle<()>) { - let (tx, rx) = mpsc::channel(); - - let queue_size = Arc::new(AtomicUsize::new(0)); - let queue_size_clone = queue_size.clone(); - let join_handle = std::thread::spawn(move || { - relay_log::debug!("Start garbage collection thread"); - while let Ok(object) = rx.recv() { - queue_size_clone.fetch_sub(1, Ordering::Relaxed); // Wraps around on overflow - drop(object); - } - relay_log::debug!("Stop garbage collection thread"); - }); - - (Self { tx, queue_size }, join_handle) - } - - /// Spawns a new garbage disposal instance. - /// Every instance has its own background thread that received items to be dropped via - /// [`Self::dispose`]. - /// When the instance is dropped, the background thread stops automatically. - pub fn new() -> Self { - let (instance, _) = Self::new_joinable(); - instance - } - - /// Defers dropping an object by sending it to the background thread. - pub fn dispose(&self, object: U) - where - T: From, - { - self.queue_size.fetch_add(1, Ordering::Relaxed); - self.tx - .send(T::from(object)) - .map_err(|e| { - relay_log::error!("failed to send object to garbage disposal thread, drop here"); - drop(e.0); - }) - .ok(); - } - - /// Get current queue size. - pub fn queue_size(&self) -> usize { - self.queue_size.load(Ordering::Relaxed) - } -} - -impl Default for GarbageDisposal { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - use std::thread::ThreadId; - - use super::GarbageDisposal; - - struct SomeStruct { - thread_ids: Arc>>, - } - - impl Drop for SomeStruct { - fn drop(&mut self) { - self.thread_ids - .lock() - .unwrap() - .push(std::thread::current().id()) - } - } - - #[test] - fn test_garbage_disposal() { - let thread_ids = Arc::new(Mutex::new(Vec::::new())); - - let x1 = SomeStruct { - thread_ids: thread_ids.clone(), - }; - drop(x1); - - let x2 = SomeStruct { - thread_ids: thread_ids.clone(), - }; - - let (garbage, join_handle) = GarbageDisposal::::new_joinable(); - garbage.dispose(x2); - drop(garbage); // breaks the while loop by dropping rx - join_handle.join().ok(); // wait for thread to finish its work - - let thread_ids = thread_ids.lock().unwrap(); - assert_eq!(thread_ids.len(), 2); - assert_eq!(thread_ids[0], std::thread::current().id()); - assert!(thread_ids[0] != thread_ids[1]); - } -} diff --git a/relay-server/src/utils/mod.rs b/relay-server/src/utils/mod.rs index 8d12ee347e..9c428d3b5c 100644 --- a/relay-server/src/utils/mod.rs +++ b/relay-server/src/utils/mod.rs @@ -1,6 +1,5 @@ mod api; mod dynamic_sampling; -mod garbage; mod managed_envelope; mod multipart; mod param_parser; @@ -22,7 +21,6 @@ mod unreal; pub use self::api::*; pub use self::dynamic_sampling::*; -pub use self::garbage::*; pub use self::managed_envelope::*; pub use self::memory::*; pub use self::multipart::*; diff --git a/tests/integration/test_projectconfigs.py b/tests/integration/test_projectconfigs.py index 18f1f2b39d..c10e807cf8 100644 --- a/tests/integration/test_projectconfigs.py +++ b/tests/integration/test_projectconfigs.py @@ -12,6 +12,42 @@ RelayInfo = namedtuple("RelayInfo", ["id", "public_key", "secret_key", "internal"]) +def request_config( + relay, packed, signature, *, version="3", relay_id=None, headers=None +): + return relay.post( + f"/api/0/relays/projectconfigs/?version={version}", + data=packed, + headers={ + "X-Sentry-Relay-Id": relay_id or relay.relay_id, + "X-Sentry-Relay-Signature": signature, + } + | (headers or {}), + ) + + +def get_response(relay, packed, 
signature, *, version="3", relay_id=None, headers=None): + data = None + deadline = time.monotonic() + 15 + while time.monotonic() <= deadline: + response = request_config( + relay, + packed, + signature, + version=version, + relay_id=relay_id, + headers=headers, + ) + assert response.ok + data = response.json() + print(data) + if data["configs"] or data.get("global") or data.get("unchanged"): + return data, response + time.sleep(0.01) + + assert False, "Relay did still not receive a project config from minisentry" + + @pytest.mark.parametrize( "caller, projects", [ @@ -84,16 +120,9 @@ def test_dynamic_relays(mini_sentry, relay, caller, projects): packed, signature = caller.secret_key.pack(request) - resp = relay1.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={"X-Sentry-Relay-Id": caller.id, "X-Sentry-Relay-Signature": signature}, - ) - - assert resp.ok + data, _ = get_response(relay1, packed, signature, relay_id=caller.id) # test that it returns valid data - data = resp.json() for p in public_keys: assert data["configs"][p] is not None @@ -104,14 +133,7 @@ def test_invalid_json(mini_sentry, relay): body = {} # missing the required `publicKeys` field packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) + response = request_config(relay, packed, signature) assert response.status_code == 400 # Bad Request assert "JSON" in response.text @@ -121,7 +143,7 @@ def test_invalid_signature(mini_sentry, relay): relay = relay(mini_sentry) response = relay.post( - "/api/0/relays/projectconfigs/?version=2", + "/api/0/relays/projectconfigs/?version=3", data='{"publicKeys":[]}', headers={ "X-Sentry-Relay-Id": relay.relay_id, @@ -148,17 +170,9 @@ def test_broken_projectkey(mini_sentry, relay): } packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) + data, _ = get_response(relay, packed, signature) - assert response.ok - assert public_key in response.json()["configs"] + assert public_key in data["configs"] def test_pending_projects(mini_sentry, relay): @@ -172,63 +186,17 @@ def test_pending_projects(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - def request_config(): - return relay.post( - "/api/0/relays/projectconfigs/?version=3", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - response = request_config() - + response = request_config(relay, packed, signature) assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data["configs"] - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = request_config() - assert response.ok - data = response.json() - if data["configs"]: - break - else: - assert False, "Relay did still not receive a project config from minisentry" + data, _ = get_response(relay, packed, signature) assert public_key in data["configs"] assert data.get("pending") is None -def request_config(relay, packed, signature, version: str): - return relay.post( - f"/api/0/relays/projectconfigs/?version={version}", - data=packed, - headers={ - "X-Sentry-Relay-Id": 
relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - -def get_response(relay, packed, signature, version="3"): - data = None - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - # send 1 r/s - time.sleep(1) - response = request_config(relay, packed, signature, version) - assert response.ok - data = response.json() - if data["configs"]: - break - else: - print("Relay did still not receive a project config from minisentry") - return data - - def test_unparsable_project_config(mini_sentry, relay): project_key = 42 relay_config = { @@ -287,7 +255,7 @@ def assert_clear_test_failures(): assert_clear_test_failures() # The state should be fixed and updated by now, since we keep re-trying to fetch new one all the time. - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] time.sleep(1) @@ -326,7 +294,7 @@ def test_cached_project_config(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -336,7 +304,7 @@ def test_cached_project_config(mini_sentry, relay): # Give it a bit time for update to go through. time.sleep(1) - data = get_response(relay, packed, signature) + data, _ = get_response(relay, packed, signature) assert data["configs"][public_key]["projectId"] == project_key assert not data["configs"][public_key]["disabled"] @@ -373,7 +341,7 @@ def test_get_global_config(mini_sentry, relay): body = {"publicKeys": [], "global": True} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - data = get_response(relay, packed, signature, version="3") + data, _ = get_response(relay, packed, signature, version="3") global_extraction_config = data["global"].pop("metricExtraction") assert "span_metrics_common" in global_extraction_config["groups"] @@ -389,19 +357,11 @@ def test_compression(mini_sentry, relay): body = {"publicKeys": [public_key]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - response = relay.post( - "/api/0/relays/projectconfigs/?version=2", - data=packed, - headers={ - "Accept-Encoding": "gzip", - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, + data, response = get_response( + relay, packed, signature, headers={"Accept-Encoding": "gzip"} ) - - assert response.ok assert response.headers["content-encoding"] == "gzip" - assert public_key in response.json()["configs"] + assert public_key in data["configs"] def test_unchanged_projects(mini_sentry, relay): @@ -413,32 +373,14 @@ def test_unchanged_projects(mini_sentry, relay): body = {"publicKeys": [public_key], "revisions": ["123"]} packed, signature = SecretKey.parse(relay.secret_key).pack(body) - def request_config(): - return relay.post( - "/api/0/relays/projectconfigs/?version=3", - data=packed, - headers={ - "X-Sentry-Relay-Id": relay.relay_id, - "X-Sentry-Relay-Signature": signature, - }, - ) - - response = request_config() + response = request_config(relay, packed, signature) assert response.ok data = response.json() assert public_key in data["pending"] assert public_key not in data.get("unchanged", []) - deadline = time.monotonic() + 15 - while time.monotonic() <= deadline: - response = 
request_config() - assert response.ok - data = response.json() - if data.get("unchanged"): - break - else: - assert False, "Relay did still not receive a project config from minisentry" + data, response = get_response(relay, packed, signature) assert public_key in data["unchanged"] assert public_key not in data["configs"] diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index a934319c2c..d7ab5cc2e3 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -16,7 +16,12 @@ def test_local_project_config(mini_sentry, relay): project_id = 42 config = mini_sentry.basic_project_config(project_id) relay_config = { - "cache": {"file_interval": 1, "project_expiry": 1, "project_grace_period": 0} + "cache": { + "file_interval": 1, + "project_expiry": 1, + "project_grace_period": 0, + "eviction_interval": 1, + } } relay = relay(mini_sentry, relay_config, wait_health_check=False) relay.config_dir.mkdir("projects").join("42.json").write( @@ -45,17 +50,14 @@ def test_local_project_config(mini_sentry, relay): assert event["logentry"] == {"formatted": "Hello, World!"} relay.config_dir.join("projects").join("42.json").write( - json.dumps({"disabled": True}) + json.dumps({"disabled": True, "publicKeys": config["publicKeys"]}) ) - time.sleep(2) - try: - # This may or may not respond with 403, depending on how quickly the future to fetch project - # states executes. - relay.send_event(project_id, dsn_key=dsn_key) - except HTTPError: - pass + time.sleep(2) + relay.send_event(project_id, dsn_key=dsn_key) + time.sleep(0.3) + pytest.raises(HTTPError, lambda: relay.send_event(project_id, dsn_key=dsn_key)) pytest.raises(queue.Empty, lambda: mini_sentry.captured_events.get(timeout=1)) @@ -80,6 +82,7 @@ def get_project_config(): "miss_expiry": 1, "project_expiry": 1, "project_grace_period": grace_period, + "eviction_interval": 1, } }, ) @@ -324,7 +327,8 @@ def get_project_config(): "cache": { "miss_expiry": 1, "project_expiry": 1, - "project_grace_period": 0, + "project_grace_period": 5, + "eviction_interval": 1, } }, ) @@ -343,8 +347,8 @@ def get_project_config(): relay.send_event(42) - assert project_config_fetch.wait(timeout=1) - assert with_rev.wait(timeout=1) + assert project_config_fetch.wait(timeout=2) + assert with_rev.wait(timeout=2) event = mini_sentry.captured_events.get(timeout=1).get_event() assert event["logentry"] == {"formatted": "Hello, World!"}
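
Note on the `Revision` type that replaces `Option<String>` throughout the project-source API above: its definition is not included in this diff, so the sketch below is only an illustration of a shape that would satisfy the usages visible here (`Revision::default()` standing in for "no known revision", `revision.as_str()`, `"123".into()` in the upstream test, equality comparisons when attaching channels, and the serialized `revisions` list in `GetProjectStates`). The field layout, derives, and trait impls are assumptions, not the actual implementation.

use serde::Serialize;

/// Hypothetical stand-in for `Revision` (illustration only, not the real type).
/// `Revision::default()` means "no revision known", which forces a full fetch,
/// mirroring the old `Option<String>::None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
pub struct Revision(Option<String>);

impl Revision {
    /// Returns the revision as a string slice, if one is known.
    pub fn as_str(&self) -> Option<&str> {
        self.0.as_deref()
    }
}

impl From<&str> for Revision {
    fn from(value: &str) -> Self {
        Self(Some(value.to_owned()))
    }
}

Folding the "no revision" case into the type itself, rather than passing `Option<String>` or `Option<&str>` around, is what lets the diff replace those optional parameters with a single concrete type in `ProjectSource::fetch`, `RedisProjectSource::get_config_if_changed`, and `ProjectStateChannel`.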